Skip to content

Commit

Permalink
Merge pull request #1062 from PCMDI/feature/lee1043-mov-modularize
Browse files Browse the repository at this point in the history
Feature/lee1043 mov modularize
  • Loading branch information
lee1043 authored Feb 26, 2024
2 parents 131df5c + ae02779 commit dd09f56
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 148 deletions.
8 changes: 5 additions & 3 deletions pcmdi_metrics/variability_mode/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from .adjust_timeseries import ( # noqa
adjust_timeseries,
get_anomaly_timeseries,
get_residual_timeseries,
)
from .argparse_functions import ( # noqa
AddParserArgument,
VariabilityModeCheck,
Expand All @@ -9,13 +14,10 @@
)
from .dict_merge import dict_merge # noqa
from .eof_analysis import ( # noqa
adjust_timeseries,
arbitrary_checking,
eof_analysis_get_variance_mode,
gain_pcs_fraction,
gain_pseudo_pcs,
get_anomaly_timeseries,
get_residual_timeseries,
linear_regression,
linear_regression_on_globe_for_teleconnection,
)
Expand Down
147 changes: 147 additions & 0 deletions pcmdi_metrics/variability_mode/lib/adjust_timeseries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import xarray as xr

from pcmdi_metrics.io import (
get_latitude_bounds,
get_latitude_bounds_key,
get_longitude_bounds,
get_longitude_bounds_key,
get_time_key,
region_subset,
select_subset,
)


def adjust_timeseries(
ds: xr.Dataset,
data_var: str,
mode: str,
season: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Remove annual cycle (for all modes) and get its seasonal mean time series if
needed. Then calculate residual by subtraction domain (or global) average.
Input
- ds: array (t, y, x)
Output
- timeseries_season: array (t, y, x)
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of adjust_timeseries must be an xarray Dataset"
)
# Reomove annual cycle (for all modes) and get its seasonal mean time series if needed
ds_anomaly = get_anomaly_timeseries(ds, data_var, season)
# Calculate residual by subtracting domain (or global) average
ds_residual = get_residual_timeseries(
ds_anomaly, data_var, mode, regions_specs, RmDomainMean=RmDomainMean
)
# return result
return ds_residual


def get_anomaly_timeseries(ds: xr.Dataset, data_var: str, season: str) -> xr.Dataset:
"""
Get anomaly time series by removing annual cycle
Input
- timeseries: variable
- season: string
Output
- timeseries_ano: variable
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of get_anomaly_timeseries must be an xarray Dataset"
)
# Get anomaly field by removing annual cycle
ds_anomaly = ds.temporal.departures(data_var, freq="month", weighted=True)
# Get temporal average if needed
if season == "yearly":
# yearly time series
ds_anomaly = ds_anomaly.temporal.group_average(
data_var, freq="year", weighted=True
)
# restore bounds (especially time bounds)
ds_anomaly = ds_anomaly.bounds.add_missing_bounds()
# get overall average
ds_ave = ds_anomaly.temporal.average(data_var)
# anomaly
ds_anomaly[data_var] = ds_anomaly[data_var] - ds_ave[data_var]
elif season.upper() in ["DJF", "MAM", "JJA", "SON"]:
ds_anomaly_all_seasons = ds_anomaly.temporal.departures(
data_var,
freq="season",
weighted=True,
season_config={"dec_mode": "DJF", "drop_incomplete_djf": True},
)
ds_anomaly = select_by_season(ds_anomaly_all_seasons, season)
# return result
return ds_anomaly


def select_by_season(ds: xr.Dataset, season: str) -> xr.Dataset:
time_key = get_time_key(ds)
ds_subset = ds.where(ds[time_key].dt.season == season, drop=True)
# Preserve original spatial bounds info
# Extract original bounds
lat_bnds_key = get_latitude_bounds_key(ds)
lon_bnds_key = get_longitude_bounds_key(ds)
# Assign back to the new dataset
ds_subset[lat_bnds_key] = get_latitude_bounds(ds)
ds_subset[lon_bnds_key] = get_longitude_bounds(ds)
return ds_subset


def get_residual_timeseries(
ds_anomaly: xr.Dataset,
data_var: str,
mode: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Calculate residual by subtracting domain average (or global mean)
Input
- ds_anomaly: anomaly time series, array, 3d (t, y, x)
- mode: string, mode name, must be defined in regions_specs
- RmDomainMean: bool (True or False).
If True, remove domain mean of each time step.
Ref:
Bonfils and Santer (2011)
https://doi.org/10.1007/s00382-010-0920-1
Bonfils et al. (2015)
https://doi.org/10.1175/JCLI-D-15-0341.1
If False, remove global mean of each time step for PDO, or
do nothing for other modes
Default is True for this function.
- region_subdomain: lat lon range of sub domain for given mode, which was
extracted from regions_specs -- that is a dict contains domain
lat lon ragne for given mode
Output
- ds_residual: array, 3d (t, y, x)
"""
if not isinstance(ds_anomaly, xr.Dataset):
raise TypeError(
"The first parameter of get_residual_timeseries must be an xarray Dataset"
)
ds_residual = ds_anomaly.copy()
if RmDomainMean:
# Get domain mean
ds_anomaly_region = region_subset(
ds_anomaly, mode, data_var=data_var, regions_specs=regions_specs
)
ds_anomaly_mean = ds_anomaly_region.spatial.average(data_var)
# Subtract domain mean
ds_residual[data_var] = ds_anomaly[data_var] - ds_anomaly_mean[data_var]
else:
if mode in ["PDO", "NPGO", "AMO"]:
# Get global mean (latitude -60 to 70)
ds_anomaly_subset = select_subset(ds_anomaly, lat=(-60, 70))
ds_anomaly_subset_mean = ds_anomaly_subset.spatial.average(data_var)
# Subtract global mean
ds_residual[data_var] = (
ds_anomaly[data_var] - ds_anomaly_subset_mean[data_var]
)
# return result
return ds_residual
142 changes: 0 additions & 142 deletions pcmdi_metrics/variability_mode/lib/eof_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,10 @@

from pcmdi_metrics.io import (
get_latitude,
get_latitude_bounds,
get_latitude_bounds_key,
get_latitude_key,
get_longitude,
get_longitude_bounds,
get_longitude_bounds_key,
get_longitude_key,
get_time_key,
region_subset,
select_subset,
)
from pcmdi_metrics.utils import calculate_area_weights, calculate_grid_area

Expand Down Expand Up @@ -353,142 +347,6 @@ def gain_pcs_fraction(
return fraction


def adjust_timeseries(
ds: xr.Dataset,
data_var: str,
mode: str,
season: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Remove annual cycle (for all modes) and get its seasonal mean time series if
needed. Then calculate residual by subtraction domain (or global) average.
Input
- ds: array (t, y, x)
Output
- timeseries_season: array (t, y, x)
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of adjust_timeseries must be an xarray Dataset"
)
# Reomove annual cycle (for all modes) and get its seasonal mean time series if needed
ds_anomaly = get_anomaly_timeseries(ds, data_var, season)
# Calculate residual by subtracting domain (or global) average
ds_residual = get_residual_timeseries(
ds_anomaly, data_var, mode, regions_specs, RmDomainMean=RmDomainMean
)
# return result
return ds_residual


def get_anomaly_timeseries(ds: xr.Dataset, data_var: str, season: str) -> xr.Dataset:
"""
Get anomaly time series by removing annual cycle
Input
- timeseries: variable
- season: string
Output
- timeseries_ano: variable
"""
if not isinstance(ds, xr.Dataset):
raise TypeError(
"The first parameter of get_anomaly_timeseries must be an xarray Dataset"
)
# Get anomaly field by removing annual cycle
ds_anomaly = ds.temporal.departures(data_var, freq="month", weighted=True)
# Get temporal average if needed
if season == "yearly":
# yearly time series
ds_anomaly = ds_anomaly.temporal.group_average(
data_var, freq="year", weighted=True
)
# restore bounds (especially time bounds)
ds_anomaly = ds_anomaly.bounds.add_missing_bounds()
# get overall average
ds_ave = ds_anomaly.temporal.average(data_var)
# anomaly
ds_anomaly[data_var] = ds_anomaly[data_var] - ds_ave[data_var]
elif season.upper() in ["DJF", "MAM", "JJA", "SON"]:
ds_anomaly_all_seasons = ds_anomaly.temporal.departures(
data_var,
freq="season",
weighted=True,
season_config={"dec_mode": "DJF", "drop_incomplete_djf": True},
)
ds_anomaly = select_by_season(ds_anomaly_all_seasons, season)
# return result
return ds_anomaly


def select_by_season(ds: xr.Dataset, season: str) -> xr.Dataset:
time_key = get_time_key(ds)
ds_subset = ds.where(ds[time_key].dt.season == season, drop=True)
# Preserve original spatial bounds info
# Extract original bounds
lat_bnds_key = get_latitude_bounds_key(ds)
lon_bnds_key = get_longitude_bounds_key(ds)
# Assign back to the new dataset
ds_subset[lat_bnds_key] = get_latitude_bounds(ds)
ds_subset[lon_bnds_key] = get_longitude_bounds(ds)
return ds_subset


def get_residual_timeseries(
ds_anomaly: xr.Dataset,
data_var: str,
mode: str,
regions_specs: dict = None,
RmDomainMean: bool = True,
) -> xr.Dataset:
"""
Calculate residual by subtracting domain average (or global mean)
Input
- ds_anomaly: anomaly time series, array, 3d (t, y, x)
- mode: string, mode name, must be defined in regions_specs
- RmDomainMean: bool (True or False).
If True, remove domain mean of each time step.
Ref:
Bonfils and Santer (2011)
https://doi.org/10.1007/s00382-010-0920-1
Bonfils et al. (2015)
https://doi.org/10.1175/JCLI-D-15-0341.1
If False, remove global mean of each time step for PDO, or
do nothing for other modes
Default is True for this function.
- region_subdomain: lat lon range of sub domain for given mode, which was
extracted from regions_specs -- that is a dict contains domain
lat lon ragne for given mode
Output
- ds_residual: array, 3d (t, y, x)
"""
if not isinstance(ds_anomaly, xr.Dataset):
raise TypeError(
"The first parameter of get_residual_timeseries must be an xarray Dataset"
)
ds_residual = ds_anomaly.copy()
if RmDomainMean:
# Get domain mean
ds_anomaly_region = region_subset(
ds_anomaly, mode, data_var=data_var, regions_specs=regions_specs
)
ds_anomaly_mean = ds_anomaly_region.spatial.average(data_var)
# Subtract domain mean
ds_residual[data_var] = ds_anomaly[data_var] - ds_anomaly_mean[data_var]
else:
if mode in ["PDO", "NPGO", "AMO"]:
# Get global mean (latitude -60 to 70)
ds_anomaly_subset = select_subset(ds_anomaly, lat=(-60, 70))
ds_anomaly_subset_mean = ds_anomaly_subset.spatial.average(data_var)
# Subtract global mean
ds_residual[data_var] = (
ds_anomaly[data_var] - ds_anomaly_subset_mean[data_var]
)
# return result
return ds_residual


def debug_print(string, debug):
if debug:
nowtime = strftime("%Y-%m-%d %H:%M:%S", gmtime())
Expand Down
8 changes: 5 additions & 3 deletions pcmdi_metrics/variability_mode/lib/lib_variability_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,9 @@ def subset_time(
if not isinstance(eyear, int):
eyear = int(eyear)

# First trimming
time1 = f"{syear}-01-01 00:00:00"
time2 = f"{eyear}-12-{eday} 23:59:59"

# First trimming
ds = select_subset(ds, time=(time1, time2))

# Check available time window and adjust again if needed
Expand All @@ -165,10 +164,10 @@ def subset_time(
"data_syear: " + str(data_syear) + " data_eyear: " + str(data_eyear), debug
)

# Second trimming
if adjust_time_length:
time1 = f"{data_syear}-01-01 00:00:00"
time2 = f"{data_eyear}-12-{eday} 23:59:59"
# Second trimming
ds = select_subset(ds, time=(time1, time2))

return ds
Expand Down Expand Up @@ -234,6 +233,9 @@ def pick_year_last_day(ds):
if "calendar" in ds[time_key].attrs.keys():
if "360" in ds[time_key]["calendar"]:
eday = 30
else:
if "360" in ds[time_key][0].values.item().calendar:
eday = 30
except Exception:
pass
return eday
Expand Down

0 comments on commit dd09f56

Please # to comment.