diff --git a/CHANGELOG.md b/CHANGELOG.md index f96e796..5a444a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added module `benchmark_gcclassic_stats.py` for scraping statistics from GEOS-Chem Classic cloud benchmarks - Added dry deposition velocity comparison plots in 1-month cloud benchmarks - Added `gcpy/benchmark/modules/benchmark_species_changes.py` to compute the table of species changes between versions +- Added `gcpy/kpp/` folder containing scripts to plot output from the KPP-Standalone box model ### Changed - Changed format of `% diff` column from `12.3e` to `12.3f` in benchmark timing tables diff --git a/gcpy/examples/kppsa_visualize.py b/gcpy/examples/kppsa_visualize.py new file mode 100755 index 0000000..c9a3cbd --- /dev/null +++ b/gcpy/examples/kppsa_visualize.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 +""" +Script to visualize output from the KPP standalone box model. +""" + +# Imports +from os.path import abspath, basename, join +from sys import argv +from glob import glob +from datetime import datetime, timedelta +from matplotlib.backends.backend_pdf import PdfPages +from matplotlib.figure import Figure +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from gcpy.constants import ENCODING +from gcpy.util import verify_variable_type + + +def kppsa_read_one_csv_file(file_name): + """ + Reads a single log file (in CSV format) from the KPP + standalone box model into a pandas.DataFrame object. + + Args + file_name : str : File to be read + + Returns + dframe : pd.DataFrame : DataFrame with the results + """ + verify_variable_type(file_name, str) + + # Initialize variables + latitude = "" + longitude = "" + level = "" + pressure = "" + timestamp = "" + + # Read file header contents + with open(file_name, "r", encoding=ENCODING) as ifile: + + # Find the number of rows to skip + skiprows = int(ifile.readline().strip()) + 1 + + # Read the rest of the header contents + for line in ifile: + line = line.strip() + + # Extract selected metadata fields + if "Location" in line: + location = line.split(":")[1].strip() + if "Timestamp" in line: + substrs = line.split(":") + timestamp = substrs[1].strip() + ":" + substrs[2].strip() + timestamp = timestamp.replace("/", "-").replace(" ", "T") + if "Longitude" in line: + longitude = line.split(":")[1].strip() + if "Latitude" in line: + latitude = line.split(":")[1].strip() + if "GEOS-Chem Vertical Level" in line: + level = line.split(":")[1].strip() + if "Pressure (hPa)" in line: + pressure = line.split(":")[1].strip() + + # Exit the loop + if "Species Name" in line: + break + + # Read the CSV into a DataFrame object + dframe = pd.read_csv( + file_name, + skiprows=skiprows, + delimiter=",", + dtype={ + "Initial Concentration (molec/cm3)": np.float64, + "Final Concentration (molec/cm3)": np.float64, + }, + engine="c" + ) + + # Add series with metadata obtained from the header + dframe = dframe.assign(Location=location) + dframe = dframe.assign(DateTime=np.datetime64(timestamp)) + dframe = dframe.assign(Longitude=float(longitude)) + dframe = dframe.assign(Latitude=float(latitude)) + dframe = dframe.assign(Level=int(level)) + dframe = dframe.assign(Pressure=float(pressure)) + + return dframe + + +def kppsa_read_csv_files(input_dir, site="all", search_str=""): + """ + Reads all KPP standalone log files for a given site + in a given directory. + + Args + input_dir : str : Directory to search + site : str : KPP standalone site name + + Returns + dframe_all : pd.DataFrame : Observations at all levels + + """ + verify_variable_type(input_dir, str) + verify_variable_type(site, str) + + # Get a list of files that match the site name + # Make sure to convert to absolute path + if site == "all": + file_list = glob(abspath(join(input_dir) + f"*{search_str}*.log")) + else: + file_list = glob(abspath(join(input_dir, site) + f"*{search_str}*.log")) + + # Read data from each level into a dataframe + # and concatenate into a new dataframe + dframe_all = pd.DataFrame() + for file_name in file_list: + dframe = kppsa_read_one_csv_file(file_name) + dframe_all = pd.concat([dframe_all, dframe], ignore_index=True) + del dframe + + return dframe_all + + +def get_element_of_series(series, element): + """ + Returns the first element of a pd.Series object. + + Args + serie : pd.Series : A pd.Series object + element : int : Element of the pd.Series object to return + + Returns + value : various : The returned element + """ + verify_variable_type(series, pd.Series) + verify_variable_type(element, int) + + return list(series)[element] + + +def prepare_site_data( + dframe, + site_name, + species, +): + """ + """ + + # Get data for a given species at all locations + site_data = dframe.loc[dframe["Location"] == site_name] + site_data = site_data.loc[site_data["Species Name"] == species] + site_data = site_data.loc[site_data["Pressure"] >= 500] + site_data = site_data.sort_values(by="Pressure", ascending=False) + + # Create the top title for the subplot for this observation site + # (use integer lon & lat values and N/S lat and E/W lon notation) + lat = int(round(get_element_of_series(dframe["Latitude"], 0))) + lon = int(round(get_element_of_series(dframe["Longitude"], 0))) + #time = get_element_of_series(dframe["DateTime"], 0) + ystr = "S" + if lat >= 0: + ystr = "N" + xstr = "W" + if lon >= 0: + xstr = "E" + lon = abs(lon) + lat = abs(lat) + site_title = \ + f"{site_name.strip()} ({lat}$^\\circ${ystr},{lon}$^\\circ${xstr})" + + return site_data, site_title + + +def plot_single_site( + fig, + rows_per_page, + cols_per_page, + subplot_index, + subplot_title, + dframe, + location, + species, +): + """ + Plots observation data vs. model data at a single station site. + + Args: + obs_dataframe : mpl.figure.Figure : Figure object for the plot + rows_per_page : int : # of rows to plot on a page + cols_per_page : int : # of columns to plot on a page + subplot_index : int : Index of each subplot + subplot_title : str : Title for each subplot + subplot_ylabel : str : Y-axis title for each subplot + obs_dataframe : pd.DataFrame : Observations at each station site + obs_site_name : str : Name of the station site + ref_series : pd.Series : Data from the Ref model version + ref_label : str : Label for the Ref model data + dev_dataarray : pd.Series : Data from the Dev model version + dev_label : str : Label for the Dev model data + """ + + # Error check + if species not in list(dframe["Species Name"]): + raise ValueError("Species is not included in the mechaism!") + + # Create matplotlib axes object for this subplot + # axes_subplot is of type matplotlib.axes_.subplots.AxesSubplot + axes_subplot = fig.add_subplot( + rows_per_page, + cols_per_page, + subplot_index + 1, + ) + + # Title for each subplot + axes_subplot.set_title( + subplot_title, + weight='bold', + fontsize=8 + ) + + # Initial concentration + axes_subplot.plot( + dframe["Initial Concentration (molec/cm3)"].astype(np.float64), + dframe["Pressure"], + color='k', + marker='^', + markersize=3, + lw=1, + label=f"Initial {species}", + ) + + # Final concentration + axes_subplot.plot( + dframe["Final Concentration (molec/cm3)"].astype(np.float64), + dframe["Pressure"], + color='r', + marker='o', + markersize=3, + lw=1, + label=f"Final {species}", + ) + + # Set X and Y axis labels + axes_subplot.set_xlabel( + f"{species} (molec/cm3)", + fontsize=8 + ) + + # Apply y-axis label only if this is a leftmost plot panel + if subplot_index == 0 or subplot_index % cols_per_page == 0: + axes_subplot.set_ylabel( + "Pressure (hPa)", + fontsize=8 + ) + + # Set Y-axis range + axes_subplot.set_ylim( + 1000.0, + 500.0 + ) + axes_subplot.set_yticks( + [1000, 900, 800, 700, 600, 500] + ) + axes_subplot.tick_params( + axis='both', + which='major', + labelsize=8 + ) + + +def plot_one_page( + pdf, + site_names, + dframe, + species="O3", + rows_per_page=3, + cols_per_page=3, + pdf_file=f"site.pdf", +): + """ + Plots a single page of models vs. observations. + + Args: + obs_dataframe : pd.DataFrame : Observations at each station site. + obs_label : str : Label for the observational data + obs_site_coords : dict : Coords (lon/lat/alt) at each site. + obs_site_names : list : Names of station sites per page + ref_dataarray : xr.DataArray : Data from the Ref model version + ref_label : str : Label for the Ref model data + ref_cs_grid : str|None : Metadata for Ref cubed-sphere grid + dev_dataarray : xr.DataArray : Data from the Dev model version + dev_label : str : Label for the Dev model data + dev_cs_grid : str|None : Metadata for Dev cubed-sphere grid + gc_levels : pd.DataFrame : Metadata for model vertical levels + rows_per_page : int : Number of rows to plot on a page + varname : str : Variable name for model data + """ + + # Define a new matplotlib.figure.Figure object for this page + # Landscape width: 11" x 8" + fig = plt.figure(figsize=(11, 8)) + fig.tight_layout() + + # Loop over all of the stations that fit on the page + for subplot_index, site_name in enumerate(site_names): + + # Get the data + site_data, site_title = prepare_site_data( + dframe, + site_name, + species, + ) + + # Plot species vertical profile at a given site + plot_single_site( + fig, + rows_per_page, + cols_per_page, + subplot_index, + site_title, + site_data, + site_name, + species, + ) + + # Add extra spacing around plots + plt.subplots_adjust( + hspace=0.4, + top=0.9 + ) + + # Add top-of-page legend + plt.legend( + ncol=3, + bbox_to_anchor=(0.5, 0.98), + bbox_transform=fig.transFigure, + loc='upper center' + ) + + # Save this page to the PDF file + pdf.savefig(fig) + + +def get_unique_site_names(dframe): + """ + """ + + # Sort the sites from north to south + site_names = dframe[["Location", "Latitude"]].sort_values( + by="Latitude", + ascending=False + ) + + # Get a list of unique site names, preserving the ordering + unique_site_names = [] + seen = set() + for site_name in site_names["Location"]: + if site_name not in seen: + seen.add(site_name) + unique_site_names.append(site_name) + + return unique_site_names + + +def make_kpp_standalone_plots(search_str, species): + """ + """ + + # Read data + dframe = kppsa_read_csv_files("./output4/", search_str=search_str) + + # Figure setup + plt.style.use("seaborn-v0_8-darkgrid") + rows_per_page = 3 + cols_per_page = 2 + plots_per_page = rows_per_page * cols_per_page + + # Open the plot as a PDF document + pdf = PdfPages(f"{species}.pdf") + + # Get a list of site names sorted from N to S + site_names = get_unique_site_names(dframe) + + # Loop over the number of obs sites that fit on a page + for start in range(0, len(site_names), plots_per_page): + end = start + plots_per_page - 1 + plot_one_page( + pdf, + site_names[start:end+1], + dframe, + species, + rows_per_page=rows_per_page, + cols_per_page=cols_per_page + ) + + pdf.close() + + # Reset the plot style (this prevents the seaborn style from + # being applied to other model vs. obs plotting scripts) + plt.style.use("default") + + +if __name__ == '__main__': + make_kpp_standalone_plots(argv[1], argv[2]) + diff --git a/gcpy/kpp/__init__.py b/gcpy/kpp/__init__.py new file mode 100644 index 0000000..f2d0fa0 --- /dev/null +++ b/gcpy/kpp/__init__.py @@ -0,0 +1,6 @@ +""" +GCPy import script +""" +from .kppsa_utils import * +from .kppsa_quick_look import * +from .kppsa_plot_sites import * diff --git a/gcpy/kpp/kppsa_plot_sites.py b/gcpy/kpp/kppsa_plot_sites.py new file mode 100755 index 0000000..159d16b --- /dev/null +++ b/gcpy/kpp/kppsa_plot_sites.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +""" +Script to visualize output from the KPP standalone box model. +""" + +# Imports +import argparse +from matplotlib.backends.backend_pdf import PdfPages +import matplotlib.pyplot as plt +from gcpy.util import verify_variable_type +from gcpy.kpp.kppsa_utils import \ + kppsa_get_file_list, kppsa_get_unique_site_names,\ + kppsa_plot_one_page, kppsa_read_csv_files + + +def kppsa_plot_species_at_sites( + ref_file_list, + ref_label, + dev_file_list, + dev_label, + species, + pdfname, +): + """ + TBD + """ + verify_variable_type(ref_file_list, list) + verify_variable_type(ref_label, str) + verify_variable_type(dev_file_list, list) + verify_variable_type(dev_label, str) + verify_variable_type(species, str) + verify_variable_type(pdfname, str) + + # Read data + ref_data = kppsa_read_csv_files(ref_file_list) + dev_data = kppsa_read_csv_files(dev_file_list) + + # Get a list of site names sorted from N to S + site_names = kppsa_get_unique_site_names(ref_data) + + # Figure setup + plt.style.use("seaborn-v0_8-darkgrid") + rows_per_page = 3 + cols_per_page = 2 + plots_per_page = rows_per_page * cols_per_page + + # Open the plot as a PDF document + if ".pdf" not in pdfname: + pdfname += ".pdf" + pdf = PdfPages(f"{pdfname}") + + # Loop over the number of obs sites that fit on a page + for start in range(0, len(site_names), plots_per_page): + end = start + plots_per_page - 1 + kppsa_plot_one_page( + pdf, + site_names[start:end+1], + ref_data, + ref_label, + dev_data, + dev_label, + species, + rows_per_page, + cols_per_page, + font_scale=1.0, + ) + + # Close the PDF file + pdf.close() + + # Reset the plot style (this prevents the seaborn style from + # being applied to other model vs. obs plotting scripts) + plt.style.use("default") + + +def main(): + """ + TBD + """ + # Tell the parser which arguments to look for + parser = argparse.ArgumentParser( + description="Single-panel plotting example program" + ) + parser.add_argument( + "--refdir", + metavar="REFDIR", + type=str, + required=True, + help="Directory w/ KPP-Standalone log files (Ref version)" + ) + parser.add_argument( + "--reflabel", + metavar="REFLABEL", + type=str, + required=False, + help="Descriptive label for the Ref data", + default="Ref" + ) + parser.add_argument( + "--devdir", + metavar="DEVDIR", + type=str, + required=True, + help="Directory w/ KPP-Standalone log files (Dev version)" + ) + parser.add_argument( + "--devlabel", + metavar="DEVLABEL", + type=str, + required=False, + help="Descriptive label for the Ref data", + default="Dev" + ) + parser.add_argument( + "--pattern", + metavar="PATTERN", + type=str, + required=False, + help="Search for file names matching this pattern", + ) + parser.add_argument( + "--species", + metavar="SPECIES", + type=str, + required=True, + help="Species to plot" + ) + parser.add_argument( + "--pdfname", + metavar="PDF-FILE-NAME", + type=str, + required=False, + help="Name of the PDF file to be created", + default="kppsa_output.pdf" + ) + + # Parse command-line arguments + args = parser.parse_args() + + # Get a list of KPP-Standalone files matching the criteria (Ref) + ref_file_list = kppsa_get_file_list( + args.refdir, + args.pattern, + ) + if len(ref_file_list) == 0: + msg = "Could not find any files matching {pattern} for Ref!" + raise ValueError(msg) + + dev_file_list = kppsa_get_file_list( + args.devdir, + args.pattern, + ) + if len(ref_file_list) == 0: + msg = "Could not find any files matching {pattern} for Dev!" + raise ValueError(msg) + + # Plot data + kppsa_plot_species_at_sites( + ref_file_list, + args.reflabel, + dev_file_list, + args.devlabel, + args.species, + args.pdfname, + ) + +if __name__ == '__main__': + main() diff --git a/gcpy/kpp/kppsa_quick_look.py b/gcpy/kpp/kppsa_quick_look.py new file mode 100755 index 0000000..f3ca263 --- /dev/null +++ b/gcpy/kpp/kppsa_quick_look.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Creates a "quick-look" plot from KPP-Standalone box model output. +""" + +# Imports +import argparse +import matplotlib.pyplot as plt +from gcpy.util import verify_variable_type +from gcpy.kpp.kppsa_utils import \ + kppsa_get_file_list, kppsa_get_unique_site_names, \ + kppsa_plot_single_site, kppsa_prepare_site_data, \ + kppsa_read_csv_files + + +def kppsa_make_quick_look_plot(file_list, label, species): + """ + Creates a quick-look plot from KPP-Standalone box model output. + + Args + file_list : list : List of KPP-Standalone log files + site_name : str : Name of the site that you wish to plot + label : str : Descriptive label for the data + species : str : Name of the species that you wish to plot + """ + verify_variable_type(file_list, list) + verify_variable_type(label, str) + verify_variable_type(species, str) + + # Read data + dframe = kppsa_read_csv_files(file_list) + + # Get the site name from the DataFrame + site_name = kppsa_get_unique_site_names(dframe)[0] + + # Get the data for the given species and site + site_data, site_title = kppsa_prepare_site_data( + dframe, + site_name, + species, + ) + + # Figure setup + plt.style.use("seaborn-v0_8-darkgrid") + + # Define a new matplotlib.figure.Figure object for this page + # Landscape width: 11" x 8" + fig = plt.figure(figsize=(11, 8)) + fig.tight_layout() + + # Figure setup + plt.style.use("seaborn-v0_8-darkgrid") + + # Plot species vertical profile at a given site + kppsa_plot_single_site( + fig, + rows_per_page=1, + cols_per_page=1, + subplot_index=0, + subplot_title=site_title, + ref_data=site_data, + ref_label=label, + dev_data=None, + dev_label=None, + species=species, + font_scale=2.0, + ) + + # Add top-of-page legend + plt.legend( + ncol=3, + bbox_to_anchor=(0.5, 0.98), + bbox_transform=fig.transFigure, + loc='upper center' + ) + + # Show the plot + plt.show() + + # Reset the plot style (this prevents the seaborn style from + # being applied to other model vs. obs plotting scripts) + plt.style.use("default") + + +def main(): + """ + Parses arguments and calls function kppsa_make_quick_look_plot + to generate a "quick-look" plot from KPP-Standalone box model + output. + """ + + # Tell the parser which arguments to look for + parser = argparse.ArgumentParser( + description="Single-panel plotting example program" + ) + parser.add_argument( + "-d", "--dirname", + metavar="DIRNAME", + type=str, + required=True, + help="Directory containing KPP-Standalone output files" + ) + parser.add_argument( + "-l", "--label", + metavar="LABEL", + type=str, + required=False, + help="Descriptive label", + default="KPP-Standalone output" + ) + parser.add_argument( + "-p", "--pattern", + metavar="PATTERN", + type=str, + required=False, + help="Search for file names matching this pattern", + ) + parser.add_argument( + "-s", "--species", + metavar="SPECIES", + type=str, + required=True, + help="Species to plot" + ) + + # Parse command-line arguments + args = parser.parse_args() + + # Get a list of KPP-Standalone files matching the criteria + file_list = kppsa_get_file_list( + args.dirname, + args.pattern, + ) + + # Create the quick look plot from KPP-Standalone output + kppsa_make_quick_look_plot( + file_list, + args.label, + args.species + ) + + +if __name__ == '__main__': + main() diff --git a/gcpy/kpp/kppsa_utils.py b/gcpy/kpp/kppsa_utils.py new file mode 100755 index 0000000..b82cb74 --- /dev/null +++ b/gcpy/kpp/kppsa_utils.py @@ -0,0 +1,407 @@ +#!/usr/bin/env python3 +""" +Utility functions for visualizing output from the +KPP-Standalone box model. +""" + +# Imports +from os.path import expanduser, join +from glob import glob +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from gcpy.constants import ENCODING +from gcpy.util import get_element_of_series, verify_variable_type + + +def kppsa_get_file_list( + input_dir, + pattern="" +): + """ + Returns a list of KPP-Standalone log files matching + a search criteria. + + Args + input_dir : str : Directory with KPP-Standalone log files + pattern : str : Read files matching this pattern (Default = "") + + Returns + file_list : list : List of files matching the criteria + """ + return glob(join(expanduser(input_dir), f"*{pattern}*")) + + +def kppsa_read_one_csv_file(file_name): + """ + Reads a single log file (in CSV format) from the KPP + standalone box model into a pandas.DataFrame object. + + Args + file_name : str : File to be read + + Returns + dframe : pd.DataFrame : DataFrame with the results + """ + verify_variable_type(file_name, str) + + # Initialize variables + latitude = "" + level = "" + longitude = "" + location = "" + pressure = "" + timestamp = "" + + # Read file header contents + with open(file_name, "r", encoding=ENCODING) as ifile: + + # Find the number of rows to skip + skiprows = int(ifile.readline().strip()) + 1 + + # Read the rest of the header contents + for line in ifile: + line = line.strip() + + # Extract selected metadata fields + if "Location" in line: + location = line.split(":")[1].strip() + if "Timestamp" in line: + substrs = line.split(":") + timestamp = substrs[1].strip() + ":" + substrs[2].strip() + timestamp = timestamp.replace("/", "-").replace(" ", "T") + if "Longitude" in line: + longitude = line.split(":")[1].strip() + if "Latitude" in line: + latitude = line.split(":")[1].strip() + if "GEOS-Chem Vertical Level" in line: + level = line.split(":")[1].strip() + if "Pressure (hPa)" in line: + pressure = line.split(":")[1].strip() + + # Exit the loop + if "Species Name" in line: + break + + # Read the CSV into a DataFrame object + dframe = pd.read_csv( + file_name, + skiprows=skiprows, + delimiter=",", + dtype={ + "Initial Concentration (molec/cm3)": np.float64, + "Final Concentration (molec/cm3)": np.float64, + }, + engine="c" + ) + + # Add series with metadata obtained from the header + dframe = dframe.assign(Location=location) + dframe = dframe.assign(DateTime=np.datetime64(timestamp)) + dframe = dframe.assign(Longitude=float(longitude)) + dframe = dframe.assign(Latitude=float(latitude)) + dframe = dframe.assign(Level=int(level)) + dframe = dframe.assign(Pressure=float(pressure)) + + return dframe + + +def kppsa_read_csv_files(file_list): + """ + Reads all KPP standalone log files for a given site + in a given directory. + + Args + input_dir : str : Directory to search + site : str : KPP standalone site name + + Returns + dframe_all : pd.DataFrame : Observations at all levels + + """ + dframe_all = pd.DataFrame() + for file_name in file_list: + dframe = kppsa_read_one_csv_file(file_name) + dframe_all = pd.concat([dframe_all, dframe], ignore_index=True) + del dframe + + return dframe_all + + +def kppsa_prepare_site_data( + dframe, + site_name, + species, +): + """ + Returns a pd.DataFrame object containing data for a given species, + and observation site, as well as the corresponding top-of-plot + title. Species data is limited from the surface to 500 hPa. + + Args + dframe : pd.DataFrame : KPP-Standalone output data + site_name : str : Name of site to plot + species : species : Name of species to plot + + Returns + site_data : pd.DataFrame : Data for the given site & species + site_title : str : Corresponding plot title string + """ + + # Exit if the data frame is set to None + if dframe is None: + return None, None + + # Get data for a given species at all locations + site_data = dframe.loc[dframe["Location"] == site_name] + site_data = site_data.loc[site_data["Species Name"] == species] + site_data = site_data.loc[site_data["Pressure"] >= 500] + site_data = site_data.sort_values(by="Pressure", ascending=False) + + # Create the top title for the subplot for this observation site + # (use integer lon & lat values and N/S lat and E/W lon notation) + lat = int(round(get_element_of_series(site_data["Latitude"], 0))) + lon = int(round(get_element_of_series(site_data["Longitude"], 0))) + time = get_element_of_series(site_data["DateTime"], 0) + ystr = "S" + if lat >= 0: + ystr = "N" + xstr = "W" + if lon >= 0: + xstr = "E" + lon = abs(lon) + lat = abs(lat) + site_title = \ + f"{site_name.strip()} ({lat}$^\\circ${ystr},{lon}$^\\circ${xstr})" + site_title += f" at {time}" + + return site_data, site_title + + +def kppsa_plot_single_site( + fig, + rows_per_page, + cols_per_page, + subplot_index, + subplot_title, + ref_data, + ref_label, + dev_data, + dev_label, + species, + font_scale, +): + """ + Plots observation data vs. model data at a single station site. + + Args: + fig : mpl.figure.Figure : Figure object for the plot + rows_per_page : int : # of rows to plot on a page + cols_per_page : int : # of columns to plot on a page + subplot_index : int : Index of each subplot + subplot_title : str : Title for each subplot + ref_data : pd.DataFrame : Observations at each station site + ref_label : str : Label for the Ref model data + dev_data : pd.DataFrame : + dev_label : str : Label for the Dev model data + site_name : str : Name of the station site + species : pd.Series : Data from the Ref model version + font_scale : float : Scale fac to increase font size + """ + + # Error checks + if species not in list(ref_data["Species Name"]): + raise ValueError("Species not found in Ref model output!") + if dev_data is not None: + if species not in list(dev_data["Species Name"]): + raise ValueError("Species not found in Dev model output!") + + # Create matplotlib axes object for this subplot + # axes_subplot is of type matplotlib.axes_.subplots.AxesSubplot + axes_subplot = fig.add_subplot( + rows_per_page, + cols_per_page, + subplot_index + 1, + ) + + # Title for each subplot + axes_subplot.set_title( + subplot_title, + weight='bold', + fontsize=8*font_scale, + ) + + # Initial concentration + axes_subplot.plot( + ref_data["Initial Concentration (molec/cm3)"].astype(np.float64), + ref_data["Pressure"], + color='k', + marker='o', + markersize=3*font_scale, + lw=1, + label=f"Initial {species}", + ) + + # Final concentration (Ref) + axes_subplot.plot( + ref_data["Final Concentration (molec/cm3)"].astype(np.float64), + ref_data["Pressure"], + color='r', + marker='o', + markersize=3*font_scale, + lw=1, + label=f"{ref_label}", + ) + + # Final concentration (Dev) + if dev_data is not None: + axes_subplot.plot( + dev_data["Final Concentration (molec/cm3)"].astype(np.float64), + dev_data["Pressure"], + color='b', + marker='o', + markersize=3*font_scale, + lw=1, + label=f"{dev_label}", + ) + + + # Set X and Y axis labels + axes_subplot.set_xlabel( + f"{species} (molec/cm3)", + fontsize=8*font_scale, + ) + + # Apply y-axis label only if this is a leftmost plot panel + if subplot_index == 0 or subplot_index % cols_per_page == 0: + axes_subplot.set_ylabel( + "Pressure (hPa)", + fontsize=8*font_scale, + ) + + # Set Y-axis range + axes_subplot.set_ylim( + 1020.0, + 500.0 + ) + axes_subplot.set_yticks( + [1000, 900, 800, 700, 600, 500] + ) + axes_subplot.tick_params( + axis='both', + which='major', + labelsize=8*font_scale + ) + + +def kppsa_plot_one_page( + pdf, + site_names, + ref_dframe, + ref_label, + dev_dframe, + dev_label, + species="O3", + rows_per_page=3, + cols_per_page=3, + font_scale=1.0, +): + """ + Plots a single page of models vs. observations. + + Args: + pdf : pdf : PDF object + ref_dframe : pd.DataFrame : Observations at each station site. + ref_label : str : Label for the observational data + dev_dframe : pd.DataFrame : Data from the Ref model version + dev_label : str : Label for the Ref model data + species : str : Name of the species to plot + dev_dataarray : xr.DataArray : Data from the Dev model version + dev_label : str : Label for the Dev model data + dev_cs_grid : str|None : Metadata for Dev cubed-sphere grid + gc_levels : pd.DataFrame : Metadata for model vertical levels + rows_per_page : int : Number of rows to plot on a page + cols_per_page : int : Number of cols to plot on a page + font_scale : float : PDF output file name + """ + + # Define a new matplotlib.figure.Figure object for this page + # Landscape width: 11" x 8" + fig = plt.figure(figsize=(11, 8)) + fig.tight_layout() + + # Loop over all of the stations that fit on the page + for subplot_index, site_name in enumerate(site_names): + + # Get the data for the given species and site + ref_site_data, site_title = kppsa_prepare_site_data( + ref_dframe, + site_name, + species, + ) + dev_site_data, _ = kppsa_prepare_site_data( + dev_dframe, + site_name, + species, + ) + + # Plot species vertical profile at a given site + kppsa_plot_single_site( + fig, + rows_per_page, + cols_per_page, + subplot_index, + site_title, + ref_site_data, + ref_label, + dev_site_data, + dev_label, + species, + font_scale, + ) + + # Add extra spacing around plots + plt.subplots_adjust( + hspace=0.4, + top=0.9 + ) + + # Add top-of-page legend + plt.legend( + ncol=3, + bbox_to_anchor=(0.5, 0.98), + bbox_transform=fig.transFigure, + loc='upper center' + ) + + # Save this page to the PDF file + pdf.savefig(fig) + + +def kppsa_get_unique_site_names(dframe): + """ + Returns a list of unique sites where KPP-Standalone box model + output has been archived. + + Args + dframe : pd.DataFrame : Object containing KPP-Standalone output + + Returns + site_names : list of str : List of unique site names + """ + + # Sort the sites from north to south + site_names = dframe[["Location", "Latitude"]].sort_values( + by="Latitude", + ascending=False + ) + + # Get a list of unique site names, preserving the ordering + unique_site_names = [] + seen = set() + for site_name in site_names["Location"]: + if site_name not in seen: + seen.add(site_name) + unique_site_names.append(site_name) + + return unique_site_names diff --git a/gcpy/util.py b/gcpy/util.py index 9344e47..0cf14c0 100644 --- a/gcpy/util.py +++ b/gcpy/util.py @@ -8,6 +8,7 @@ from textwrap import wrap from yaml import safe_load import numpy as np +from pandas import Series import xarray as xr from pypdf import PdfWriter, PdfReader from gcpy.constants import ENCODING, TABLE_WIDTH @@ -2258,3 +2259,20 @@ def replace_whitespace( verify_variable_type(repl_char, str) return repl_char.join(string.split()) + + +def get_element_of_series(series, element): + """ + Returns a specified element of a pd.Series object. + + Args + serie : pd.Series : A pd.Series object + element : int : Element of the pd.Series object to return + + Returns + value : various : The returned element + """ + verify_variable_type(series, Series) + verify_variable_type(element, int) + + return list(series)[element]