From f0b6a0d48b596ddd077bd6ce535cb7570e64716a Mon Sep 17 00:00:00 2001
From: ktehranchi <ktehranchi@stanford.edu>
Date: Tue, 31 Dec 2024 20:46:38 -0500
Subject: [PATCH 1/4] Fix ll opt reference point for transport models

---
 workflow/rules/build_electricity.smk |  1 +
 workflow/rules/postprocess.smk       |  2 ++
 workflow/scripts/plot_statistics.py  |  5 +--
 workflow/scripts/prepare_network.py  | 47 ++++++++++++++++++----------
 4 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/workflow/rules/build_electricity.smk b/workflow/rules/build_electricity.smk
index f716b3dd..2fa985da 100644
--- a/workflow/rules/build_electricity.smk
+++ b/workflow/rules/build_electricity.smk
@@ -761,6 +761,7 @@ rule prepare_network:
         co2limit_enable=config_provider("electricity", "co2limit_enable", default=False),
         gaslimit=config_provider("electricity", "gaslimit"),
         gaslimit_enable=config_provider("electricity", "gaslimit_enable", default=False),
+        transmission_network=config_provider("model_topology", "transmission_network"),
         max_hours=config_provider("electricity", "max_hours"),
         costs=config_provider("costs"),
         autarky=config_provider("electricity", "autarky"),
diff --git a/workflow/rules/postprocess.smk b/workflow/rules/postprocess.smk
index 30d81d41..6d9df24c 100644
--- a/workflow/rules/postprocess.smk
+++ b/workflow/rules/postprocess.smk
@@ -88,6 +88,8 @@ rule plot_statistics:
         + "{interconnect}/figures/s{simpl}_cluster_{clusters}/l{ll}_{opts}_{sector}/statistics/generators.csv",
         storage_units=RESULTS
         + "{interconnect}/figures/s{simpl}_cluster_{clusters}/l{ll}_{opts}_{sector}/statistics/storage_units.csv",
+        links=RESULTS
+        + "{interconnect}/figures/s{simpl}_cluster_{clusters}/l{ll}_{opts}_{sector}/statistics/links.csv",
     log:
         "logs/plot_figures/{interconnect}_{simpl}_{clusters}_l{ll}_{opts}_{sector}.log",
     threads: 1
diff --git a/workflow/scripts/plot_statistics.py b/workflow/scripts/plot_statistics.py
index d70c3ee6..d39e5209 100644
--- a/workflow/scripts/plot_statistics.py
+++ b/workflow/scripts/plot_statistics.py
@@ -391,7 +391,7 @@ def plot_regional_capacity_additions_bar(n, save):
     Plot capacity evolution by NERC region in a stacked bar plot.
     """
     data = get_statistics(n, "Optimal Capacity")
-    data.to_csv(f"{Path(save).parent}/bar_regional_capacity.csv")
+    data.to_csv(f"{Path(save).parent.parent}/statistics/bar_regional_capacity.csv")
     plot_bar(data, n, save, "", "Capacity (GW)", is_capacity=True)
 
 
@@ -400,7 +400,7 @@ def plot_regional_production_bar(n, save):
     Plot production evolution by NERC region in a stacked bar plot.
     """
     data = get_statistics(n, "Supply")
-    data.to_csv(f"{Path(save).parent}/bar_regional_production.csv")
+    data.to_csv(f"{Path(save).parent.parent}/statistics/bar_regional_production.csv")
     plot_bar(data, n, save, "", "Production (GWh)")
 
 
@@ -932,6 +932,7 @@ def plot_fuel_costs(
     n.statistics().round(2).to_csv(snakemake.output.statistics_summary)
     n.generators.to_csv(snakemake.output.generators)
     n.storage_units.to_csv(snakemake.output.storage_units)
+    n.links.to_csv(snakemake.output.links)
 
     # Panel Plots
     plot_generator_data_panel(
diff --git a/workflow/scripts/prepare_network.py b/workflow/scripts/prepare_network.py
index 8eb36a79..ee964dfc 100644
--- a/workflow/scripts/prepare_network.py
+++ b/workflow/scripts/prepare_network.py
@@ -63,6 +63,7 @@
 import pypsa
 from _helpers import (
     configure_logging,
+    is_transport_model,
     set_scenario_config,
     update_config_from_wildcards,
 )
@@ -133,9 +134,10 @@ def add_emission_prices(n, emission_prices={"co2": 0.0}, exclude_co2=False):
     n.storage_units["marginal_cost"] += su_ep
 
 
-def set_line_s_max_pu(n, s_max_pu=0.7):
-    n.lines["s_max_pu"] = s_max_pu
-    logger.info(f"N-1 security margin of lines set to {s_max_pu}")
+def set_line_s_max_pu(n, transport_model, s_max_pu=0.7):
+    if not transport_model:
+        logger.info(f"N-1 security margin of lines set to {s_max_pu}")
+        n.lines["s_max_pu"] = s_max_pu
 
 
 def set_transmission_limit(n, ll_type, factor):
@@ -163,9 +165,11 @@ def set_transmission_limit(n, ll_type, factor):
         + n.links.loc[dc_links, "p_nom"] @ n.links.loc[dc_links, col]
         + n.links.loc[ac_links_existing, "p_nom"] @ n.links.loc[ac_links_existing, col]
     )
+    ref_dc = n.links.loc[dc_links, "p_nom"] @ n.links.loc[dc_links, col]
 
     if factor == "opt" or float(factor) > 1.0:
         # if opt allows expansion set respective lines/links to extendable
+        # all links prior to this point have extendable set to false
         n.lines["s_nom_min"] = lines_s_nom
         n.lines["s_nom_extendable"] = True
 
@@ -176,7 +180,14 @@ def set_transmission_limit(n, ll_type, factor):
         n.links.loc[ac_links_exp, "p_nom_extendable"] = True
     if factor != "opt":
         con_type = "expansion_cost" if ll_type == "c" else "volume_expansion"
-        rhs = float(factor) * ref
+        if transport_model:
+            # Transport models have links split to existing and non-existing
+            # The global constraint applies to the p_nom_opt of extendable capacity
+            # thus we must only include the 'new' transmission capacity as reference
+            rhs = ((float(factor) - 1.0) * ref) + ref_dc
+        else:
+            rhs = float(factor) * ref
+
         n.add(
             "GlobalConstraint",
             f"l{ll_type}_limit",
@@ -343,6 +354,8 @@ def set_line_nom_max(
     configure_logging(snakemake)
     set_scenario_config(snakemake)
     update_config_from_wildcards(snakemake.config, snakemake.wildcards)
+    params = snakemake.params
+    transport_model = is_transport_model(params.transmission_network)
 
     n = pypsa.Network(snakemake.input[0])
     Nyears = n.snapshot_weightings.loc[n.investment_periods[0]].objective.sum() / 8760.0
@@ -353,17 +366,17 @@ def set_line_nom_max(
     inv_per_time_weight = n.investment_periods.to_series().diff().shift(-1).ffill().fillna(1)
     n.investment_period_weightings["years"] = inv_per_time_weight
     # set Investment Period Objective weightings
-    social_discountrate = snakemake.params.costs["social_discount_rate"]
+    social_discountrate = params.costs["social_discount_rate"]
     objective_w = get_investment_weighting(
         n.investment_period_weightings["years"],
         social_discountrate,
     )
     n.investment_period_weightings["objective"] = objective_w
 
-    set_line_s_max_pu(n, snakemake.params.lines["s_max_pu"])
+    set_line_s_max_pu(n, transport_model, params.lines["s_max_pu"])
 
     # temporal averaging
-    time_resolution = snakemake.params.time_resolution
+    time_resolution = params.time_resolution
     is_string = isinstance(time_resolution, str)
     if is_string and time_resolution.lower().endswith("h"):
         n = average_every_nhours(n, time_resolution)
@@ -375,17 +388,17 @@ def set_line_nom_max(
         segments = int(time_resolution.lower().replace("seg", ""))
         n = apply_time_segmentation(n, segments, solver_name)
 
-    if snakemake.params.co2limit_enable:
-        add_co2limit(n, snakemake.params.co2limit, Nyears)
+    if params.co2limit_enable:
+        add_co2limit(n, params.co2limit, Nyears)
 
-    if snakemake.params.gaslimit_enable:
-        add_gaslimit(n, snakemake.params.gaslimit, Nyears)
+    if params.gaslimit_enable:
+        add_gaslimit(n, params.gaslimit, Nyears)
 
-    emission_prices = snakemake.params.costs["emission_prices"]
+    emission_prices = params.costs["emission_prices"]
     if emission_prices["enable"]:
         add_emission_prices(
             n,
-            dict(co2=snakemake.params.costs["emission_prices"]["co2"]),
+            dict(co2=params.costs["emission_prices"]["co2"]),
         )
 
     ll_type, factor = snakemake.wildcards.ll[0], snakemake.wildcards.ll[1:]
@@ -393,10 +406,10 @@ def set_line_nom_max(
 
     set_line_nom_max(
         n,
-        s_nom_max_set=snakemake.params.lines.get("s_nom_max", np.inf),
-        p_nom_max_set=snakemake.params.links.get("p_nom_max", np.inf),
-        s_nom_max_ext=snakemake.params.lines.get("max_extension", np.inf),
-        p_nom_max_ext=snakemake.params.links.get("max_extension", np.inf),
+        s_nom_max_set=params.lines.get("s_nom_max", np.inf),
+        p_nom_max_set=params.links.get("p_nom_max", np.inf),
+        s_nom_max_ext=params.lines.get("max_extension", np.inf),
+        p_nom_max_ext=params.links.get("max_extension", np.inf),
     )
 
     n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards)))

From fa657501439aa1011a8336586ce31f7a105910b7 Mon Sep 17 00:00:00 2001
From: ktehranchi <ktehranchi@stanford.edu>
Date: Wed, 1 Jan 2025 15:41:09 -0800
Subject: [PATCH 2/4] Add scenario comparison script

---
 workflow/scripts/scenario_comparison.py | 255 ++++++++++++++++++++++++
 1 file changed, 255 insertions(+)
 create mode 100644 workflow/scripts/scenario_comparison.py

diff --git a/workflow/scripts/scenario_comparison.py b/workflow/scripts/scenario_comparison.py
new file mode 100644
index 00000000..4783c0a5
--- /dev/null
+++ b/workflow/scripts/scenario_comparison.py
@@ -0,0 +1,255 @@
+"""
+This script facilitates comparison of scenarios run in PyPSA-USA. It loads data from multiple scenarios, processes it for analysis, and generates comparative plots for system metrics such as optimal capacities, supply, and costs. The script is designed for users working with PyPSA networks and scenario data stored in a specific YAML configuration format.
+
+Usage:
+------
+
+1. **Setup Configuration File**:
+    - Create a YAML configuration file containing details about the scenarios, aliases, order of scenarios, and network data path. An example structure for the configuration file:
+      ```yaml
+      scenarios:
+        - name: "Scenario 1"
+          path: "path/to/scenario1/"
+        - name: "Scenario 2"
+          path: "path/to/scenario2/"
+      alias_dict:
+        "Scenario 1": "S1"
+        "Scenario 2": "S2"
+      new_order:
+        - "S1"
+        - "S2"
+      reference_scenario: "S1"
+      output_folder_name: "folder_name"
+      network:
+        path: "path/to/network/file.nc"
+      ```
+
+2. **Prepare Directory Structure**:
+    - Place the YAML configuration file in the parent directory of the current working directory under `config/scenario_comparison.yaml`.
+    - Ensure the scenario data contains statistics files (`statistics/statistics*.csv`).
+
+3. **Execution**:
+    - Run the script from its directory using a Python environment with the required libraries (`yaml`, `pandas`, `pypsa`, `matplotlib`, `numpy`, `seaborn`).
+
+4. **Outputs**:
+    - Plots will be saved in the `results/{output_folder_name}` directory in the parent of the current working directory. These include:
+      - Bar charts comparing system metrics like "Optimal Capacity" or "System Costs."
+      - Comparative percentage charts, if a reference scenario is specified.
+
+5. **Adjusting Variables**:
+    - Modify variables such as `variable`, `variable_units`, and `title` in the script to customize the metrics and plot titles.
+
+Functions:
+----------
+- `get_carriers(n)`: Processes the carrier data from the PyPSA network for plotting.
+- `load_yaml_config(yaml_path)`: Loads the YAML configuration file.
+- `load_scenario_data(scenarios)`: Reads scenario CSV data from paths specified in the configuration.
+- `process_data(data, alias_dict=None, new_order=None)`: Processes raw scenario data, applying aliases and ordering.
+- `scenario_comparison(...)`: Generates horizontal bar charts comparing scenario data for a specific variable.
+- `plot_cost_comparison(...)`: Plots a cost comparison across scenarios and optionally a percentage comparison relative to a reference scenario.
+
+Dependencies:
+-------------
+- Libraries: `yaml`, `pandas`, `pypsa`, `matplotlib`, `numpy`, `seaborn`.
+- Ensure all dependencies are installed in your Python environment before running the script.
+
+Example:
+--------
+Run the script to generate comparison plots for energy system scenarios defined in the configuration file:
+```bash
+python script_name.py
+"""
+
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pypsa
+import seaborn as sns
+import yaml
+from matplotlib import pyplot as plt
+
+
+def get_carriers(n):
+    carriers = n.carriers
+    carriers["legend_name"] = carriers.nice_name
+    carriers.loc["DC", "legend_name"] = "Transmission"
+    carriers.loc["DC", "color"] = "#cf1dab"
+    carriers.loc["battery", "legend_name"] = "Existing BESS"
+    carriers.set_index("nice_name", inplace=True)
+    carriers.sort_values(by="co2_emissions", ascending=False, inplace=True)
+    return carriers
+
+
+# Load scenarios from YAML configuration
+def load_yaml_config(yaml_path):
+    with open(yaml_path) as file:
+        return yaml.safe_load(file)
+
+
+# Load CSV data for all scenarios
+def load_scenario_data(scenarios):
+    data = {}
+    for scenario in scenarios:
+        scenario_name = scenario["name"]
+        path = Path(scenario["path"])
+        data[scenario_name] = {
+            file.stem: pd.read_csv(file, index_col=[0, 1], header=[0, 1])
+            for file in path.glob("statistics/statistics*.csv")
+        }
+    return data
+
+
+# Process data to match the expected format
+def process_data(data, alias_dict=None, new_order=None):
+    stats = {}
+    for scenario_name, files in data.items():
+        stats[scenario_name] = files  # Placeholder for specific transformations
+
+    if alias_dict:
+        stats_with_alias = {}
+        for scenario_name, df in stats.items():
+            alias_name = alias_dict.get(scenario_name, scenario_name)
+            stats_with_alias[alias_name] = df
+        stats = stats_with_alias
+
+    if new_order:
+        stats = {key: stats[key] for key in new_order if key in stats}
+
+    return stats
+
+
+# Plot comparison
+def scenario_comparison(stats, variable, variable_units, carriers, title, figures_path, include_link=False):
+    colors = carriers["color"]
+    planning_horizons = stats[next(iter(stats.keys()))]["statistics"][variable].columns
+    fig, axes = plt.subplots(
+        nrows=len(planning_horizons),
+        ncols=1,
+        figsize=(8, 4.2 * len(planning_horizons)),
+        sharex=True,
+    )
+
+    if len(planning_horizons) == 1:
+        axes = [axes]
+
+    for ax, horizon in zip(axes, planning_horizons):
+        y_positions = np.arange(len(stats))
+        for j, (scenario, df) in enumerate(stats.items()):
+            df = df["statistics"].fillna(0)
+            bottoms = np.zeros(len(df.columns))
+            if include_link:
+                df = df.loc[df.index.get_level_values(0).isin(["Generator", "StorageUnit", "Link"]), variable]
+                df = df.loc[~(df.index.get_level_values(1) == "Ac")]
+            else:
+                df = df.loc[df.index.get_level_values(0).isin(["Generator", "StorageUnit"]), variable]
+            df.index = df.index.get_level_values(1)
+            df = df.reindex(carriers.index).dropna()
+            for i, tech in enumerate(df.index.unique()):
+                values = df.loc[tech, horizon] / (1e3 if variable_units in ["GW", "GWh"] else 1e9)
+                ax.barh(y_positions[j], values, left=bottoms[j], color=colors[tech], label=tech if j == 0 else "")
+                bottoms[j] += values
+
+        ax.text(1.01, 0.5, f"{horizon}", transform=ax.transAxes, va="center", rotation="vertical")
+        ax.set_yticks(y_positions)
+        ax.set_yticklabels(stats.keys())
+        ax.grid(True, axis="x", linestyle="--", alpha=0.5)
+
+    plt.xlabel(f"{variable} [{variable_units}]")
+    plt.subplots_adjust(hspace=0)
+    carriers_plotted = carriers.loc[carriers.index.intersection(df.index.unique())]
+    legend_handles = [plt.Rectangle((0, 0), 1, 1, color=colors[tech]) for tech in carriers_plotted.index]
+    fig.legend(
+        handles=legend_handles,
+        labels=carriers_plotted.legend_name.tolist(),
+        loc="lower center",
+        bbox_to_anchor=(0.5, -0.4),
+        ncol=4,
+        title="Technologies",
+    )
+    fig.suptitle(title, fontsize=12, fontweight="bold")
+    plt.tight_layout()
+    plt.savefig(figures_path / f"{variable}_comparison.png", dpi=300, bbox_inches="tight")
+    return fig, axes
+
+
+def plot_cost_comparison(stats, n, variable, variable_units, title, figures_path, reference_scenario=None):
+    combined_df = pd.DataFrame(columns=["Scenario", "statistics"], index=[])
+    for j, (scenario, stat) in enumerate(stats.items()):
+        stat = stat["statistics"]
+        combined_df = pd.concat(
+            [
+                combined_df,
+                pd.DataFrame(
+                    {
+                        "Scenario": scenario,
+                        "statistics": (
+                            (stat["Capital Expenditure"].sum() + stat["Operational Expenditure"].sum())
+                            * n.investment_period_weightings.objective.values
+                        ).sum()
+                        / 1e9,
+                    },
+                    index=[j],
+                ),
+            ],
+        )
+
+    combined_df.plot(kind="bar", x="Scenario", y="statistics", title="Total System Costs", legend=False)
+    plt.ylabel("Annualized System Costs [B$]")
+    plt.savefig(figures_path / f"{variable}_comparison.png", dpi=300, bbox_inches="tight")
+
+    if reference_scenario:
+        combined_df = combined_df.set_index("Scenario")
+        ref = combined_df.loc[reference_scenario]
+        pct_df = (combined_df - ref) / combined_df * 100
+        pct_df.plot(kind="bar", y="statistics", title="Total System Costs", legend=False)
+        plt.ylabel(" Reduction in Annualized System Costs [%]")
+        plt.savefig(figures_path / f"{variable}_pct_comparison.png", dpi=300, bbox_inches="tight")
+
+
+# Main execution
+if __name__ == "__main__":
+    yaml_path = Path.cwd().parent / "config/scenario_comparison.yaml"  # Path to the YAML file
+
+    # Load and process data
+    config = load_yaml_config(yaml_path)
+    scenarios = config["scenarios"]
+    raw_data = load_scenario_data(scenarios)
+
+    alias_dict = config.get("alias_dict", None)
+    new_order = config.get("new_order", None)
+    reference_scenario = config.get("reference_scenario", None)
+
+    figures_path = (
+        Path.cwd().parent / f"results/{config.get('output_folder_name', 'scenario_comparison')}"
+    )  # Directory to save the figures in the parent of cwd
+
+    figures_path.mkdir(exist_ok=True)
+
+    processed_data = process_data(raw_data, alias_dict, new_order)
+
+    n = pypsa.Network(config["network"]["path"])
+    # Example carrier setup
+    carriers = get_carriers(n)
+
+    # Example variable and title
+    variable = "Optimal Capacity"
+    variable_units = "GW"
+    title = "Capacity Evolution Comparison"
+
+    # Generate plots
+    scenario_comparison(processed_data, variable, variable_units, carriers, title, figures_path)
+
+    # Example variable and title
+    variable = "Supply"
+    variable_units = "GWh"
+    title = "Supply Evolution Comparison"
+
+    # Generate plots
+    scenario_comparison(processed_data, variable, variable_units, carriers, title, figures_path)
+
+    # Example variable and title
+    variable = "System Costs"
+    variable_units = "$B"
+    title = "Scenario Comparison"
+    plot_cost_comparison(processed_data, n, variable, variable_units, title, figures_path, reference_scenario)

From 6b893c7b6429551bd8fdfa040f656dfe1b25d13d Mon Sep 17 00:00:00 2001
From: ktehranchi <ktehranchi@stanford.edu>
Date: Thu, 2 Jan 2025 11:59:11 -0800
Subject: [PATCH 3/4] Fix for reconciling county and Reeds Topology differences

---
 workflow/scripts/cluster_network.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/workflow/scripts/cluster_network.py b/workflow/scripts/cluster_network.py
index 10fee018..faf8c4e8 100644
--- a/workflow/scripts/cluster_network.py
+++ b/workflow/scripts/cluster_network.py
@@ -55,8 +55,8 @@ def weighting_for_country(n, x):
     b_i = x.index
     g = normed(gen.reindex(b_i, fill_value=0))
     l = normed(load.reindex(b_i, fill_value=0))
-
     w = g + l
+
     return (w * (100.0 / w.max())).clip(lower=1.0).astype(int)
 
 
@@ -235,6 +235,26 @@ def fix_country_assignment_for_hac(n):
         solver_name=solver_name,
     )
 
+    # Potentiall remove buses and lines from these corner case counties from network
+    nc_set = set(n_clusters.index.get_level_values(0).unique())
+    bus_set = set(n.buses.country.unique())
+    countries_remove = list(bus_set - nc_set)
+    buses_remove = n.buses[n.buses.country.isin(countries_remove)]
+    if not buses_remove.empty:
+        logger.warning(f"Reconciling TAMU and ReEDS Topologies. \n Removing buses: {buses_remove.index}")
+        for c in n.one_port_components:
+            component = n.df(c)
+            rm = component[component.bus.isin(buses_remove.index)]
+            logger.warning(f"Removing {rm.shape} component {c}")
+            n.mremove(c, rm.index)
+        for c in ["Line", "Link"]:
+            component = n.df(c)
+            rm = component[component.bus0.isin(buses_remove.index) | component.bus1.isin(buses_remove.index)]
+            logger.warning(f"Removing {rm.shape} component {c}")
+            n.mremove(c, rm.index)
+        n.mremove("Bus", buses_remove.index)
+        n.determine_network_topology()
+
     def busmap_for_country(x):
         prefix = x.name[0] + x.name[1] + " "
         logger.debug(f"Determining busmap for country {prefix[:-1]}")

From e173a28c68e3b39a46e9813114bc46e737f4a86b Mon Sep 17 00:00:00 2001
From: ktehranchi <ktehranchi@stanford.edu>
Date: Thu, 2 Jan 2025 11:59:44 -0800
Subject: [PATCH 4/4] Fix for specifying correct new carriers

---
 workflow/scripts/add_extra_components.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/workflow/scripts/add_extra_components.py b/workflow/scripts/add_extra_components.py
index ec0ad493..125c606e 100644
--- a/workflow/scripts/add_extra_components.py
+++ b/workflow/scripts/add_extra_components.py
@@ -685,13 +685,8 @@ def apply_max_annual_growth_rate(n, max_growth):
     egs_gens = n.generators[n.generators["p_nom_extendable"] == True]
     egs_gens = egs_gens.loc[egs_gens["carrier"].str.contains("EGS")]
 
-    all_carriers = (
-        set(elec_config["extendable_carriers"].get("Generator", []))
-        | set(elec_config["conventional_carriers"])
-        | set(elec_config["renewable_carriers"])
-    )
     new_carriers = list(
-        all_carriers - set(n.generators.carrier.unique())
+        set(elec_config["extendable_carriers"].get("Generator", [])) - set(n.generators.carrier.unique())
         | set(["nuclear"] if "nuclear" in elec_config["extendable_carriers"].get("Generator", []) else []),
     )