diff --git a/.gitignore b/.gitignore index ebbab06..c2b68b7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ !eitc.csv !spm_threshold_agi.csv **/_build +!population_by_state.csv diff --git a/Makefile b/Makefile index 9bf8088..da9c5a7 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,8 @@ docker: documentation: jb clean docs && jb build docs + python docs/add_plotly_to_book.py docs + data: python policyengine_us_data/datasets/acs/acs.py diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..11e0c33 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,5 @@ +- bump: minor + changes: + added: + - Metric comparisons by dataset to the documentation. + - Calibration of state populations. diff --git a/docs/add_plotly_to_book.py b/docs/add_plotly_to_book.py new file mode 100644 index 0000000..822e77a --- /dev/null +++ b/docs/add_plotly_to_book.py @@ -0,0 +1,27 @@ +import argparse +from pathlib import Path + +# This command-line tool enables Plotly charts to show in the HTML files for the Jupyter Book documentation. + +parser = argparse.ArgumentParser() +parser.add_argument("book_path", help="Path to the Jupyter Book.") + +args = parser.parse_args() + +# Find every HTML file in the Jupyter Book. Then, add a script tag to the start of the <head> tag in each file, with the contents: +# <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> + +book_folder = Path(args.book_path) + +for html_file in book_folder.glob("**/*.html"): + with open(html_file, "r") as f: + html = f.read() + + # Add the script tag to the start of the <head> tag. 
+ html = html.replace( + "<head>", + '<head><script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>', + ) + + with open(html_file, "w") as f: + f.write(html) diff --git a/docs/results.ipynb b/docs/results.ipynb index acb152e..f65669e 100644 --- a/docs/results.ipynb +++ b/docs/results.ipynb @@ -310,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 94, "metadata": {}, "outputs": [ { @@ -7270,7 +7270,8 @@ " title=\"Weight\",\n", " type=\"log\",\n", " ),\n", - ")" + ")\n", + "fig" ] } ], diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py index 43f0c4d..0ecd3ee 100644 --- a/policyengine_us_data/datasets/acs/acs.py +++ b/policyengine_us_data/datasets/acs/acs.py @@ -111,7 +111,7 @@ class ACS_2022(ACS): time_period = 2022 file_path = STORAGE_FOLDER / "acs_2022.h5" census_acs = CensusACS_2022 - url = "release://PolicyEngine/policyengine-us-data/1.11.0/acs_2022.h5" + url = "release://PolicyEngine/policyengine-us-data/1.13.0/acs_2022.h5" if __name__ == "__main__": diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index cbcd756..ce51aef 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -648,7 +648,7 @@ class CPS_2024(CPS): label = "CPS 2024 (2022-based)" file_path = STORAGE_FOLDER / "cps_2024.h5" time_period = 2024 - url = "release://policyengine/policyengine-us-data/1.11.0/cps_2024.h5" + url = "release://policyengine/policyengine-us-data/1.13.0/cps_2024.h5" class PooledCPS(Dataset): @@ -707,7 +707,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS): CPS_2023, ] time_period = 2023 - url = "release://PolicyEngine/policyengine-us-data/1.11.0/pooled_3_year_cps_2023.h5" + url = "release://PolicyEngine/policyengine-us-data/1.13.0/pooled_3_year_cps_2023.h5" if __name__ == "__main__": diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py index da28b34..b9171c3 100644 --- 
a/policyengine_us_data/datasets/cps/enhanced_cps.py +++ b/policyengine_us_data/datasets/cps/enhanced_cps.py @@ -189,7 +189,7 @@ class EnhancedCPS_2024(EnhancedCPS): name = "enhanced_cps_2024" label = "Enhanced CPS 2024" file_path = STORAGE_FOLDER / "enhanced_cps_2024.h5" - url = "release://policyengine/policyengine-us-data/1.11.0/enhanced_cps_2024.h5" + url = "release://policyengine/policyengine-us-data/1.13.0/enhanced_cps_2024.h5" if __name__ == "__main__": diff --git a/policyengine_us_data/storage/population_by_state.csv b/policyengine_us_data/storage/population_by_state.csv new file mode 100644 index 0000000..0931747 --- /dev/null +++ b/policyengine_us_data/storage/population_by_state.csv @@ -0,0 +1,53 @@ +state,population,population_under_5 +CA,38965193.00,2104120.00 +TX,30503301.00,1921708.00 +FL,22610726.00,1130536.00 +NY,19571216.00,1037274.00 +PA,12961683.00,661046.00 +IL,12549689.00,665134.00 +OH,11785935.00,660012.00 +GA,11029227.00,639695.00 +NC,10835491.00,606787.00 +MI,10037261.00,531975.00 +NJ,9290841.00,520287.00 +VA,8715698.00,488079.00 +WA,7812880.00,421896.00 +AZ,7431344.00,393861.00 +TN,7126489.00,413336.00 +MA,7001399.00,343069.00 +IN,6862199.00,404870.00 +MO,6196156.00,353181.00 +MD,6180253.00,352274.00 +WI,5910955.00,313281.00 +CO,5877610.00,311513.00 +MN,5737915.00,327061.00 +SC,5373555.00,290172.00 +AL,5108468.00,291183.00 +LA,4573749.00,278999.00 +KY,4526154.00,262517.00 +OR,4233358.00,203201.00 +OK,4053824.00,243229.00 +CT,3617176.00,180859.00 +UT,3417734.00,232406.00 +IA,3207004.00,186006.00 +PR,3205691.00,96171.00 +NV,3194176.00,172486.00 +AR,3067732.00,180996.00 +KS,2940546.00,176433.00 +MS,2939690.00,173442.00 +NM,2114371.00,107833.00 +NE,1978379.00,124638.00 +ID,1964726.00,113954.00 +WV,1770071.00,86733.00 +HI,1435138.00,77497.00 +NH,1402054.00,63092.00 +ME,1395722.00,61412.00 +MT,1132812.00,57773.00 +RI,1095962.00,52606.00 +DE,1031890.00,54690.00 +SD,919318.00,57917.00 +ND,783926.00,49387.00 +AK,733406.00,46205.00 
+DC,678972.00,38701.00 +VT,647464.00,27193.00 +WY,584057.00,30955.00 \ No newline at end of file diff --git a/policyengine_us_data/storage/uprating_factors.csv b/policyengine_us_data/storage/uprating_factors.csv index 3269bdc..897e3b1 100644 --- a/policyengine_us_data/storage/uprating_factors.csv +++ b/policyengine_us_data/storage/uprating_factors.csv @@ -3,6 +3,7 @@ alimony_expense,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1. alimony_income,1.0,1.255,1.322,1.357,1.446,1.504,1.535,1.567,1.576,1.595,1.622,1.655,1.689,1.723,1.779 american_opportunity_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718 amt_foreign_tax_credit,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718 +capital_gains_before_response,1.0,1.824,1.11,1.195,1.244,1.195,1.14,1.122,1.126,1.145,1.173,1.206,1.243,1.283,1.326 casualty_loss,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718 cdcc_relevant_expenses,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718 charitable_cash_donations,1.0,1.166,1.148,1.215,1.28,1.318,1.35,1.389,1.428,1.467,1.513,1.561,1.611,1.663,1.718 diff --git a/policyengine_us_data/storage/uprating_growth_factors.csv b/policyengine_us_data/storage/uprating_growth_factors.csv index d020204..380f22d 100644 --- a/policyengine_us_data/storage/uprating_growth_factors.csv +++ b/policyengine_us_data/storage/uprating_growth_factors.csv @@ -3,6 +3,7 @@ alimony_expense,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0. 
alimony_income,0,0.255,0.053,0.026,0.066,0.04,0.021,0.021,0.006,0.012,0.017,0.02,0.021,0.02,0.033 american_opportunity_credit,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033 amt_foreign_tax_credit,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033 +capital_gains_before_response,0,0.824,-0.391,0.077,0.041,-0.039,-0.046,-0.016,0.004,0.017,0.024,0.028,0.031,0.032,0.034 casualty_loss,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033 cdcc_relevant_expenses,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033 charitable_cash_donations,0,0.166,-0.015,0.058,0.053,0.03,0.024,0.029,0.028,0.027,0.031,0.032,0.032,0.032,0.033 diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index 4669f89..a01b16a 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -322,6 +322,24 @@ def build_loss_matrix(dataset: type, time_period): ) targets_array.append(row["count"]) + # Population by state and population under 5 by state + + state_population = pd.read_csv(STORAGE_FOLDER / "population_by_state.csv") + + for _, row in state_population.iterrows(): + in_state = sim.calculate("state_code", map_to="person") == row["state"] + label = f"census/population_by_state/{row['state']}" + loss_matrix[label] = sim.map_result(in_state, "person", "household") + targets_array.append(row["population"]) + + under_5 = sim.calculate("age").values < 5 + in_state_under_5 = in_state * under_5 + label = f"census/population_under_5_by_state/{row['state']}" + loss_matrix[label] = sim.map_result( + in_state_under_5, "person", "household" + ) + targets_array.append(row["population_under_5"]) + if any(loss_matrix.isna().sum() > 0): raise ValueError("Some targets are missing from the loss matrix")