Skip to content

Commit

Permalink
[#17] Cumulative value
Browse files Browse the repository at this point in the history
  • Loading branch information
dedenbangkit committed Jan 21, 2025
1 parent c0d5ed1 commit 33c208b
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 38 deletions.
95 changes: 61 additions & 34 deletions src/main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@
"metadata": {},
"outputs": [],
"source": [
"final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value','2030','2050']"
"final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value','cumulative_value','2030','2050']"
]
},
{
Expand Down Expand Up @@ -374,7 +374,7 @@
" return \"BS\"\n",
" if \"Safely\" in x[\"2nd_dimension\"]:\n",
" return \"SM\"\n",
" return np.nan\n",
" return \"Base\"\n",
" return x[\"jmp_category\"]"
]
},
Expand Down Expand Up @@ -666,7 +666,7 @@
" \"value\": value_list[\"value\"]\n",
" })\n",
" df = pd.DataFrame(new_data)\n",
" df = filter_dataframe_by_year(df, file)\n",
" \n",
" df_split = pd.DataFrame(df['value_type'].tolist(), index=df.index)\n",
" df_split.columns = ['value_name', 'jmp_category', 'commitment']\n",
" df_final = pd.concat([df, df_split], axis=1)\n",
Expand All @@ -676,6 +676,18 @@
" df_final['jmp_category'] = df_final['jmp_category'].replace({\"BS\": \"ALB\"})\n",
" \n",
" df_final['commitment'] = df_final.apply(modify_commitment_name, axis=1)\n",
"\n",
" # Make sure that all value is numeric\n",
" df_final['value'] = pd.to_numeric(df_final['value'], errors='coerce')\n",
" df_final['value'] = df_final['value'].fillna(0)\n",
" \n",
" # Add cumulative column grouped by multiple columns\n",
" df_final.to_csv(\"../tests/original_data.csv\", index=False)\n",
" group_columns = ['country', 'jmp_category', 'commitment', 'indicator', 'value_name']\n",
" df_final['cumulative_value'] = df_final.groupby(group_columns)['value'].cumsum()\n",
" df_final['jmp_category'] = df_final['jmp_category'].replace('Base', np.nan)\n",
" \n",
" df_final = filter_dataframe_by_year(df_final, file)\n",
" # df_final.to_csv(\"testing-1.csv\",index=False)\n",
" # Add Value for ALB\n",
" if \"Water Service\" in file or \"Sanitation Service\" in file:\n",
Expand Down Expand Up @@ -805,6 +817,7 @@
" <th>commitment</th>\n",
" <th>value</th>\n",
" <th>base_value</th>\n",
" <th>cumulative</th>\n",
" <th>initial_value</th>\n",
" <th>2030</th>\n",
" <th>2050</th>\n",
Expand All @@ -823,6 +836,7 @@
" <td>Base</td>\n",
" <td>1.237</td>\n",
" <td>NaN</td>\n",
" <td>15.432</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -839,6 +853,7 @@
" <td>Base</td>\n",
" <td>1.143</td>\n",
" <td>NaN</td>\n",
" <td>39.085</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -855,6 +870,7 @@
" <td>Full Sanitation Access in 2030</td>\n",
" <td>1.075</td>\n",
" <td>1.237</td>\n",
" <td>14.357</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -871,6 +887,7 @@
" <td>Full Sanitation Access in 2030</td>\n",
" <td>1.064</td>\n",
" <td>1.143</td>\n",
" <td>35.647</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -887,6 +904,7 @@
" <td>Full Sanitation Access in 2050</td>\n",
" <td>1.191</td>\n",
" <td>1.237</td>\n",
" <td>15.116</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -911,12 +929,12 @@
"3 Mil People FS ALB Full Sanitation Access in 2030 1.064 \n",
"4 Mil People FS ALB Full Sanitation Access in 2050 1.191 \n",
"\n",
" base_value initial_value 2030 2050 remove \n",
"0 NaN NaN NaN NaN False \n",
"1 NaN NaN NaN NaN False \n",
"2 1.237 NaN NaN NaN False \n",
"3 1.143 NaN NaN NaN True \n",
"4 1.237 NaN NaN NaN True "
" base_value cumulative initial_value 2030 2050 remove \n",
"0 NaN 15.432 NaN NaN NaN False \n",
"1 NaN 39.085 NaN NaN NaN False \n",
"2 1.237 14.357 NaN NaN NaN False \n",
"3 1.143 35.647 NaN NaN NaN True \n",
"4 1.237 15.116 NaN NaN NaN True "
]
},
"execution_count": 32,
Expand Down Expand Up @@ -1799,6 +1817,7 @@
" <th>commitment</th>\n",
" <th>value</th>\n",
" <th>base_value</th>\n",
" <th>cumulative</th>\n",
" <th>initial_value</th>\n",
" <th>2030</th>\n",
" <th>2050</th>\n",
Expand All @@ -1818,6 +1837,7 @@
" <td>6x</td>\n",
" <td>0.208</td>\n",
" <td>0.302</td>\n",
" <td>2.964</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -1835,6 +1855,7 @@
" <td>6x</td>\n",
" <td>0.143</td>\n",
" <td>0.178</td>\n",
" <td>6.369</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -1850,13 +1871,13 @@
"36384 Malnourished Children, Headcount - Millions 2030 Zambia Mil People \n",
"36385 Malnourished Children, Headcount - Millions 2050 Zambia Mil People \n",
"\n",
" value_name jmp_category commitment value base_value initial_value \\\n",
"36384 WSI SM 6x 0.208 0.302 NaN \n",
"36385 WSI SM 6x 0.143 0.178 NaN \n",
" value_name jmp_category commitment value base_value cumulative \\\n",
"36384 WSI SM 6x 0.208 0.302 2.964 \n",
"36385 WSI SM 6x 0.143 0.178 6.369 \n",
"\n",
" 2030 2050 remove jmp_name_id \n",
"36384 NaN NaN False 3 \n",
"36385 NaN NaN False 3 "
" initial_value 2030 2050 remove jmp_name_id \n",
"36384 NaN NaN NaN False 3 \n",
"36385 NaN NaN NaN False 3 "
]
},
"execution_count": 43,
Expand Down Expand Up @@ -1930,6 +1951,7 @@
" <th>year</th>\n",
" <th>value</th>\n",
" <th>base_value</th>\n",
" <th>cumulative</th>\n",
" <th>initial_value</th>\n",
" <th>2030</th>\n",
" <th>2050</th>\n",
Expand All @@ -1949,6 +1971,7 @@
" <td>2050</td>\n",
" <td>7.761</td>\n",
" <td>8.036</td>\n",
" <td>537.412</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -1964,8 +1987,9 @@
" <tr>\n",
" <th>36382</th>\n",
" <td>2050</td>\n",
" <td>16.6</td>\n",
" <td>16.600</td>\n",
" <td>NaN</td>\n",
" <td>348.138</td>\n",
" <td>5.53</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -1983,6 +2007,7 @@
" <td>2050</td>\n",
" <td>9.581</td>\n",
" <td>4.591</td>\n",
" <td>52.341</td>\n",
" <td>0.90</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -2000,6 +2025,7 @@
" <td>2050</td>\n",
" <td>7.209</td>\n",
" <td>8.036</td>\n",
" <td>527.720</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -2017,6 +2043,7 @@
" <td>2050</td>\n",
" <td>0.143</td>\n",
" <td>0.178</td>\n",
" <td>6.369</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
Expand All @@ -2034,26 +2061,26 @@
"</div>"
],
"text/plain": [
" year value base_value initial_value 2030 2050 remove jmp_name_id \\\n",
"36381 2050 7.761 8.036 NaN NaN NaN False 3 \n",
"36382 2050 16.6 NaN 5.53 NaN NaN False 1 \n",
"36383 2050 9.581 4.591 0.90 NaN NaN False 2 \n",
"36384 2050 7.209 8.036 NaN NaN NaN False 1 \n",
"36385 2050 0.143 0.178 NaN NaN NaN False 3 \n",
" year value base_value cumulative initial_value 2030 2050 \\\n",
"36381 2050 7.761 8.036 537.412 NaN NaN NaN \n",
"36382 2050 16.600 NaN 348.138 5.53 NaN NaN \n",
"36383 2050 9.581 4.591 52.341 0.90 NaN NaN \n",
"36384 2050 7.209 8.036 527.720 NaN NaN NaN \n",
"36385 2050 0.143 0.178 6.369 NaN NaN NaN \n",
"\n",
" indicator_id unit_id value_name_id jmp_category_id commitment_id \\\n",
"36381 5 4 7 1 1 \n",
"36382 12 5 1 2 5 \n",
"36383 6 5 2 1 7 \n",
"36384 5 4 6 2 4 \n",
"36385 4 4 7 2 4 \n",
" remove jmp_name_id indicator_id unit_id value_name_id \\\n",
"36381 False 3 5 4 7 \n",
"36382 False 1 12 5 1 \n",
"36383 False 2 6 5 2 \n",
"36384 False 1 5 4 6 \n",
"36385 False 3 4 4 7 \n",
"\n",
" country_id \n",
"36381 22 \n",
"36382 15 \n",
"36383 10 \n",
"36384 22 \n",
"36385 23 "
" jmp_category_id commitment_id country_id \n",
"36381 1 1 22 \n",
"36382 2 5 15 \n",
"36383 1 7 10 \n",
"36384 2 4 22 \n",
"36385 2 4 23 "
]
},
"execution_count": 45,
Expand Down
20 changes: 16 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def map_country_name(country):
# In[13]:


final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value','2030','2050']
final_columns = ['indicator','year','country','unit','value_name','jmp_category','commitment','value','base_value','initial_value','cumulative_value','2030','2050']


# In[14]:
Expand Down Expand Up @@ -283,7 +283,7 @@ def remove_unmatches_jmp_category(x):
if x["2nd_dimension"] == "SafelyManaged" and x["jmp_category"] == "ALB":
return True
if x["2nd_dimension"] == "SafelyManaged" and x["jmp_category"] == "BS":
return True
return "Base"
return False


Expand Down Expand Up @@ -441,16 +441,28 @@ def populate_data():
"value": value_list["value"]
})
df = pd.DataFrame(new_data)
df = filter_dataframe_by_year(df, file)

df_split = pd.DataFrame(df['value_type'].tolist(), index=df.index)
df_split.columns = ['value_name', 'jmp_category', 'commitment']
df_final = pd.concat([df, df_split], axis=1)

df_final['indicator'] = get_ifs_name(file)
df_final['jmp_category'] = df_final.apply(base_jmp_category, axis=1)
df_final['jmp_category'] = df_final['jmp_category'].replace({"BS": "ALB"})

df_final['commitment'] = df_final.apply(modify_commitment_name, axis=1)

# Make sure that all values are numeric
df_final['value'] = pd.to_numeric(df_final['value'], errors='coerce')
df_final['value'] = df_final['value'].fillna(0)

# Add cumulative column grouped by multiple columns
df_final.to_csv("../tests/original_data.csv", index=False)
group_columns = ['country', 'jmp_category', 'commitment', 'indicator', 'value_name']
df_final['cumulative_value'] = df_final.groupby(group_columns)['value'].cumsum()
df_final['jmp_category'] = df_final['jmp_category'].replace('Base', np.nan)

df_final = filter_dataframe_by_year(df_final, file)
# df_final.to_csv("testing-1.csv",index=False)
# Add Value for ALB
if "Water Service" in file or "Sanitation Service" in file:
Expand Down

0 comments on commit 33c208b

Please sign in to comment.