From cf3cc80c33b1d9c2058b0fd053ca2eac4925f7dc Mon Sep 17 00:00:00 2001 From: ttuff <ty.tuff@colorado.edu> Date: Mon, 25 Mar 2024 18:49:32 +0000 Subject: [PATCH] reshape update --- reshape.ipynb | 540 ++++++++++++++++++++++++++++---------------------- 1 file changed, 302 insertions(+), 238 deletions(-) diff --git a/reshape.ipynb b/reshape.ipynb index 20d4954..911d6d8 100644 --- a/reshape.ipynb +++ b/reshape.ipynb @@ -2,10 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "c501955a-4e56-40df-93e4-346c6e5ad935", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -23,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "ebe8b0c5-0171-4140-a9d2-e038fa3239d4", "metadata": {}, "outputs": [], @@ -64,7 +73,7 @@ "\n", "# Example usage:\n", "# Replace 'your_envi_file_path' with the actual path to your ENVI file\n", - "raster_path = \"NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200807_155314_reflectance/NEON_D13_NIWO_DP1_20200807_155314_reflectanceNEON_D13_NIWO_DP1_20200807_155314_reflectance__envi\" # Update this to your actual raster file path\n", + "raster_path = \"NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200801_161441_reflectance/NEON_D13_NIWO_DP1_20200801_161441_reflectance\" # Update this to your actual raster file path\n", "processor = ENVIProcessor(raster_path)\n", "chunk = processor.get_chunk_from_extent(corrections=['some_correction'], resample=False)\n", "\n" @@ -82,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "id": "1039151f-97ae-48b2-b757-efa640080906", "metadata": {}, "outputs": [], @@ -127,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "e7f55111-3cb3-4b5d-a3bd-d7cf588ce1ef", "metadata": {}, "outputs": [ @@ -178,121 +187,121 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>0.797209</td>\n", - " <td>0.314943</td>\n", - " <td>0.515625</td>\n", - " <td>0.289574</td>\n", - " <td>0.567998</td>\n", - " <td>0.711330</td>\n", - " <td>0.985263</td>\n", - " <td>0.488050</td>\n", - " <td>0.669980</td>\n", - " <td>0.047356</td>\n", + " <td>0.687524</td>\n", + " <td>0.304722</td>\n", + " <td>0.419995</td>\n", + " <td>0.259451</td>\n", + " <td>0.507092</td>\n", + " <td>0.323172</td>\n", + " <td>0.403418</td>\n", + " <td>0.410082</td>\n", + " <td>0.041743</td>\n", + " <td>0.759389</td>\n", " <td>...</td>\n", - " <td>0.251188</td>\n", - " <td>0.565922</td>\n", - " <td>0.188862</td>\n", - " <td>0.551562</td>\n", - " <td>0.465831</td>\n", - " <td>0.023405</td>\n", - " <td>0.336994</td>\n", - " <td>0.450468</td>\n", + " <td>0.382299</td>\n", + " <td>0.014212</td>\n", + " <td>0.008055</td>\n", + " <td>0.713550</td>\n", + " <td>0.675177</td>\n", + " <td>0.513762</td>\n", + " <td>0.248871</td>\n", + " <td>0.072115</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>0.936276</td>\n", - " <td>0.837195</td>\n", - " <td>0.695625</td>\n", - " <td>0.631541</td>\n", - " <td>0.701025</td>\n", - " <td>0.960226</td>\n", - " <td>0.141791</td>\n", - " <td>0.381211</td>\n", - " <td>0.114170</td>\n", - " <td>0.788448</td>\n", + " <td>0.718638</td>\n", + " <td>0.490737</td>\n", + " <td>0.360811</td>\n", + " <td>0.531658</td>\n", + " <td>0.511925</td>\n", + " <td>0.502386</td>\n", + " <td>0.924304</td>\n", + " <td>0.908558</td>\n", + " <td>0.062361</td>\n", + " <td>0.543072</td>\n", " <td>...</td>\n", - " <td>0.230309</td>\n", - " <td>0.182223</td>\n", - " <td>0.290250</td>\n", - " <td>0.975250</td>\n", - " <td>0.227430</td>\n", - " <td>0.175060</td>\n", - " <td>0.567754</td>\n", - " <td>0.865518</td>\n", + " <td>0.448249</td>\n", + " <td>0.347809</td>\n", + " <td>0.912210</td>\n", + " <td>0.629690</td>\n", + " <td>0.046711</td>\n", + " <td>0.932032</td>\n", + " <td>0.579875</td>\n", + " <td>0.617307</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>0.283778</td>\n", - " <td>0.107825</td>\n", - " <td>0.300096</td>\n", - " <td>0.302607</td>\n", - " <td>0.572963</td>\n", - " <td>0.244061</td>\n", - " <td>0.893596</td>\n", - " <td>0.930337</td>\n", - " <td>0.607978</td>\n", - " <td>0.133053</td>\n", + " <td>0.911718</td>\n", + " <td>0.410745</td>\n", + " <td>0.934476</td>\n", + " <td>0.256145</td>\n", + " <td>0.123791</td>\n", + " <td>0.206306</td>\n", + " <td>0.282671</td>\n", + " <td>0.817732</td>\n", + " <td>0.919495</td>\n", + " <td>0.264951</td>\n", " <td>...</td>\n", - " <td>0.597720</td>\n", - " <td>0.048488</td>\n", - " <td>0.564245</td>\n", - " <td>0.728381</td>\n", - " <td>0.535179</td>\n", - " <td>0.961610</td>\n", - " <td>0.464103</td>\n", - " <td>0.565047</td>\n", + " <td>0.284105</td>\n", + " <td>0.207181</td>\n", + " <td>0.075888</td>\n", + " <td>0.470861</td>\n", + " <td>0.049957</td>\n", + " <td>0.371652</td>\n", + " <td>0.781437</td>\n", + " <td>0.655946</td>\n", " <td>0</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>0.457389</td>\n", - " <td>0.719769</td>\n", - " <td>0.118473</td>\n", - " <td>0.407864</td>\n", - " <td>0.979559</td>\n", - " <td>0.097455</td>\n", - " <td>0.728281</td>\n", - " <td>0.564473</td>\n", - " <td>0.634733</td>\n", - " <td>0.200184</td>\n", + " <td>0.405207</td>\n", + " <td>0.791517</td>\n", + " <td>0.936675</td>\n", + " <td>0.433172</td>\n", + " <td>0.866013</td>\n", + " <td>0.461116</td>\n", + " <td>0.322686</td>\n", + " <td>0.013696</td>\n", + " <td>0.010301</td>\n", + " <td>0.600920</td>\n", " <td>...</td>\n", - " <td>0.239004</td>\n", - " <td>0.365207</td>\n", - " <td>0.195417</td>\n", - " <td>0.237427</td>\n", - " <td>0.699486</td>\n", - " <td>0.205661</td>\n", - " <td>0.734998</td>\n", - " <td>0.589876</td>\n", + " <td>0.764502</td>\n", + " <td>0.966043</td>\n", + " <td>0.137867</td>\n", + " <td>0.466293</td>\n", + " <td>0.657372</td>\n", + " <td>0.682398</td>\n", + " <td>0.611508</td>\n", + " <td>0.221807</td>\n", " <td>0</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>0.366526</td>\n", - " <td>0.529941</td>\n", - " <td>0.417289</td>\n", - " <td>0.847633</td>\n", - " <td>0.193126</td>\n", - " <td>0.982161</td>\n", - " <td>0.822412</td>\n", - " <td>0.445544</td>\n", - " <td>0.044492</td>\n", - " <td>0.017709</td>\n", + " <td>0.063385</td>\n", + " <td>0.777884</td>\n", + " <td>0.941228</td>\n", + " <td>0.666460</td>\n", + " <td>0.575846</td>\n", + " <td>0.827420</td>\n", + " <td>0.859770</td>\n", + " <td>0.105069</td>\n", + " <td>0.319631</td>\n", + " <td>0.696536</td>\n", " <td>...</td>\n", - " <td>0.742343</td>\n", - " <td>0.772829</td>\n", - " <td>0.340681</td>\n", - " <td>0.146870</td>\n", - " <td>0.946994</td>\n", - " <td>0.066265</td>\n", - " <td>0.499453</td>\n", - " <td>0.388927</td>\n", + " <td>0.038016</td>\n", + " <td>0.638541</td>\n", + " <td>0.814261</td>\n", + " <td>0.258037</td>\n", + " <td>0.573052</td>\n", + " <td>0.936515</td>\n", + " <td>0.425533</td>\n", + " <td>0.659791</td>\n", " <td>0</td>\n", " <td>4</td>\n", " </tr>\n", @@ -322,121 +331,121 @@ " </tr>\n", " <tr>\n", " <th>11483273</th>\n", - " <td>0.152260</td>\n", - " <td>0.766855</td>\n", - " <td>0.169944</td>\n", - " <td>0.878360</td>\n", - " <td>0.383142</td>\n", - " <td>0.539870</td>\n", - " <td>0.985659</td>\n", - " <td>0.650074</td>\n", - " <td>0.760505</td>\n", - " <td>0.952661</td>\n", + " <td>0.762910</td>\n", + " <td>0.475233</td>\n", + " <td>0.448766</td>\n", + " <td>0.360571</td>\n", + " <td>0.277480</td>\n", + " <td>0.073007</td>\n", + " <td>0.423881</td>\n", + " <td>0.083852</td>\n", + " <td>0.307329</td>\n", + " <td>0.339650</td>\n", " <td>...</td>\n", - " <td>0.317700</td>\n", - " <td>0.078385</td>\n", - " <td>0.648448</td>\n", - " <td>0.056393</td>\n", - " <td>0.945091</td>\n", - " <td>0.665414</td>\n", - " <td>0.529980</td>\n", - " <td>0.593835</td>\n", + " <td>0.202143</td>\n", + " <td>0.914050</td>\n", + " <td>0.561372</td>\n", + " <td>0.528521</td>\n", + " <td>0.656066</td>\n", + " <td>0.789577</td>\n", + " <td>0.642838</td>\n", + " <td>0.791538</td>\n", " <td>11137</td>\n", " <td>1026</td>\n", " </tr>\n", " <tr>\n", " <th>11483274</th>\n", - " <td>0.345999</td>\n", - " <td>0.479799</td>\n", - " <td>0.496870</td>\n", - " <td>0.479557</td>\n", - " <td>0.999687</td>\n", - " <td>0.157588</td>\n", - " <td>0.562706</td>\n", - " <td>0.481475</td>\n", - " <td>0.395999</td>\n", - " <td>0.912812</td>\n", + " <td>0.517250</td>\n", + " <td>0.058740</td>\n", + " <td>0.460863</td>\n", + " <td>0.808285</td>\n", + " <td>0.752132</td>\n", + " <td>0.039975</td>\n", + " <td>0.165822</td>\n", + " <td>0.996934</td>\n", + " <td>0.440954</td>\n", + " <td>0.302505</td>\n", " <td>...</td>\n", - " <td>0.514387</td>\n", - " <td>0.211770</td>\n", - " <td>0.179397</td>\n", - " <td>0.664200</td>\n", - " <td>0.767492</td>\n", - " <td>0.368559</td>\n", - " <td>0.885074</td>\n", - " <td>0.608405</td>\n", + " <td>0.252671</td>\n", + " <td>0.592800</td>\n", + " <td>0.900815</td>\n", + " <td>0.566804</td>\n", + " <td>0.788629</td>\n", + " <td>0.046903</td>\n", + " <td>0.734878</td>\n", + " <td>0.644062</td>\n", " <td>11137</td>\n", " <td>1027</td>\n", " </tr>\n", " <tr>\n", " <th>11483275</th>\n", - " <td>0.748482</td>\n", - " <td>0.771680</td>\n", - " <td>0.299341</td>\n", - " <td>0.296593</td>\n", - " <td>0.102600</td>\n", - " <td>0.957731</td>\n", - " <td>0.038437</td>\n", - " <td>0.549724</td>\n", - " <td>0.076379</td>\n", - " <td>0.128406</td>\n", + " <td>0.119028</td>\n", + " <td>0.988765</td>\n", + " <td>0.295493</td>\n", + " <td>0.011601</td>\n", + " <td>0.173720</td>\n", + " <td>0.642613</td>\n", + " <td>0.409197</td>\n", + " <td>0.529874</td>\n", + " <td>0.447891</td>\n", + " <td>0.788607</td>\n", " <td>...</td>\n", - " <td>0.251922</td>\n", - " <td>0.673968</td>\n", - " <td>0.346867</td>\n", - " <td>0.888135</td>\n", - " <td>0.471597</td>\n", - " <td>0.753125</td>\n", - " <td>0.630612</td>\n", - " <td>0.409271</td>\n", + " <td>0.692102</td>\n", + " <td>0.625139</td>\n", + " <td>0.715138</td>\n", + " <td>0.082538</td>\n", + " <td>0.059340</td>\n", + " <td>0.858740</td>\n", + " <td>0.892223</td>\n", + " <td>0.610468</td>\n", " <td>11137</td>\n", " <td>1028</td>\n", " </tr>\n", " <tr>\n", " <th>11483276</th>\n", - " <td>0.526175</td>\n", - " <td>0.401877</td>\n", - " <td>0.607318</td>\n", - " <td>0.477941</td>\n", - " <td>0.549137</td>\n", - " <td>0.047495</td>\n", - " <td>0.769956</td>\n", - " <td>0.422864</td>\n", - " <td>0.126572</td>\n", - " <td>0.073549</td>\n", + " <td>0.504178</td>\n", + " <td>0.161125</td>\n", + " <td>0.412866</td>\n", + " <td>0.960469</td>\n", + " <td>0.941566</td>\n", + " <td>0.294474</td>\n", + " <td>0.123558</td>\n", + " <td>0.640859</td>\n", + " <td>0.229713</td>\n", + " <td>0.782693</td>\n", " <td>...</td>\n", - " <td>0.406933</td>\n", - " <td>0.387700</td>\n", - " <td>0.204090</td>\n", - " <td>0.547156</td>\n", - " <td>0.558324</td>\n", - " <td>0.079974</td>\n", - " <td>0.169205</td>\n", - " <td>0.287978</td>\n", + " <td>0.919020</td>\n", + " <td>0.741212</td>\n", + " <td>0.643234</td>\n", + " <td>0.022654</td>\n", + " <td>0.111536</td>\n", + " <td>0.840001</td>\n", + " <td>0.001191</td>\n", + " <td>0.003219</td>\n", " <td>11137</td>\n", " <td>1029</td>\n", " </tr>\n", " <tr>\n", " <th>11483277</th>\n", - " <td>0.632683</td>\n", - " <td>0.266278</td>\n", - " <td>0.919380</td>\n", - " <td>0.899299</td>\n", - " <td>0.806271</td>\n", - " <td>0.843766</td>\n", - " <td>0.107567</td>\n", - " <td>0.909166</td>\n", - " <td>0.068002</td>\n", - " <td>0.097232</td>\n", + " <td>0.155903</td>\n", + " <td>0.951039</td>\n", + " <td>0.251885</td>\n", + " <td>0.919702</td>\n", + " <td>0.133986</td>\n", + " <td>0.866848</td>\n", + " <td>0.172710</td>\n", + " <td>0.682367</td>\n", + " <td>0.706197</td>\n", + " <td>0.993416</td>\n", " <td>...</td>\n", - " <td>0.000711</td>\n", - " <td>0.474354</td>\n", - " <td>0.458854</td>\n", - " <td>0.794206</td>\n", - " <td>0.908586</td>\n", - " <td>0.380833</td>\n", - " <td>0.938612</td>\n", - " <td>0.554012</td>\n", + " <td>0.823101</td>\n", + " <td>0.724665</td>\n", + " <td>0.401648</td>\n", + " <td>0.135023</td>\n", + " <td>0.350144</td>\n", + " <td>0.957360</td>\n", + " <td>0.173104</td>\n", + " <td>0.677269</td>\n", " <td>11137</td>\n", " <td>1030</td>\n", " </tr>\n", @@ -447,43 +456,43 @@ ], "text/plain": [ " Band_1 Band_2 Band_3 Band_4 Band_5 Band_6 \\\n", - "0 0.797209 0.314943 0.515625 0.289574 0.567998 0.711330 \n", - "1 0.936276 0.837195 0.695625 0.631541 0.701025 0.960226 \n", - "2 0.283778 0.107825 0.300096 0.302607 0.572963 0.244061 \n", - "3 0.457389 0.719769 0.118473 0.407864 0.979559 0.097455 \n", - "4 0.366526 0.529941 0.417289 0.847633 0.193126 0.982161 \n", + "0 0.687524 0.304722 0.419995 0.259451 0.507092 0.323172 \n", + "1 0.718638 0.490737 0.360811 0.531658 0.511925 0.502386 \n", + "2 0.911718 0.410745 0.934476 0.256145 0.123791 0.206306 \n", + "3 0.405207 0.791517 0.936675 0.433172 0.866013 0.461116 \n", + "4 0.063385 0.777884 0.941228 0.666460 0.575846 0.827420 \n", "... ... ... ... ... ... ... \n", - "11483273 0.152260 0.766855 0.169944 0.878360 0.383142 0.539870 \n", - "11483274 0.345999 0.479799 0.496870 0.479557 0.999687 0.157588 \n", - "11483275 0.748482 0.771680 0.299341 0.296593 0.102600 0.957731 \n", - "11483276 0.526175 0.401877 0.607318 0.477941 0.549137 0.047495 \n", - "11483277 0.632683 0.266278 0.919380 0.899299 0.806271 0.843766 \n", + "11483273 0.762910 0.475233 0.448766 0.360571 0.277480 0.073007 \n", + "11483274 0.517250 0.058740 0.460863 0.808285 0.752132 0.039975 \n", + "11483275 0.119028 0.988765 0.295493 0.011601 0.173720 0.642613 \n", + "11483276 0.504178 0.161125 0.412866 0.960469 0.941566 0.294474 \n", + "11483277 0.155903 0.951039 0.251885 0.919702 0.133986 0.866848 \n", "\n", " Band_7 Band_8 Band_9 Band_10 ... Band_419 Band_420 \\\n", - "0 0.985263 0.488050 0.669980 0.047356 ... 0.251188 0.565922 \n", - "1 0.141791 0.381211 0.114170 0.788448 ... 0.230309 0.182223 \n", - "2 0.893596 0.930337 0.607978 0.133053 ... 0.597720 0.048488 \n", - "3 0.728281 0.564473 0.634733 0.200184 ... 0.239004 0.365207 \n", - "4 0.822412 0.445544 0.044492 0.017709 ... 0.742343 0.772829 \n", + "0 0.403418 0.410082 0.041743 0.759389 ... 0.382299 0.014212 \n", + "1 0.924304 0.908558 0.062361 0.543072 ... 0.448249 0.347809 \n", + "2 0.282671 0.817732 0.919495 0.264951 ... 0.284105 0.207181 \n", + "3 0.322686 0.013696 0.010301 0.600920 ... 0.764502 0.966043 \n", + "4 0.859770 0.105069 0.319631 0.696536 ... 0.038016 0.638541 \n", "... ... ... ... ... ... ... ... \n", - "11483273 0.985659 0.650074 0.760505 0.952661 ... 0.317700 0.078385 \n", - "11483274 0.562706 0.481475 0.395999 0.912812 ... 0.514387 0.211770 \n", - "11483275 0.038437 0.549724 0.076379 0.128406 ... 0.251922 0.673968 \n", - "11483276 0.769956 0.422864 0.126572 0.073549 ... 0.406933 0.387700 \n", - "11483277 0.107567 0.909166 0.068002 0.097232 ... 0.000711 0.474354 \n", + "11483273 0.423881 0.083852 0.307329 0.339650 ... 0.202143 0.914050 \n", + "11483274 0.165822 0.996934 0.440954 0.302505 ... 0.252671 0.592800 \n", + "11483275 0.409197 0.529874 0.447891 0.788607 ... 0.692102 0.625139 \n", + "11483276 0.123558 0.640859 0.229713 0.782693 ... 0.919020 0.741212 \n", + "11483277 0.172710 0.682367 0.706197 0.993416 ... 0.823101 0.724665 \n", "\n", " Band_421 Band_422 Band_423 Band_424 Band_425 Band_426 \\\n", - "0 0.188862 0.551562 0.465831 0.023405 0.336994 0.450468 \n", - "1 0.290250 0.975250 0.227430 0.175060 0.567754 0.865518 \n", - "2 0.564245 0.728381 0.535179 0.961610 0.464103 0.565047 \n", - "3 0.195417 0.237427 0.699486 0.205661 0.734998 0.589876 \n", - "4 0.340681 0.146870 0.946994 0.066265 0.499453 0.388927 \n", + "0 0.008055 0.713550 0.675177 0.513762 0.248871 0.072115 \n", + "1 0.912210 0.629690 0.046711 0.932032 0.579875 0.617307 \n", + "2 0.075888 0.470861 0.049957 0.371652 0.781437 0.655946 \n", + "3 0.137867 0.466293 0.657372 0.682398 0.611508 0.221807 \n", + "4 0.814261 0.258037 0.573052 0.936515 0.425533 0.659791 \n", "... ... ... ... ... ... ... \n", - "11483273 0.648448 0.056393 0.945091 0.665414 0.529980 0.593835 \n", - "11483274 0.179397 0.664200 0.767492 0.368559 0.885074 0.608405 \n", - "11483275 0.346867 0.888135 0.471597 0.753125 0.630612 0.409271 \n", - "11483276 0.204090 0.547156 0.558324 0.079974 0.169205 0.287978 \n", - "11483277 0.458854 0.794206 0.908586 0.380833 0.938612 0.554012 \n", + "11483273 0.561372 0.528521 0.656066 0.789577 0.642838 0.791538 \n", + "11483274 0.900815 0.566804 0.788629 0.046903 0.734878 0.644062 \n", + "11483275 0.715138 0.082538 0.059340 0.858740 0.892223 0.610468 \n", + "11483276 0.643234 0.022654 0.111536 0.840001 0.001191 0.003219 \n", + "11483277 0.401648 0.135023 0.350144 0.957360 0.173104 0.677269 \n", "\n", " Pixel_Row Pixel_Col \n", "0 0 0 \n", @@ -501,7 +510,7 @@ "[11483278 rows x 428 columns]" ] }, - "execution_count": 3, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -527,36 +536,31 @@ "source": [ "import numpy as np\n", "\n", - "def flatten_and_melt_array_to_structured_array(array):\n", + "def iterate_flatten_melt_array(array):\n", " \"\"\"\n", - " Flattens a 3D numpy array to a structured numpy array in the \"melted\" format.\n", + " Generator to iterate over a 3D numpy array and yield \"melted\" data.\n", " \n", " Parameters:\n", " - array: A 3D numpy array of shape (bands, rows, cols).\n", " \n", - " Returns:\n", - " - A structured numpy array with 'Pixel_Row', 'Pixel_Col', 'Band_ID', and 'Wavelength' fields.\n", + " Yields:\n", + " - Tuple of (Pixel_Row, Pixel_Col, Band_ID, Wavelength) for each pixel-band combination.\n", " \"\"\"\n", " bands, rows, cols = array.shape\n", - " total_pixels = rows * cols\n", - " dtype = [('Pixel_Row', int), ('Pixel_Col', int), ('Band_ID', 'U10'), ('Wavelength', array.dtype)]\n", - " \n", - " # Create an empty structured array\n", - " structured_array = np.zeros(total_pixels * bands, dtype=dtype)\n", " \n", - " # Populate the structured array\n", - " counter = 0\n", - " for row in range(rows):\n", - " for col in range(cols):\n", - " for band in range(bands):\n", - " structured_array[counter] = (row, col, f'Band_{band+1}', array[band, row, col])\n", - " counter += 1\n", - " \n", - " return structured_array\n", + " for band in range(bands):\n", + " for row in range(rows):\n", + " for col in range(cols):\n", + " yield (row, col, f'Band_{band+1}', array[band, row, col])\n", "\n", "# Example usage\n", - "chunk = np.random.rand(426, 11138, 1031) # Example array, replace with your actual data\n", - "melted_array = flatten_and_melt_array_to_structured_array(chunk)\n" + "chunk = np.random.rand(426, 11138, 1031) # Replace with your actual data\n", + "\n", + "# To demonstrate or test the generator, you can iterate through a small portion of it\n", + "for i, data_point in enumerate(iterate_flatten_melt_array(chunk)):\n", + " print(data_point)\n", + " if i > 100: # Adjust this condition to control how many items you want to print\n", + " break\n" ] }, { @@ -566,8 +570,68 @@ "metadata": {}, "outputs": [], "source": [ - "melted_array" + "import csv\n", + "\n", + "# Open a CSV file for writing\n", + "with open('melted_data.csv', 'w', newline='') as file:\n", + " writer = csv.writer(file)\n", + " writer.writerow(['Pixel_Row', 'Pixel_Col', 'Band_ID', 'Wavelength']) # Write header\n", + "\n", + " # Write each data point\n", + " for data_point in iterate_flatten_melt_array(chunk):\n", + " writer.writerow(data_point)\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "def batch_flatten_melt_array(array, batch_size=1000000):\n", + " \"\"\"\n", + " Generator to iterate over a 3D numpy array and yield batches of \"melted\" data.\n", + " \n", + " Parameters:\n", + " - array: A 3D numpy array of shape (bands, rows, cols).\n", + " - batch_size: The number of rows in each batch.\n", + " \n", + " Yields:\n", + " - A DataFrame containing a batch of melted data.\n", + " \"\"\"\n", + " bands, rows, cols = array.shape\n", + " total_pixels = rows * cols\n", + " num_batches = (total_pixels + batch_size - 1) // batch_size # Ceiling division to get the number of batches\n", + " \n", + " for batch in range(num_batches):\n", + " batch_data = []\n", + " start_index = batch * batch_size\n", + " end_index = min(start_index + batch_size, total_pixels)\n", + " \n", + " for index in range(start_index, end_index):\n", + " row = index // cols\n", + " col = index % cols\n", + " for band in range(bands):\n", + " batch_data.append((row, col, f'Band_{band+1}', array[band, row, col]))\n", + " \n", + " batch_df = pd.DataFrame(batch_data, columns=['Pixel_Row', 'Pixel_Col', 'Band_ID', 'Wavelength'])\n", + " yield batch_df\n", + "\n", + "# Example usage\n", + "chunk = np.random.rand(426, 11138, 1031) # Replace with your actual data\n", + "\n", + "# Iterate through each batch and process\n", + "for i, batch_df in enumerate(batch_flatten_melt_array(chunk)):\n", + " print(f\"Processing batch {i+1}\")\n", + " # Process the batch_df here\n", + " # For example, you could save each batch to a separate CSV file\n", + " batch_df.to_csv(f'melted_data_batch_{i+1}.csv', index=False)\n", + " if i == 0: # For demonstration, break after processing the first batch\n", + " break\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "551e6f3c-68ea-446a-806d-d789ab16d2fe", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {