Skip to content

Commit

Permalink
Fix non-functional stackdates in grid.py (#329)
Browse files Browse the repository at this point in the history
  • Loading branch information
berland authored Jul 6, 2021
1 parent 499b041 commit be46913
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 20 deletions.
36 changes: 25 additions & 11 deletions ecl2df/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ def rst2df(
"""Return a dataframe with dynamic data from the restart file
for each cell, at a particular date.
The dataframe will have a dummy index. The column named
"active" refers to the active cell index, and is to be used
when merging with the grid geometry dataframe.
Args:
eclfiles: EclFiles object
date: datetime.date or list of datetime.date, must
Expand All @@ -137,7 +141,7 @@ def rst2df(
dateinheaders: boolean on whether the date should
be added to the column headers. Instead of
SGAS as a column header, you get SGAS@YYYY-MM-DD.
stackdates: Default is false. If true, a column
stackdates: Default is false. If True, a column
called DATE will be added and data for all restart
dates will be added in a stacked manner. Implies
dateinheaders False.
Expand Down Expand Up @@ -233,16 +237,18 @@ def rst2df(
# Remove columns that are all NaN:
rst_df.dropna(axis="columns", how="all", inplace=True)

rst_df.index.name = "active"

rst_dfs[datestr] = rst_df

if not rst_dfs:
return pd.DataFrame()

if not stackdates:
return pd.concat(rst_dfs.values(), axis=1)
return pd.concat(rst_dfs.values(), axis=1).reset_index()

rststack = pd.concat(rst_dfs, sort=False).reset_index()
rststack.rename(columns={"level_0": "DATE"}, inplace=True)
del rststack["level_1"]
return rststack


Expand Down Expand Up @@ -451,21 +457,21 @@ def df(
any time dependent data from Restart files.
Args:
eclfiles (EclFiles): Handle to an Eclipse case
vectors (str or list): Vectors to include, wildcards
eclfiles: Handle to an Eclipse case
vectors: Vectors to include, wildcards
supported. Used to match both
INIT vectors and RESTART vectors.
dropconstants (bool): If true, columns that are constant
for every cell are dropped.
rstdates (list, str or datetime): Restart dates to include in ISO-8601 format.
rstdates: Restart dates to include in ISO-8601 format.
Alternatively, pick from the mnemonics 'first', 'all' and 'last'.
dateinheaders (bool): Whether columns with data from UNRST files
dateinheaders: Whether columns with data from UNRST files
should always have the ISO-date embedded in the column header.
stackdates (bool): Default is false. If true, a column
stackdates: Default is false. If true, a column
called DATE will be added and data for all restart
dates will be added in a stacked manner. Implies
dateinheaders False.
zonemap (dict): A zonemap dictionary mapping every K index to a
zonemap: A zonemap dictionary mapping every K index to a
string, which will be put in a column ZONE. If none is provided,
a zonemap from a default file will be looked for. Provide an empty
dictionary to avoid looking for the default file, and no ZONE
Expand All @@ -482,10 +488,18 @@ def df(
dateinheaders=dateinheaders,
stackdates=stackdates,
)
grid_df = pd.concat([gridgeom, initdf, rst_df], axis=1, sort=False)
grid_df = gridgeom.merge(
initdf, how="outer", on=None, left_index=True, right_index=True
)

if rst_df is not None and not rst_df.empty:
grid_df = grid_df.merge(
rst_df, how="outer", left_index=True, right_on="active"
).reset_index(drop=True)

if dropconstants:
grid_df = drop_constant_columns(grid_df)
return grid_df
return grid_df.drop("active", axis="columns", errors="ignore")


def fill_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
Expand Down
55 changes: 46 additions & 9 deletions tests/test_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def test_df():
grid_df = grid.df(eclfiles, vectors=["PRESSURE"], rstdates="last", stackdates=True)
assert "PRESSURE" in grid_df
assert len(grid_df.columns) == geometry_cols + 2
assert "DATE" in grid_df # awaits stacking
assert "DATE" in grid_df # Present because of stackdates

grid_df = grid.df(eclfiles, vectors="PRESSURE", rstdates="last")
assert "PRESSURE" in grid_df
Expand All @@ -289,11 +289,38 @@ def test_df():
assert "PRESSURE" not in grid_df
assert "PRESSURE@2001-08-01" in grid_df

grid_df = grid.df(eclfiles, vectors="PRESSURE", rstdates="all", stackdates=True)
grid_df = grid.df(
eclfiles, vectors=["PORO", "PRESSURE"], rstdates="all", stackdates=True
)
assert "PRESSURE" in grid_df
assert len(grid_df.columns) == geometry_cols + 2
assert len(grid_df.columns) == geometry_cols + 3
assert "DATE" in grid_df
assert len(grid_df["DATE"].unique()) == 4
assert not grid_df.isna().any().any()
# Check that all but the dynamic data has been repeated:
df1 = (
grid_df[grid_df["DATE"] == "2000-01-01"]
.drop(["DATE", "PRESSURE"], axis=1)
.reset_index(drop=True)
)
df2 = (
grid_df[grid_df["DATE"] == "2000-07-01"]
.drop(["PRESSURE", "DATE"], axis=1)
.reset_index(drop=True)
)
df3 = (
grid_df[grid_df["DATE"] == "2001-02-01"]
.drop(["PRESSURE", "DATE"], axis=1)
.reset_index(drop=True)
)
df4 = (
grid_df[grid_df["DATE"] == "2001-08-01"]
.drop(["PRESSURE", "DATE"], axis=1)
.reset_index(drop=True)
)
pd.testing.assert_frame_equal(df1, df2)
pd.testing.assert_frame_equal(df1, df3)
pd.testing.assert_frame_equal(df1, df4)

grid_df = grid.df(eclfiles, vectors="PORO")
assert "I" in grid_df
Expand All @@ -305,7 +332,7 @@ def test_df():
assert "I" in grid_df
assert "PORO" in grid_df
assert "DATE" not in grid_df
# (no RST columns, so no DATE info in the daaframe)
# (no RST columns, so no DATE info in the dataframe)
# (warnings should be printed)

grid_df = grid.df(eclfiles, vectors="PORO", rstdates="all", stackdates=True)
Expand Down Expand Up @@ -403,9 +430,9 @@ def test_get_available_rst_dates():
def test_rst2df():
"""Test producing dataframes from restart files"""
eclfiles = EclFiles(DATAFILE)
assert grid.rst2df(eclfiles, "first").shape == (35817, 23)
assert grid.rst2df(eclfiles, "last").shape == (35817, 23)
assert grid.rst2df(eclfiles, "all").shape == (35817, 23 * 4)
assert grid.rst2df(eclfiles, "first").shape == (35817, 24)
assert grid.rst2df(eclfiles, "last").shape == (35817, 24)
assert grid.rst2df(eclfiles, "all").shape == (35817, 23 * 4 + 1)

assert "SOIL" in grid.rst2df(eclfiles, date="first", dateinheaders=False)
assert (
Expand All @@ -417,11 +444,21 @@ def test_rst2df():
assert rst_df["DATE"].unique()[0] == "2000-01-01"
rst_df = grid.rst2df(eclfiles, "all", stackdates=True)
assert len(rst_df["DATE"].unique()) == len(grid.get_available_rst_dates(eclfiles))
assert rst_df.shape == (4 * 35817, 23 + 1) # "DATE" is now the extra column

# "DATE" and "active" are now the extra columns:
assert rst_df.shape == (4 * 35817, 23 + 2)

# Test that only the PPCW column contains NaN's (only defined for selected cells)
nancols = rst_df.isna().any()
assert nancols["PPCW"]
assert (
len(rst_df[["PPCW", "DATE"]].dropna()["DATE"].unique()) == 4
) # All dates present
assert sum(nancols) == 1 # All other columns are "False"

# Check vector slicing:
rst_df = grid.rst2df(eclfiles, "first", vectors="S???")
assert rst_df.shape == (35817, 3)
assert rst_df.shape == (35817, 4)
assert "SGAS" in rst_df
assert "SWAT" in rst_df
assert "SOIL" in rst_df # This is actually computed
Expand Down

0 comments on commit be46913

Please sign in to comment.