import ast

import flopy
import pandas as pd
[docs]
def mftransientlist_to_dataframe(mftransientlist, squeeze=True):
    """
    Cast a MFTransientList of stress period data
    into single dataframe containing all stress periods. Output data are
    aggregated (summed) to the model cell level, to avoid
    issues with non-unique row indices. Data that are indexed by
    reach number ('rno' or 'ifno') are not aggregated.

    Parameters
    ----------
    mftransientlist : flopy.mf6.data.mfdatalist.MFTransientList instance
    squeeze : bool
        Reduce number of columns in dataframe to only include
        stress periods where a variable changes.

    Returns
    -------
    df : dataframe
        Dataframe of shape nrow = ncells, ncol = nvar x nper. If
        the squeeze option is chosen, nper is the number of
        stress periods where at least one cell is different,
        otherwise it is equal to the number of keys in MfList.data.
    """
    data = mftransientlist
    names = ['cellid']
    if isinstance(data.package, flopy.mf6.modflow.ModflowGwfmaw):
        names += ['wellid']

    # columns that identify a record rather than carry a stress value
    non_data_columns = {'k', 'i', 'j', 'cellid', 'ifno',
                        'rno', 'sfrsetting', 'boundname'}

    # find relevant variable names, using the first
    # stress period that holds a record array
    varnames = []
    for per in range(data.model.nper):
        if hasattr(data.data.get(per), 'dtype'):
            varnames = [n for n in data.data[per].dtype.names
                        if n not in non_data_columns]
            break

    # create list of dataframes for each stress period
    dfs = []
    reconvert_str_index = False
    for per, recs in data.data.items():
        # compare by value (not identity), and only for plain numbers,
        # so that record arrays are never compared against 0
        no_data = recs is None or (isinstance(recs, (int, float))
                                   and recs == 0)
        if no_data:
            # add an empty dataframe if a stress period is
            # set to 0 (e.g. no pumping during a predevelopment
            # period)
            columns = names + ['{}{}'.format(c, per) for c in varnames]
            dfi = pd.DataFrame(data=None, columns=columns)
            dfi = dfi.set_index(names)
        else:
            dfi = pd.DataFrame.from_records(recs)
            index_col = 'cellid'  # default index
            # convert layer, row, column to cellid
            if {'k', 'i', 'j'}.issubset(dfi.columns):
                dfi['cellid'] = list(zip(dfi['k'], dfi['i'], dfi['j']))
                dfi.drop(['k', 'i', 'j'], axis=1, inplace=True)
            # cell-by-cell connections; id is the cellid
            # (id2 cellid of connected cell)
            elif 'id' in dfi.columns and 'cellid' not in dfi.columns:
                index_col = 'id'
            # map the cellid to the reach number (SFR package data)
            elif ('rno' in dfi.columns or 'ifno' in dfi.columns) and \
                    'cellid' not in dfi.columns:
                rno_col = {'rno', 'ifno'}.intersection(dfi.columns).pop()
                packagedata = data.package.packagedata
                pd_rno_col = {'rno', 'ifno'}.intersection(
                    packagedata.columns).pop()
                cellid = dict(zip(packagedata.array[pd_rno_col],
                                  packagedata.array['cellid']))
                dfi['cellid'] = [cellid[rno] for rno in dfi[rno_col]]
                # rearrange the column order to start with rno, cellid
                cols = [rno_col, 'cellid']
                cols = cols + [c for c in dfi.columns if c not in cols]
                dfi = dfi[cols]
                # index on reach number, allowing for multiple
                # instances of a cellid (multiple reaches per cell)
                index_col = rno_col

            # cast tuple cellids to strings
            # so that pd.concat works in pandas >=1.2
            if 'cellid' in dfi.columns:
                dfi['cellid'] = dfi['cellid'].astype(str)
                # flag to convert string index back to tuples
                reconvert_str_index = True
            dfi.set_index(index_col, drop=False, inplace=True)

            # aggregate (sum) data to model cells
            # because pd.concat can't handle a non-unique index
            # (and modflow input doesn't have a unique identifier
            # at sub-cell level)
            if dfi.index.name not in {'rno', 'ifno'}:
                dfi = dfi.reset_index(drop=True).groupby(index_col).sum()
            # tag each data column with its stress period
            dfi.columns = ['{}{}'.format(c, per) if c in varnames else c
                           for c in dfi.columns]
        dfs.append(dfi)
    df = pd.concat(dfs, axis=1)

    # squeeze the dataframe down to the minimum number of columns
    # (stress periods) needed to describe changes in stress;
    # keep only columns where the stress changes
    # (assuming that missing columns represent the same stress
    # as the previous column)
    if squeeze and len(varnames) > 0:
        keep = []
        for var in varnames:
            diffcols = [n for n in df.columns if var in n]
            if len(diffcols) > 0:
                to_squeeze = df[diffcols].T.astype(float).T
                keep.append(squeeze_columns(to_squeeze))
        squeezed = pd.concat(keep, axis=1)
        squeezed.index = df.index.tolist()

        # join the squeezed data back to other columns;
        # column names are compared with the period digits stripped
        other_cols = set()
        for c in df.columns:
            name = ''.join(char for char in c if not char.isdigit())
            if name not in varnames:
                other_cols.add(name)
        if len(other_cols) > 0:
            # compress multiple instances of 'boundname'
            # or other auxiliary columns to single columns
            other_cols_dict = {}
            for col in other_cols:
                other_cols_dict[col] = \
                    pd.DataFrame(df[col]).bfill(axis=1).iloc[:, 0]
            other_cols_df = pd.DataFrame(other_cols_dict)
            df = other_cols_df.join(squeezed)
        else:
            df = squeezed

    # convert stringified cellids in the index back to tuples;
    # literal_eval only parses literals (no arbitrary code execution),
    # and non-string labels (e.g. reach numbers) are left as they are
    if reconvert_str_index:
        df.index = [ast.literal_eval(s) if isinstance(s, str) else s
                    for s in df.index]

    # add columns for k, i, j (nothing to inspect if there are no rows)
    if len(df) > 0:
        for id_col in ['cellid', 'id']:
            if id_col not in df.columns and \
                    isinstance(df.index.values[0], tuple):
                df['cellid'] = df.index
            if id_col in df.columns and \
                    isinstance(df[id_col].values[0], tuple):
                cols = df.columns.tolist()
                # get the order right: k, i, j directly after the id column
                pos = [i for i, c in enumerate(cols) if c == id_col][0]
                for c in reversed(['k', 'i', 'j']):
                    cols.insert(pos + 1, c)
                df['k'], df['i'], df['j'] = list(zip(*df[id_col]))
                df = df[cols]
    return df
[docs]
def squeeze_columns(df, fillna=0.):
    """Drop columns where the forward difference
    (along axis 1, the column axis) is 0 in all rows.
    In other words, only retain columns where the data
    changed in at least one row. The first column is
    always retained. The input DataFrame is not modified.

    Parameters
    ----------
    df : DataFrame
        Containing homogenous data to be differenced (e.g.,
        just flux values, no id or other ancillary information)
    fillna : float
        Value for nan values in DataFrame

    Returns
    -------
    squeezed : DataFrame
        Copy of the nan-filled data, restricted to the retained columns.
    """
    # fill on a copy so the caller's DataFrame is left untouched
    filled = df.fillna(fillna)
    if filled.shape[1] == 0:
        # no columns to difference
        return filled
    # test each difference individually; summing the differences
    # would let opposite changes in different rows cancel out
    changed = (filled.diff(axis=1) != 0).any(axis=0)
    changed.iloc[0] = True  # always return the first stress period
    # positional boolean mask, so repeated column names are not duplicated
    return filled.loc[:, changed.values]
[docs]
def get_tl_variables(mftransientlist):
    """Get variable names in a flopy.utils.MFList or
    flopy.mf6.data.mfdatalist.MFTransientList instance

    Parameters
    ----------
    mftransientlist : object with a ``data`` dictionary
        Values of ``data`` are the stress period data; the first
        value that has a ``dtype`` with field names is used.

    Returns
    -------
    variables : list of str
        Field names of the first stress period that holds a
        record array, excluding the cell/reach identifier columns.
        Empty if no stress period holds a record array.
    """
    non_data_columns = {'k', 'i', 'j', 'cellid', 'rno', 'ifno', 'sfrsetting'}
    for recarray in mftransientlist.data.values():
        # skip placeholders for periods without data (e.g. None or 0),
        # which have no dtype to read field names from
        field_names = getattr(getattr(recarray, 'dtype', None), 'names', None)
        if field_names is not None:
            return [c for c in field_names if c not in non_data_columns]
    return []