import pandas as pd
import flopy
def mftransientlist_to_dataframe(mftransientlist, squeeze=True):
    """
    Cast a MFTransientList of stress period data
    into single dataframe containing all stress periods. Output data are
    aggregated (summed) to the model cell level, to avoid
    issues with non-unique row indices.

    Parameters
    ----------
    mftransientlist : MFTransientList instance
        Must provide ``data`` (a mapping of stress period number to a
        record array, None or 0), ``package`` and ``model.nper``.
    squeeze : bool
        Reduce number of columns in dataframe to only include
        stress periods where a variable changes.

    Returns
    -------
    df : dataframe
        Dataframe of shape nrow = ncells, ncol = nvar x nper. If
        the squeeze option is chosen, nper is the number of
        stress periods where at least one cell is different,
        otherwise it is equal to the number of keys in
        ``mftransientlist.data``.
    """
    # local import so the block does not depend on a module-level import
    import ast

    data = mftransientlist
    # read the stress period mapping once instead of on every access
    spd_by_period = data.data

    names = ['cellid']
    if isinstance(data.package, flopy.mf6.modflow.ModflowGwfmaw):
        names += ['wellid']

    # find relevant variable names
    # (may have to iterate past the first stress period
    # to find one that actually has a record array)
    non_data_columns = {'k', 'i', 'j', 'cellid', 'ifno',
                        'rno', 'sfrsetting', 'boundname'}
    varnames = []
    for per in range(data.model.nper):
        period_data = spd_by_period[per]
        if hasattr(period_data, 'dtype'):
            varnames = [n for n in period_data.dtype.names
                        if n not in non_data_columns]
            break

    # create list of dataframes for each stress period
    # each with index of k, i, j
    dfs = []
    reconvert_str_index = False
    for per, recs in spd_by_period.items():

        # explicit int check: '== 0' on a record array would be elementwise,
        # and 'is 0' relies on an interpreter implementation detail
        if recs is None or (isinstance(recs, int) and recs == 0):
            # add an empty dataframe if a stress period is
            # set to 0 (e.g. no pumping during a predevelopment
            # period)
            columns = names + ['{}{}'.format(c, per) for c in varnames]
            dfi = pd.DataFrame(data=None, columns=columns)
            dfi = dfi.set_index(names)
        else:
            dfi = pd.DataFrame.from_records(recs)
            # convert layer, row, column to cellid
            index_col = 'cellid'  # default index
            if {'k', 'i', 'j'}.issubset(dfi.columns):
                dfi['cellid'] = list(zip(dfi.k, dfi.i, dfi.j))
                dfi.drop(['k', 'i', 'j'], axis=1, inplace=True)
            # cell-by-cell connections; id is the cellid
            # (id2 cellid of connected cell)
            elif 'id' in dfi.columns and 'cellid' not in dfi.columns:
                index_col = 'id'
            # map the cellid to the reach number (SFR package data)
            elif ('rno' in dfi.columns or 'ifno' in dfi.columns) and \
                    'cellid' not in dfi.columns:
                rno_col = {'rno', 'ifno'}.intersection(dfi.columns).pop()
                packagedata = data.package.packagedata
                pd_rno_col = {'rno', 'ifno'}.intersection(
                    packagedata.columns).pop()
                cellid = dict(zip(packagedata.array[pd_rno_col],
                                  packagedata.array['cellid']))
                dfi['cellid'] = [cellid[rno] for rno in dfi[rno_col]]
                # rearrange the column order to start with rno, cellid
                cols = [rno_col, 'cellid']
                cols = cols + [c for c in dfi.columns if c not in cols]
                dfi = dfi[cols]
                # index on reach number, allowing for multiple
                # instances of a cellid (multiple reaches per cell)
                index_col = rno_col

            # cast tuple cellids to strings
            # so that pd.concat works in pandas >=1.2
            if 'cellid' in dfi.columns:
                dfi['cellid'] = dfi['cellid'].astype(str)
                # flag to convert string index back to tuples
                reconvert_str_index = True

            dfi.set_index(index_col, drop=False, inplace=True)

            # aggregate (sum) data to model cells
            # because pd.concat can't handle a non-unique index
            # (and modflow input doesn't have a unique identifier
            # at sub-cell level); reach-indexed data are left as-is
            if index_col not in {'rno', 'ifno'}:
                dfg = dfi.reset_index(drop=True).groupby(index_col)
                dfi = dfg.sum()  # aggregate
            # suffix the data columns with the stress period number
            dfi.columns = ['{}{}'.format(c, per) if c in varnames else c
                           for c in dfi.columns]
        dfs.append(dfi)
    df = pd.concat(dfs, axis=1)

    # squeeze the dataframe down to the minimum number of columns
    # (stress periods) to describe changes in stress
    # keep only columns where the stress changes
    # (assuming that missing columns represent the same stress
    # as the previous column)
    # squeeze only the columns with data values
    if squeeze and len(varnames) > 0:
        keep = []
        for var in varnames:
            diffcols = [n for n in df.columns if var in n]
            if len(diffcols) > 0:
                to_squeeze = df[diffcols].T.astype(float).T
                keep.append(squeeze_columns(to_squeeze))
        squeezed = pd.concat(keep, axis=1)
        squeezed.index = df.index.tolist()

        # join the squeezed data back to other columns;
        # a column is "other" if its name, less any digits
        # (the stress period suffix), is not a data variable.
        # dict.fromkeys de-duplicates while keeping column order.
        other_cols = list(dict.fromkeys(
            c for c in df.columns
            if ''.join(char for char in c
                       if not char.isdigit()) not in varnames))
        if len(other_cols) > 0:
            # compress multiple instances of 'boundname'
            # or other auxiliary columns to single columns,
            # taking the first non-null value across periods
            other_cols_dict = {}
            for col in other_cols:
                other_cols_dict[col] = \
                    pd.DataFrame(df[col]).bfill(axis=1).iloc[:, 0]
            other_cols_df = pd.DataFrame(other_cols_dict)
            df = other_cols_df.join(squeezed)
        else:
            df = squeezed

    # add columns for k, i, j
    if reconvert_str_index:
        # the string labels are tuple reprs created above, so a literal
        # parser is sufficient (and safer than eval); non-string labels
        # (e.g. reach numbers) are passed through unchanged
        df.index = [ast.literal_eval(s) if isinstance(s, str) else s
                    for s in df.index]
    has_rows = len(df) > 0
    for id_col in ['cellid', 'id']:
        if id_col not in df.columns and has_rows and \
                isinstance(df.index.values[0], tuple):
            df['cellid'] = df.index
        if id_col in df.columns and has_rows and \
                isinstance(df[id_col].values[0], tuple):
            cols = df.columns.tolist()
            # get the order right: k, i, j directly after the id column
            pos = [i for i, c in enumerate(cols) if c == id_col][0]
            for c in reversed(['k', 'i', 'j']):
                cols.insert(pos + 1, c)
            df['k'], df['i'], df['j'] = list(zip(*df[id_col]))
            df = df[cols]
    return df
def squeeze_columns(df, fillna=0.):
    """Drop columns where the forward difference
    (along axis 1, the column axis) is 0 in all rows.
    In other words, only retain columns where the data
    changed in at least one row. The first column is always
    retained.

    Parameters
    ----------
    df : DataFrame
        Containing homogenous data to be differenced (e.g.,
        just flux values, no id or other ancillary information).
        The input is not modified.
    fillna : float
        Value for nan values in DataFrame

    Returns
    -------
    squeezed : DataFrame
        Copy of the nan-filled data, limited to the first column
        and the columns that differ from their left neighbor
        in at least one row.
    """
    # fill on a copy so the caller's frame is left untouched
    filled = df.fillna(fillna)
    if filled.shape[1] == 0:
        # nothing to difference
        return filled
    diff = filled.diff(axis=1)
    # test each row's change individually; summing the differences
    # would hide columns where increases and decreases cancel out
    changed_after_first = (diff.iloc[:, 1:] != 0).any(axis=0).tolist()
    # always return the first stress period
    keep = [True] + changed_after_first
    squeezed = filled.loc[:, keep]
    return squeezed
def get_tl_variables(mftransientlist):
    """Get variable names in a flopy.utils.MFList or
    MFTransientList-like instance.

    Parameters
    ----------
    mftransientlist : object
        Must provide ``data``, a mapping of stress period
        number to a record array (or a placeholder such as None).

    Returns
    -------
    variables : list of str or None
        Field names of the first stress period that has a structured
        record array, excluding the cell/reach identifier columns.
        None if no stress period has such an array.
    """
    non_data_columns = {'k', 'i', 'j', 'cellid', 'rno', 'ifno', 'sfrsetting'}
    for recarray in mftransientlist.data.values():
        # skip None and any other placeholder without structured fields
        # (e.g. a period set to 0)
        field_names = getattr(getattr(recarray, 'dtype', None), 'names', None)
        if field_names is not None:
            return [c for c in field_names
                    if c not in non_data_columns]
    return None