Source code for mfexport.list_export

import pandas as pd
import flopy



[docs]
def mftransientlist_to_dataframe(mftransientlist, squeeze=True):
    """
    Cast a MFTransientList of stress period data
    into a single dataframe containing all stress periods. Output data are
    aggregated (summed) to the model cell level, to avoid
    issues with non-unique row indices. Data indexed by reach number
    ('rno' or 'ifno') are not aggregated.

    Parameters
    ----------
    mftransientlist : flopy.mf6.data.mfdatalist.MFTransientList instance
        Must expose ``.data`` (dict of stress period -> record array,
        None or 0), ``.package`` and ``.model.nper``.
    squeeze : bool
        Reduce number of columns in dataframe to only include
        stress periods where a variable changes.

    Returns
    -------
    df : dataframe
        Dataframe of shape nrow = ncells, ncol = nvar x nper. If
        the squeeze option is chosen, nper is the number of
        stress periods where at least one cell is different,
        otherwise it is equal to the number of keys in
        ``mftransientlist.data``. Variable columns are named
        ``<variable><stress period>`` (e.g. ``q0``, ``q1``).
    """
    # Local import keeps this function self-contained; literal_eval replaces
    # eval for turning stringified cellid tuples back into tuples.
    import ast

    data = mftransientlist
    names = ['cellid']
    if isinstance(data.package, flopy.mf6.modflow.ModflowGwfmaw):
        names += ['wellid']

    # find relevant variable names, using the first stress period
    # that actually holds a record array (earlier ones may be None or 0)
    non_data_columns = {'k', 'i', 'j', 'cellid', 'ifno',
                        'rno', 'sfrsetting', 'boundname'}
    varnames = []
    for per in range(data.model.nper):
        if hasattr(data.data.get(per), 'dtype'):
            varnames = [n for n in data.data[per].dtype.names
                        if n not in non_data_columns]
            break

    # create list of dataframes for each stress period
    # each with index of k, i, j
    dfs = []
    reconvert_str_index = False
    for per, recs in data.data.items():

        # compare by value, not identity: "recs is 0" relies on a CPython
        # implementation detail. The isinstance check short-circuits before
        # "==" is ever applied to a record array.
        if recs is None or (isinstance(recs, int) and recs == 0):
            # add an empty dataframe if a stress period is
            # set to 0 (e.g. no pumping during a predevelopment
            # period)
            columns = names + ['{}{}'.format(c, per) for c in varnames]
            dfi = pd.DataFrame(data=None, columns=columns)
            dfi = dfi.set_index(names)
        else:
            dfi = pd.DataFrame.from_records(recs)
            # convert layer, row, column to cellid
            index_col = 'cellid'  # default index
            if {'k', 'i', 'j'}.issubset(dfi.columns):
                dfi['cellid'] = list(zip(dfi.k, dfi.i, dfi.j))
                dfi.drop(['k', 'i', 'j'], axis=1, inplace=True)
            # cell-by-cell connections; id is the cellid (id2 cellid of connected cell)
            elif 'id' in dfi.columns and 'cellid' not in dfi.columns:
                index_col = 'id'
            # map the cellid to the reach number (SFR package data)
            elif ('rno' in dfi.columns or 'ifno' in dfi.columns) and\
                    'cellid' not in dfi.columns:
                rno_col = {'rno', 'ifno'}.intersection(dfi.columns).pop()
                packagedata = data.package.packagedata
                pd_rno_col = {'rno', 'ifno'}.intersection(packagedata.columns).pop()
                cellid = dict(zip(packagedata.array[pd_rno_col],
                                  packagedata.array['cellid']))
                dfi['cellid'] = [cellid[rno] for rno in dfi[rno_col]]
                cols = [rno_col, 'cellid']
                # rearrange the column order to start with rno, cellid
                cols = cols + [c for c in dfi.columns if c not in cols]
                dfi = dfi[cols]
                # index on reach number, allowing for multiple instances of a cellid
                # (multiple reaches per cell)
                index_col = rno_col
            # cast tuple cellids to strings
            # so that pd.concat works in pandas >=1.2
            if 'cellid' in dfi.columns:
                dfi['cellid'] = dfi['cellid'].astype(str)
                # flag to convert string index back to tuples
                reconvert_str_index = True

            dfi.set_index(index_col, drop=False, inplace=True)

            # aggregate (sum) data to model cells
            # because pd.concat can't handle a non-unique index
            # (and modflow input doesn't have a unique identifier at sub-cell level)
            if dfi.index.name not in {'rno', 'ifno'}:
                # No try/except here: swallowing a groupby failure would
                # either raise a confusing NameError or silently reuse the
                # grouped data of the previous stress period.
                dfi = dfi.reset_index(drop=True).groupby(index_col).sum()
            dfi.columns = ['{}{}'.format(c, per) if c in varnames else c
                           for c in dfi.columns]
        dfs.append(dfi)
    df = pd.concat(dfs, axis=1)
    # squeeze the dataframe down to the minimum number of columns (stress periods)
    # to describe changes in stress
    # keep only columns where the stress changes
    # (assuming that missing columns represent the same stress as the previous column)
    # squeeze only the columns with data values
    if squeeze and len(varnames) > 0:
        keep = []
        for var in varnames:
            # match exactly '<var><per>'; a plain substring test would also
            # pick up columns of other variables whose names contain var
            diffcols = [n for n in df.columns
                        if n.startswith(var) and n[len(var):].isdigit()]
            if len(diffcols) > 0:
                to_squeeze = df[diffcols].T.astype(float).T
                keep.append(squeeze_columns(to_squeeze))
        squeezed = pd.concat(keep, axis=1)
        squeezed.index = df.index.tolist()
        # join the squeezed data back to other columns
        # (a list, not a set, so the output column order is deterministic)
        other_cols = []
        for c in df.columns:
            # NOTE(review): stripping every digit assumes non-variable column
            # names contain no digits themselves -- confirm against callers.
            name = ''.join(char for char in c if not char.isdigit())
            if name not in varnames and name not in other_cols:
                other_cols.append(name)

        if len(other_cols) > 0:
            # compress multiple instances of 'boundname'
            # or other auxillary columns to single columns
            other_cols_dict = {}
            for col in other_cols:
                # DataFrame.bfill replaces the deprecated fillna(method='bfill')
                other_cols_dict[col] = pd.DataFrame(df[col]).bfill(axis=1).iloc[:, 0]
            other_cols_df = pd.DataFrame(other_cols_dict)
            df = other_cols_df.join(squeezed)
        else:
            df = squeezed
    # add columns for k, i, j
    if reconvert_str_index:
        # only string entries are stringified tuples; an index of reach
        # numbers is left as is
        df.index = [ast.literal_eval(s) if isinstance(s, str) else s
                    for s in df.index]
    # an empty frame has no first element to inspect
    if len(df.index) > 0:
        for id_col in ['cellid', 'id']:
            if id_col not in df.columns and isinstance(df.index.values[0], tuple):
                df['cellid'] = df.index
            if id_col in df.columns and isinstance(df[id_col].values[0], tuple):
                cols = df.columns.tolist()
                # get the order right: k, i, j directly after the id column
                pos = cols.index(id_col)
                for c in reversed(['k', 'i', 'j']):
                    cols.insert(pos + 1, c)
                df['k'], df['i'], df['j'] = list(zip(*df[id_col]))
                df = df[cols]
    return df




[docs]
def squeeze_columns(df, fillna=0.):
    """Drop columns where the forward difference
    (along axis 1, the column axis) is 0 in all rows.
    In other words, only retain columns where the data
    changed in at least one row. The first column is always
    retained. The input DataFrame is not modified.

    Parameters
    ----------
    df : DataFrame
        Containing homogenous data to be differenced (e.g.,
        just flux values, no id or other ancillary information)
    fillna : float
        Value for nan values in DataFrame

    Returns
    -------
    squeezed : DataFrame
        Copy of the nan-filled input, restricted to the first column
        and every column that differs from the column before it.

    """
    # work on a filled copy so the caller's DataFrame is left untouched
    filled = df.fillna(fillna)
    if filled.shape[1] == 0:
        # nothing to difference
        return filled
    diff = filled.diff(axis=1)
    # Test each row for a change instead of summing the differences:
    # opposite changes in two rows (e.g. +1 and -1) sum to zero and would
    # wrongly mark the column as unchanged.
    changed = (diff.fillna(0) != 0).any(axis=0).tolist()
    changed[0] = True  # always return the first stress period
    # positional boolean mask; also safe with duplicate column labels
    squeezed = filled.loc[:, changed]
    return squeezed




[docs]
def get_tl_variables(mftransientlist):
    """Get variable names in a flopy.utils.MFList or
    flopy.mf6.data.mfdatalist.MFTransientList instance

    Parameters
    ----------
    mftransientlist : object with a ``data`` dict
        Maps stress period to a record array; entries without
        named fields (e.g. None or 0) are skipped.

    Returns
    -------
    variables : list of str or None
        Field names of the first record array found, excluding the
        cell/reach identifier columns. None if no stress period
        holds a record array.
    """
    non_data_columns = {'k', 'i', 'j', 'cellid', 'rno', 'ifno', 'sfrsetting'}

    for recarray in mftransientlist.data.values():
        # skip anything that is not a structured array; a stress period may
        # be stored as None or 0, neither of which has a dtype
        field_names = getattr(getattr(recarray, 'dtype', None), 'names', None)
        if field_names is not None:
            return [c for c in field_names
                    if c not in non_data_columns]
    return None