Source code for mfexport.budget_output

import io
import os
from pathlib import Path
import time
import numpy as np
import pandas as pd
from flopy.discretization import StructuredGrid
from flopy.utils.sfroutputfile import SfrFile
import flopy.utils.binaryfile as bf
from flopy.mf6.utils.binarygrid_util import MfGrdFile
from mfexport.grid import get_kij_from_node3d



[docs]
def aggregate_sfr_flow_ja_face(df):
    """Collapse MODFLOW 6 SFR FLOW-JA-FACE records to one row per
    reach and time step.

    FLOW-JA-FACE output lists every inflow (positive value) and
    outflow (negative value) for each stream reach (node). Those
    values are accumulated here into total inflow and total outflow
    for each unique (kstpkper, node) combination.

    Parameters
    ----------
    df : DataFrame
        Dataframe produced by read_mf6_binary_stress_output.
        Must have columns:
        node : reach number (rno)
        kstpkper : (timestep, stress period) tuples
        time : total time in model units
        q : FLOW-JA-FACE values (+ in, - out)

    Returns
    -------
    agg : DataFrame
        One row per (kstpkper, node), sorted by time and then rno,
        with columns:
        time : total time in model units
        kstpkper : (timestep, stress period) tuples
        rno : reach number (the values of the input 'node' column)
        Qin : sum of the positive q values
        Qout : sum of the negative q values
        Qnet : Qin + Qout
        Qmean : (abs(Qin) + abs(Qout)) / 2
    """
    print('aggregating FLOW-JA-FACE results...')
    t_start = time.time()

    # Note: accumulating into plain dictionaries was reported by the
    # original author to be ~20 times faster than a groupby approach
    # that specifies the aggregation operations by column
    inflows = {}
    outflows = {}
    last_time = {}
    rows = zip(df.node.tolist(), df.kstpkper.tolist(),
               df.time.tolist(), df.q.tolist())

    t_step = time.time()
    for reach, ksp, totim, flow in rows:
        key = (ksp, reach)
        # make sure the key exists in both dictionaries,
        # even if the reach only has inflows or only has outflows
        inflows.setdefault(key, 0)
        outflows.setdefault(key, 0)
        if flow > 0:
            inflows[key] += flow
        elif flow < 0:
            outflows[key] += flow
        # the time of the last record encountered for the key is kept
        last_time[key] = totim
    print(f"loop took {time.time() - t_step:.2f}s")

    t_step = time.time()
    # building lists first and then the dataframe was reported to be
    # about 30x faster than pd.DataFrame.from_dict
    keys = list(inflows)
    columns = {
        'Qin': [inflows[key] for key in keys],
        'Qout': [outflows[key] for key in keys],
        'time': [last_time[key] for key in keys],
    }
    agg = pd.DataFrame(columns, index=keys)
    index_values = agg.index.values
    agg['kstpkper'] = [item[0] for item in index_values]
    agg['rno'] = [item[1] for item in index_values]
    agg.index = range(len(agg))
    agg = agg[['time', 'kstpkper', 'rno', 'Qin', 'Qout']]
    in_out = agg[['Qin', 'Qout']]
    agg['Qnet'] = in_out.sum(axis=1)
    agg['Qmean'] = in_out.abs().sum(axis=1) / 2
    print(f"DataFrame construction took {time.time() - t_step:.2f}s")

    print(f"finished in {time.time() - t_start:.2f}s\n")
    return agg.sort_values(by=['time', 'rno'])




[docs]
def aggregate_mf6_stress_budget(mf6_stress_budget_output,
                                text=None,
                                kstpkper=None):
    """Read MODFLOW 6 budget output for a stress package into
    a DataFrame of one node/stress period per row, with fluxes
    listed by column.

    Parameters
    ----------
    mf6_stress_budget_output : file path
        Binary output file
    text : str or list of strings
        Text identifying flow term(s) (e.g. 'FLOW-JA-FACE', 'GWF', etc.).
        If None, all available variables are returned. (default None)
        A list passed in by the caller is not modified.
    kstpkper : tuple or list of tuples
        (timestep, stress period) combination(s) to read; passed through to
        read_mf6_stress_budget_output. If None, all available data are
        read. (default None)

    Returns
    -------
    df : DataFrame
        DataFrame with flow values aggregated by node and stress period.
        Columns derived from FLOW-JA-FACE results are:
        kstpkper : (timestep, stress period) tuples
        time : total time in model units
        node : SFR reach number (zero-based)
        Qin : total inflows from other reaches
        Qout : total outflows to other reaches
        Qnet : net gain/loss in flow
        Qmean : average of inflows and outflows
        Any additional fluxes are listed in columns of the same name.
    """
    print('Getting data from {}...'.format(mf6_stress_budget_output))
    ta = time.time()
    if isinstance(text, str):
        text = [text]
    elif text is None:
        text = get_stress_budget_textlist(mf6_stress_budget_output)
    else:
        # work on a copy so that the caller's sequence is never modified
        text = list(text)
    print('for variables: ' + ' '.join(text))
    dfs = {}
    agg = {}
    for k in text:
        dfs[k] = read_mf6_stress_budget_output(mf6_stress_budget_output, text=k, kstpkper=kstpkper)

        # aggregate FLOW-JA-FACE values to one Qin, Qout value per reach
        if k == 'FLOW-JA-FACE':
            agg[k] = aggregate_sfr_flow_ja_face(dfs['FLOW-JA-FACE'])
            index = pd.MultiIndex.from_tuples(list(zip(agg[k].kstpkper,
                                                       agg[k].rno)),
                                              names=['kstpkper', 'node'])
            agg[k].index = index
        else:
            # get fluxes by reach
            # dfs[k] has one row for each connection
            # reduce to sum of flow terms for each node (i.e. each SFR reach)
            agg[k] = dfs[k].groupby(['kstpkper', 'node']).sum()

    # merge variables into single dataframe;
    # the FLOW-JA-FACE table (if present) is the base that the other
    # terms are joined to, otherwise the first term is used
    primary = 'FLOW-JA-FACE' if 'FLOW-JA-FACE' in text else text[0]
    others = list(text)
    others.remove(primary)
    df = agg[primary]
    for t in others:
        agg[t][t] = agg[t].q  # create column with flux name
        df = df.join(agg[t][t], how='outer')  # join on multiindex of kstpkper, node

    # with outer join, ensure that all kstpkper and node
    # columns fully populated
    df['kstpkper'] = df.index.get_level_values(0)
    df['node'] = df.index.get_level_values(1)
    df.reset_index(inplace=True, drop=True)
    df.sort_values(by=['time', 'node'], inplace=True)
    assert not np.any(df.kstpkper.isna().values)
    # note: nan reach numbers are allowed here
    # (they can be unconnected reaches)

    rno_col = {'rno', 'ifno'}.intersection(df.columns)
    if rno_col:
        rno_col = rno_col.pop()
        # compare only the rows that have a reach number;
        # use the detected column name ('rno' or 'ifno') throughout
        has_rno = df.dropna(axis=0, subset=[rno_col])
        assert np.array_equal(has_rno['node'].values,
                              has_rno[rno_col].values)

        df.drop(rno_col, axis=1, inplace=True)
    # convert to zero-based
    # (rnos in flopy package input are zero-based)
    if df['node'].min() == 1:
        df['node'] -= 1
    df.index = range(len(df))
    print("finished in {:.2f}s\n".format(time.time() - ta))
    return df




[docs]
def get_flowja_face(cell_budget_file, binary_grid_file, kstpkper=(0, 0), idx=0,
                    precision='double'):
    """Get FLOW-JA-FACE (cell by cell flows) from MODFLOW 6 budget
    output and binary grid file.
    TODO: need test for extracted flowja fluxes

    Parameters
    ----------
    cell_budget_file : str, pathlib.Path or open budget file object
        Path to the binary cell budget file, or an already-open object
        with a ``get_data`` method (e.g. a flopy CellBudgetFile instance).
    binary_grid_file : str or None
        Path to the binary grid file. If None and ``cell_budget_file`` is a
        path, the budget file name with its extension replaced by
        '.dis.grb' is tried.
    kstpkper : tuple
        Currently not used; the first FLOW-JA-FACE record in the file
        is always read.
    idx : int
        Currently not used.
    precision : str
        Currently not used.

    Returns
    -------
    df : DataFrame or None
        One row per intercell connection with columns n, m and q
        (see get_intercell_connections). For structured grids, the
        columns kn, in, jn, km, im, jm returned by get_kij_from_node3d
        for cells n and m are included as well.
        None is returned if no binary grid file is available.
    """
    if isinstance(cell_budget_file, (str, Path)):
        cbb = bf.CellBudgetFile(str(cell_budget_file))
        if binary_grid_file is None:
            # swap the budget file extension for .dis.grb
            binary_grid_file = os.path.splitext(str(cell_budget_file))[0] + '.dis.grb'
            if not os.path.exists(binary_grid_file):
                binary_grid_file = None
    else:
        cbb = cell_budget_file
    if binary_grid_file is None:
        print("Couldn't get FLOW-JA-FACE, need binary grid file for connection information.")
        return
    bgf = MfGrdFile(binary_grid_file)
    # IA array maps cell number to connection number
    # (one-based index number of first connection at each cell)?
    # taking the forward difference then yields nconnections per cell
    ia = bgf._datadict['IA'] - 1
    # Connections in the JA array correspond directly with the
    # FLOW-JA-FACE record that is written to the budget file.
    ja = bgf._datadict['JA'] - 1  # cell connections
    flowja = cbb.get_data(text='FLOW-JA-FACE')[0][0, 0, :]
    df = get_intercell_connections(ia, ja, flowja)
    cols = ['n', 'm', 'q']

    # get the k, i, j locations for plotting the connections
    if isinstance(bgf.modelgrid, StructuredGrid):
        nrow, ncol = bgf.modelgrid.nrow, bgf.modelgrid.ncol
        k, i, j = get_kij_from_node3d(df['n'].values, nrow, ncol)
        df['kn'], df['in'], df['jn'] = k, i, j
        k, i, j = get_kij_from_node3d(df['m'].values, nrow, ncol)
        df['km'], df['im'], df['jm'] = k, i, j
        cols += ['kn', 'in', 'jn', 'km', 'im', 'jm']
    return df[cols].copy()




[docs]
def get_intercell_connections(ia, ja, flowja):
    """Build a table of intercell connections and their flows.

    Parameters
    ----------
    ia : sequence of int
        Zero-based start position of each cell's entries in ``ja`` and
        ``flowja``; entry n+1 marks the end of the span for cell n.
    ja : sequence
        Connected cell for each position.
    flowja : sequence
        Flow for each position.

    Returns
    -------
    df : DataFrame
        One row per connection, with columns:
        n : cell number (index into ia)
        m : cell that n connects to
        q : flow across the connection
    """
    print('Making DataFrame of intercell connections...')
    t_start = time.time()
    from_cells = []
    to_cells = []
    flows = []
    for cell in range(len(ia) - 1):
        # the first position in each cell's span is skipped
        # (presumably the entry for the cell itself -- TODO confirm)
        positions = range(ia[cell] + 1, ia[cell + 1])
        from_cells.extend(cell for _ in positions)
        to_cells.extend(ja[pos] for pos in positions)
        flows.extend(flowja[pos] for pos in positions)
    df = pd.DataFrame({'n': from_cells, 'm': to_cells, 'q': flows})
    elapsed = time.time() - t_start
    print(f"finished in {elapsed:.2f}s\n")
    return df




[docs]
def get_bc_flux(cbbobj, txt, kstpkper=None, idx=None):
    """Read a flow component from MODFLOW binary cell budget output;

    Parameters
    ----------
    cbbobj : open file handle (instance of flopy.utils.binaryfile.CellBudgetFile
    txt : cell budget record to read (e.g. 'STREAM LEAKAGE')
    kstpkper : tuple
        (timestep, stress period) to read
    idx : index of list returned by cbbobj (usually 0)

    Returns
    -------
    arr : ndarray or None
        Squeezed array of the results returned by ``cbbobj.get_data``.
        List-style results (1D records with 'node' and 'q' fields) are
        mapped onto the model grid and summed over the layers; fluxes
        from multiple records at the same node are added together.
        None is returned if ``cbbobj.get_data`` returns nothing.
    """
    nrow, ncol, nlay = cbbobj.nrow, cbbobj.ncol, cbbobj.nlay
    results_list = cbbobj.get_data(text=txt, kstpkper=kstpkper, idx=idx)
    if len(results_list) == 0:
        print('no data found at {} for {}'.format(kstpkper, txt))
        return
    all_results = []
    for i, results in enumerate(results_list):
        if isinstance(results, list) and txt == 'RECHARGE':
            results = results[1]
        if len(results) == 0:
            print(f'no data found at {kstpkper} for {txt}, layer {i}')
            continue
        # plain (non-structured) arrays have dtype.names of None,
        # and lists have no dtype at all
        field_names = getattr(getattr(results, 'dtype', None), 'names', None) or ()
        if np.ndim(results) == 1 and {'node', 'q'}.issubset(field_names):
            arr = np.zeros(nlay * nrow * ncol, dtype=float)
            # accumulate with np.add.at; a plain fancy-index assignment
            # would keep only the last value where a node number repeats
            # (e.g. more than one boundary condition in the same cell)
            np.add.at(arr, np.asarray(results['node']) - 1, results['q'])
            arr = np.reshape(arr, (nlay, nrow, ncol))
            results = arr.sum(axis=0)
        all_results.append(np.squeeze(results))
    return np.squeeze(all_results)




[docs]
def get_stress_budget_textlist(mf6_stress_budget_output):
    """Get list of available variable names in a binary budget output file.

    Parameters
    ----------
    mf6_stress_budget_output : file path
        Binary output file

    Returns
    -------
    textlist : list of str
        Entries of the CellBudgetFile ``textlist`` attribute,
        decoded to strings with surrounding whitespace removed.
    """
    cbobj = bf.CellBudgetFile(mf6_stress_budget_output,
                              precision='double'
                              )
    try:
        textlist = [(t.decode() if isinstance(t, bytes) else t).strip()
                    for t in cbobj.textlist]
    finally:
        # release the file handle; only the names are needed
        cbobj.close()
    return textlist




[docs]
def read_mf6_dependent_variable_output(mf6_dependent_variable_output,
                                       text='head',
                                       kstpkper=None, hdry=-1e30):
    """Reads dependent variable output; for example, heads from the
    groundwater flow solution or output from STAGE FILEOUT in the options
    block of the .sfr6 file). Returns a DataFrame of output values.

    Parameters
    ----------
    mf6_dependent_variable_output : file path
        Binary output file
    text : str
        Text identifying variable (e.g. 'head', 'stage', etc.)
    kstpkper : tuple or list of tuples
        Zero-based (timestep, stress period) tuple(s). If
        None, all available data are returned. (default None)
    hdry : float
        Value indicating dry cells.

    Returns
    -------
    df : DataFrame
        Table with results. Columns:
        time : total time in model units
        kstpkper : (timestep, stress period)
        node : Zero-based ID: model cell number for heads, reach number for SFR, etc.
        <text> : results for variable <text>; values equal to hdry are set to nan
    """
    print('reading {} from\n{}...'.format(text, mf6_dependent_variable_output))
    ta = time.time()
    hdsobj = bf.HeadFile(mf6_dependent_variable_output,
                         text=text)  # need to specify text otherwise file may not open
    try:
        available_kstpkper = hdsobj.get_kstpkper()
        # look times up by kstpkper, so that a requested subset of
        # time steps gets the times that belong to those time steps
        # (assumes get_kstpkper() and get_times() are in the same order)
        time_lookup = dict(zip(available_kstpkper, hdsobj.get_times()))
        if kstpkper is None:
            kstpkper = available_kstpkper
        elif not isinstance(kstpkper, list):
            kstpkper = [kstpkper]

        records = [np.squeeze(hdsobj.get_data(kstpkper=ksp)).ravel().tolist()
                   for ksp in kstpkper]
    finally:
        hdsobj.close()
    nnodes = len(records[0])

    # create dataframe with one result in each row
    # sorted by timestep, then node
    values = []
    kstpkper_values = []
    time_values = []
    for ksp, rec in zip(kstpkper, records):
        values += rec
        kstpkper_values += [ksp] * nnodes
        time_values += [time_lookup[tuple(ksp)]] * nnodes
    df = pd.DataFrame({'node': list(range(nnodes)) * len(records),
                       text: values,
                       'kstpkper': kstpkper_values,
                       'time': time_values})
    df.loc[df[text] == hdry, text] = np.nan
    print("finished in {:.2f}s\n".format(time.time() - ta))
    return df[['time', 'kstpkper', 'node', text]]




[docs]
def read_mf6_stress_budget_output(mf6_stress_budget_output,
                                  text='FLOW-JA-FACE',
                                  kstpkper=None):
    """Reads budget output from any package that follows the imeth=6
    structure (e.g. LAK, MAW, SFR, and UZF package(s); for example,
    output from BUDGET FILEOUT in the options block of the .sfr6 file).

    Parameters
    ----------
    mf6_stress_budget_output : file path
        Binary output file
    text : str
        Text identifying flow term (e.g. 'FLOW-JA-FACE')
    kstpkper : tuple or list of tuples
        (timestep, stress period) combination(s) to read. If None,
        all records for ``text`` are read. (default None)

    Returns
    -------
    df : DataFrame
        Table with flow results, sorted by time and node. Columns:
        node : node number (e.g. stream reach; 1-based)
        node2 : connecting node (e.g. up or downstream reach; 1-based)
        q : flow values
        FLOW-AREA : area of JA-FACE?
        kstpkper : (timestep, stress period)
        time : total time in model units
    """
    print('reading {} from\n{}...'.format(text, mf6_stress_budget_output))
    ta = time.time()
    cbobj = bf.CellBudgetFile(mf6_stress_budget_output,
                              precision='double'
                              )
    try:
        available_kstpkper = cbobj.get_kstpkper()
        # look times up by kstpkper, so that a requested subset of
        # time steps gets the times that belong to those time steps
        # (assumes get_kstpkper() and get_times() are in the same order)
        time_lookup = dict(zip(available_kstpkper, cbobj.get_times()))
        if kstpkper is None:
            # returns a list of recarrays (length: nnodes);
            # one for each timestep, stress period
            records = cbobj.get_data(text=text)
            record_kstpkper = list(available_kstpkper)
        else:
            # otherwise, just get the results
            # for specified timestep, stress periods
            if not isinstance(kstpkper, list):
                kstpkper = [kstpkper]
            records = []
            record_kstpkper = []
            for ksp in kstpkper:
                ksp_records = cbobj.get_data(text=text, kstpkper=ksp)
                records += ksp_records
                # remember which time step each record came from
                record_kstpkper += [ksp] * len(ksp_records)
    finally:
        cbobj.close()

    dfs = []
    for i, rec in enumerate(records):
        ksp = record_kstpkper[i]
        df = pd.DataFrame(rec)
        # lists are assigned (instead of the bare tuple) so that the
        # tuple ends up as a single value in each row
        df['kstpkper'] = [ksp] * len(df)
        df['time'] = [time_lookup[tuple(ksp)]] * len(df)
        dfs.append(df)
    df = pd.concat(dfs)
    print("finished in {:.2f}s\n".format(time.time() - ta))
    return df.sort_values(by=['time', 'node'])




[docs]
def _read_mf6_sfr_packagedata_file(mf6_package_data, grid_type='structured'):
    """Read an external MODFLOW 6 SFR packagedata text file into a DataFrame
    with zero-based reach numbers and cell indices."""
    skiprows = 0
    names = None
    ncol = 0
    with open(mf6_package_data) as src:
        for line in src:
            stripped = line.strip()
            if stripped.startswith('#'):
                # the last comment line before the data is taken as the header;
                # removing the '#' before splitting handles both
                # '#rno k ...' and '# rno k ...'
                names = stripped.lstrip('#').split()
                skiprows += 1
            else:
                ncol = len(stripped.split())
                break

    if not names:
        if grid_type == 'structured':
            names = ['rno', 'k', 'i', 'j', 'rlen', 'rwid', 'rgrd', 'rtp', 'rbth', 'rhk',
                     'man', 'ncon', 'ustrf', 'ndv']
        else:
            names = ['rno', 'cellid', 'rlen', 'rwid', 'rgrd', 'rtp', 'rbth', 'rhk',
                     'man', 'ncon', 'ustrf', 'ndv']
        # any columns beyond the default names get generic aux names
        names += [f'aux_col{i + 1}' for i in range(ncol - len(names))]

    # read the packagedata as a string to handle "none" values
    with open(mf6_package_data) as src:
        raw_pd = src.read().lower().replace('none', '0 0 0')
    packagedata = pd.read_csv(io.StringIO(raw_pd), names=names,
                              skiprows=skiprows, sep=r'\s+')
    # convert to zero-based
    for col in ['rno', 'ifno', 'k', 'i', 'j']:
        if col in packagedata:
            packagedata[col] -= 1
    if 'cellid' in packagedata.columns:
        # values read by pandas are numpy integers, which are not
        # instances of the builtin int; check the column dtype instead
        if pd.api.types.is_integer_dtype(packagedata['cellid']):
            packagedata['cellid'] -= 1
        else:
            packagedata['cellid'] = [(c[0] - 1, c[1] - 1, c[2] - 1)
                                     for c in packagedata['cellid']]
    return packagedata


def read_sfr_output(mf2005_sfr_outputfile=None,
                    mf2005_SfrFile_instance=None,
                    mf6_sfr_stage_file=None,
                    mf6_sfr_budget_file=None,
                    mf6_package_data=None,
                    model=None, grid_type='structured'):
    """Read MF-2005 or MF-6 style SFR output; return as DataFrame.

    Parameters
    ----------
    mf2005_sfr_outputfile : file path
        MODFLOW-2005 style SFR output file, read with flopy's SfrFile.
    mf2005_SfrFile_instance : SfrFile instance
        Alternative to ``mf2005_sfr_outputfile``; an object with
        a ``df`` attribute.
    mf6_sfr_stage_file : file path
        MODFLOW 6 binary SFR stage output. If None, the 'stage' column
        is filled from the streambed tops (if package data are available)
        or with nans.
    mf6_sfr_budget_file : file path
        MODFLOW 6 binary SFR budget output.
    mf6_package_data : file path, flopy object or array-like
        MODFLOW 6 SFR packagedata: path to an external text file,
        an object with an ``array`` attribute, or something that can be
        passed to the DataFrame constructor. Ignored if ``model`` is given.
        Data passed in are copied, not modified.
    model : flopy model instance
        If supplied, the model version is taken from ``model.version`` and
        (for 'mf6') the package data from ``model.sfr.packagedata.array``.
    grid_type : str
        Selects the default column names when a packagedata file has no
        header line: 'structured' (rno, k, i, j, ...), otherwise
        (rno, cellid, ...).

    Returns
    -------
    df : DataFrame
        For MODFLOW 6: output of aggregate_mf6_stress_budget with a 'stage'
        column added and, if package data are available, 'strtop', 'depth',
        'k', 'i' and 'j' columns. Otherwise: copy of the SfrFile ``df``
        attribute, sorted by segment and reach.

    Raises
    ------
    ValueError
        If MODFLOW-2005 style output is requested (no MODFLOW 6 model or
        package data given) without an output file or SfrFile instance.
    """
    model_version = None
    packagedata = None
    if model is not None:
        model_version = model.version
        if model_version == 'mf6':
            packagedata = pd.DataFrame(model.sfr.packagedata.array.copy())
    elif mf6_package_data is not None:
        model_version = 'mf6'
        if isinstance(mf6_package_data, (str, Path)):
            packagedata = _read_mf6_sfr_packagedata_file(mf6_package_data,
                                                         grid_type=grid_type)
        else:
            # make the dataframe on the .array attribute for flopy objects
            # or mf6_package_data is assumed to be array-like;
            # copy, because the reach numbers may be modified below
            packagedata = pd.DataFrame(
                getattr(mf6_package_data, 'array', mf6_package_data)).copy()

    if model_version != 'mf6':
        # SFR output
        if mf2005_sfr_outputfile is not None:
            sfrobj = SfrFile(mf2005_sfr_outputfile)
        elif mf2005_SfrFile_instance is not None:
            sfrobj = mf2005_SfrFile_instance
        else:
            raise ValueError('Need path to SFR tabular budget output '
                             'or FloPy SfrFile instance.')

        df = sfrobj.df.copy()
        df.sort_values(by=['segment', 'reach'], inplace=True)
        return df

    # get the budget output
    df = aggregate_mf6_stress_budget(mf6_sfr_budget_file)

    # get the stage data
    if mf6_sfr_stage_file is not None:
        stg = read_mf6_dependent_variable_output(mf6_sfr_stage_file,
                                                 text='stage')
        df.sort_values(by=['kstpkper', 'node'], inplace=True)
        stg.sort_values(by=['kstpkper', 'node'], inplace=True)
        df.set_index(['kstpkper', 'node'], inplace=True)
        stg.set_index(['kstpkper', 'node'], inplace=True)
        # rows without a time are excluded from the time comparison
        na_reaches = np.isnan(df.time.values)
        assert np.allclose(df.loc[~na_reaches].time.values,
                           stg.loc[~na_reaches].time.values)
        assert np.array_equal(df.index, stg.index)
        df['stage'] = stg['stage']
        df.reset_index(inplace=True)
    else:
        df['stage'] = np.nan

    # get the row, column location of SFR cells;
    # compute stream depths
    if packagedata is not None:
        rd = packagedata
        rno_col = {'rno', 'ifno'}.intersection(rd.columns).pop()
        # convert reach number to zero-based
        if rd[rno_col].min() == 1:
            rd[rno_col] -= 1
        assert rd[rno_col].min() == 0
        assert df.node.min() == 0

        strtop_col = {'rtp', 'strtop'}.intersection(rd.columns).pop()
        rno_strtop = dict(zip(rd[rno_col], rd[strtop_col]))
        df['strtop'] = pd.to_numeric([rno_strtop[rno] for rno in df.node.values], errors='coerce')
        # fill nan stages with their streambed tops
        isna = df['stage'].isna()
        df.loc[isna, 'stage'] = df.loc[isna, 'strtop']
        df['depth'] = df['stage'] - df['strtop']

        if 'cellid' not in rd.columns:
            rd['cellid'] = list(zip(rd['k'], rd['i'], rd['j']))

        rno_cellid = dict(zip(rd[rno_col], rd.cellid))
        for i, dim in enumerate(['k', 'i', 'j']):
            df[dim] = pd.to_numeric([rno_cellid[rno][i] for rno in df.node.values], errors='coerce')
        df.dropna(subset=['k', 'i', 'j'], axis=0, inplace=True)
        # can't convert to integers if nans are present
        for dim in ['k', 'i', 'j']:
            df[dim] = df[dim].astype(int)
            assert 'int' in df[dim].dtype.name

    return df