import io
import os
from pathlib import Path
import time
import numpy as np
import pandas as pd
from flopy.discretization import StructuredGrid
from flopy.utils.sfroutputfile import SfrFile
import flopy.utils.binaryfile as bf
from flopy.mf6.utils.binarygrid_util import MfGrdFile
from mfexport.grid import get_kij_from_node3d
[docs]
def aggregate_sfr_flow_ja_face(df):
"""SFR streamflow in/out components are saved in MODFLOW 6
as FLOW-JA-FACE, where all inflows (positive values) and outflows
(negative values) are listed for each stream reach (node). This
method aggregates those results and returns a dataframe
with one row per reach, with Qin, Qout, Qnet and Qmean_cfd
and Qmean_cfs columns.
Parameters
----------
df : DataFrame
Dataframe produced by read_mf6_binary_stress_output.
Must have columns:
node : reach number (rno)
kstpkper : (timestep, stress period) tuples
time : total time in model units
q : FLOW-JA-FACE values (+ in, - out)
Returns
-------
agg : DataFrame
DataFrame with flow values aggregated by node, with columns:
kstpkper : (timestep, stress period) tuples
time : total time in model units
rno : reach number (1-based)
Qin : total inflows from other reaches
Qout : total outflows to other reaches
Qnet : net gain/loss in flow
Qmean : average of inflows and outflows
"""
print('aggregating FLOW-JA-FACE results...')
ta = time.time()
# Note: the loop/dictionary approach below
# is ~20 times faster than a groupby approach
# specifying the aggregation operations by column
Qin = {}
Qout = {}
timedict = {}
node = df.node.tolist()
kstpkper = df.kstpkper.tolist()
times = df.time.tolist()
q = df.q.tolist()
tal = time.time()
for i in range(len(node)):
k = (kstpkper[i], node[i])
qi = q[i]
qin = Qin.get(k, 0)
qout = Qout.get(k, 0)
if qi > 0:
qin += qi
elif qi < 0:
qout += qi
Qin[k] = qin
Qout[k] = qout
timedict[k] = times[i]
print("loop took {:.2f}s".format(time.time() - tal))
tal = time.time()
# casting dicts to list first and then constructing dataframe
# is about 30x faster than pd.DataFrame.from_dict
keys = list(Qin.keys())
Qinl = [Qin[k] for k in keys]
Qoutl = [Qout[k] for k in keys]
timel = [timedict[k] for k in keys]
agg = pd.DataFrame({'Qin': Qinl, 'Qout': Qoutl, 'time': timel}, index=keys)
agg['kstpkper'] = [i[0] for i in agg.index.values]
agg['rno'] = [i[1] for i in agg.index.values]
agg.index = range(len(agg))
agg = agg[['time', 'kstpkper', 'rno', 'Qin', 'Qout']]
agg['Qnet'] = agg[['Qin', 'Qout']].sum(axis=1)
agg['Qmean'] = agg[['Qin', 'Qout']].abs().sum(axis=1) / 2
print("DataFrame construction took {:.2f}s".format(time.time() - tal))
print("finished in {:.2f}s\n".format(time.time() - ta))
return agg.sort_values(by=['time', 'rno'])
[docs]
def aggregate_mf6_stress_budget(mf6_stress_budget_output,
text=None,
kstpkper=None):
"""Read MODFLOW 6 budget output for a stress package into
a DataFrame of one node/stress period per row, with fluxes
listed by column.
Parameters
----------
mf6_stress_budget_output : file path
Binary output file
text : str or list of strings
Text identifying flow term(s) (e.g. 'FLOW-JA-FACE', 'GWF', etc.).
If None, all available variables are returned. (default None)
Returns
-------
df : DataFrame
DataFrame with flow values aggregated by node and stress period.
Columns derived from FLOW-JA-FACE results are:
kstpkper : (timestep, stress period) tuples
time : total time in model units
node : SFR reach number (zero-based)
Qin : total inflows from other reaches
Qout : total outflows to other reaches
Qnet : net gain/loss in flow
Qmean : average of inflows and outflows
Any additional fluxes are listed in columns of the same name.
"""
print('Getting data from {}...'.format(mf6_stress_budget_output))
ta = time.time()
if isinstance(text, str):
text = [text]
elif text is None:
text = get_stress_budget_textlist(mf6_stress_budget_output)
print('for variables: ' + ' '.join(text))
dfs = {}
agg = {}
for k in text:
dfs[k] = read_mf6_stress_budget_output(mf6_stress_budget_output, text=k, kstpkper=kstpkper)
# aggregate FLOW-JA-FACE values to one Qin, Qout value per reach
if k == 'FLOW-JA-FACE':
agg[k] = aggregate_sfr_flow_ja_face(dfs['FLOW-JA-FACE'])
index = pd.MultiIndex.from_tuples(list(zip(agg[k].kstpkper,
agg[k].rno)),
names=['kstpkper', 'node'])
agg[k].index = index
else:
# get fluxes by reach
# dfs[k] has one row for each connection
# reduce to sum of flow terms for each node (i.e. each SFR reach)
agg[k] = dfs[k].groupby(['kstpkper', 'node']).sum()
# merge variables into single dataframe
t = 'FLOW-JA-FACE' if 'FLOW-JA-FACE' in text else text[0]
text.remove(t)
df = agg[t]
for t in text:
agg[t][t] = agg[t].q # create column with flux name
df = df.join(agg[t][t], how='outer') # join on multiindex of kstpkper, node
# with outer join, ensure that all kstpkper and rno
# columns fully populated
df['kstpkper'] = df.index.get_level_values(0)
df['node'] = df.index.get_level_values(1)
df.reset_index(inplace=True, drop=True)
df.sort_values(by=['time', 'node'], inplace=True)
assert not np.any(df.kstpkper.isna().values)
#assert not np.any(df.rno.isna().values) # nan rnos can be unconnected reaches
rno_col = {'rno', 'ifno'}.intersection(df.columns)
if rno_col:
rno_col = rno_col.pop()
#assert np.array_equal(df.node.values, df.rno.values)
assert np.array_equal(df.dropna(axis=0, subset=['rno'])['node'].values,
df.dropna(axis=0, subset=['rno'])[rno_col].values)
df.drop(rno_col, axis=1, inplace=True)
# convert to zero-based
# (rnos in flopy package input are zero-based)
if df['node'].min() == 1:
df['node'] -= 1
df.index = range(len(df))
print("finished in {:.2f}s\n".format(time.time() - ta))
return df
[docs]
def get_flowja_face(cell_budget_file, binary_grid_file, kstpkper=(0, 0), idx=0,
precision='double'):
"""Get FLOW-JA-FACE (cell by cell flows) from MODFLOW 6 budget
output and binary grid file.
TODO: need test for extracted flowja fluxes
"""
if isinstance(cell_budget_file, str):
cbb = bf.CellBudgetFile(cell_budget_file)
if binary_grid_file is None:
binary_grid_file = cell_budget_file[::-4] + '.dis.grb'
if not os.path.exists(binary_grid_file):
binary_grid_file = None
else:
cbb = cell_budget_file
if binary_grid_file is None:
print("Couldn't get FLOW-JA-FACE, need binary grid file for connection information.")
return
bgf = MfGrdFile(binary_grid_file)
# IA array maps cell number to connection number
# (one-based index number of first connection at each cell)?
# taking the forward difference then yields nconnections per cell
ia = bgf._datadict['IA'] - 1
# Connections in the JA array correspond directly with the
# FLOW-JA-FACE record that is written to the budget file.
ja = bgf._datadict['JA'] - 1 # cell connections
flowja = cbb.get_data(text='FLOW-JA-FACE')[0][0, 0, :]
df = get_intercell_connections(ia, ja, flowja)
cols = ['n', 'm', 'q']
# get the k, i, j locations for plotting the connections
if isinstance(bgf.modelgrid, StructuredGrid):
nlay, nrow, ncol = bgf.modelgrid.nlay, bgf.modelgrid.nrow, bgf.modelgrid.ncol
k, i, j = get_kij_from_node3d(df['n'].values, nrow, ncol)
df['kn'], df['in'], df['jn'] = k, i, j
k, i, j = get_kij_from_node3d(df['m'].values, nrow, ncol)
df['km'], df['im'], df['jm'] = k, i, j
df.reset_index()
cols += ['kn', 'in', 'jn', 'km', 'im', 'jm']
return df[cols].copy()
[docs]
def get_intercell_connections(ia, ja, flowja):
print('Making DataFrame of intercell connections...')
ta = time.time()
all_n = []
m = []
q = []
for n in range(len(ia)-1):
for ipos in range(ia[n] + 1, ia[n+1]):
all_n.append(n)
m.append(ja[ipos]) # m is the cell that n connects to
q.append(flowja[ipos]) # flow across the connection
df = pd.DataFrame({'n': all_n, 'm': m, 'q': q})
et = time.time() - ta
print("finished in {:.2f}s\n".format(et))
return df
[docs]
def get_bc_flux(cbbobj, txt, kstpkper=None, idx=None):
"""Read a flow component from MODFLOW binary cell budget output;
Parameters
----------
cbbobj : open file handle (instance of flopy.utils.binaryfile.CellBudgetFile
txt : cell budget record to read (e.g. 'STREAM LEAKAGE')
kstpkper : tuple
(timestep, stress period) to read
idx : index of list returned by cbbobj (usually 0)
Returns
-------
arr : ndarray
"""
nrow, ncol, nlay = cbbobj.nrow, cbbobj.ncol, cbbobj.nlay
results_list = cbbobj.get_data(text=txt, kstpkper=kstpkper, idx=idx)
# this logic needs some cleanup
#if len(results) > 0:
# results = results[0]
if len(results_list) == 0:
print('no data found at {} for {}'.format(kstpkper, txt))
return
all_results = []
for i, results in enumerate(results_list):
if isinstance(results, list) and txt == 'RECHARGE':
results = results[1]
if len(results) == 0:
print(f'no data found at {kstpkper} for {txt}, layer {i}')
continue
#if results.shape == (nlay, nrow, ncol):
# return results
#if results.shape == (1, nrow, ncol):
# results = results[0]
#elif results.shape == (nrow, ncol):
# return results
if np.ndim(results) == 1 and \
len({'node', 'q'}.difference(set(results.dtype.names))) == 0:
arr = np.zeros(nlay * nrow * ncol, dtype=float)
arr[results.node - 1] = results.q
arr = np.reshape(arr, (nlay, nrow, ncol))
results = arr.sum(axis=0)
all_results.append(np.squeeze(results))
return np.squeeze(all_results)
[docs]
def get_stress_budget_textlist(mf6_stress_budget_output):
"""Get list of available variable names in a binary budget output file.
"""
cbobj = bf.CellBudgetFile(mf6_stress_budget_output,
precision='double'
)
textlist = [t.strip().decode() for t in cbobj.textlist]
return textlist
[docs]
def read_mf6_dependent_variable_output(mf6_dependent_variable_output,
text='head',
kstpkper=None, hdry=-1e30):
"""Reads dependent variable output; for example, heads from the
groundwater flow solution or output from STAGE FILEOUT in the options
block of the .sfr6 file). Returns a DataFrame of output values.
Parameters
----------
mf6_dependent_variable_output : file path
Binary output file
text : str
Text identifying variable (e.g. 'head', 'stage', etc.)
kstpkper : list of tuples
List of zero-based (timestep, stress period) tuples. If
None, all available data are returned. (default None)
hdry : float
Value indicating dry cells.
Returns
-------
df : DataFrame
Table with results. Columns:
node : Zero-based ID: model cell number for heads, reach number for SFR, etc.
<text> : results for variable <text>
kstpkper : (timestep, stress period)
time : total time in model units
"""
print('reading {} from\n{}...'.format(text, mf6_dependent_variable_output))
ta = time.time()
hdsobj = bf.HeadFile(mf6_dependent_variable_output,
text=text) # need to specify text otherwise file may not open
times = hdsobj.get_times()
if kstpkper is None:
kstpkper = hdsobj.get_kstpkper()
else:
if not isinstance(kstpkper, list):
kstpkper = [kstpkper]
records = []
for ksp in kstpkper:
results = hdsobj.get_data(kstpkper=ksp)
results = np.squeeze(results).ravel().tolist()
records.append(results)
nnodes = len(np.squeeze(records[0]).ravel())
# create dataframe with one result in each row
# sorted by timestep, then node
values = []
kstpkper_values = []
time_values = []
for i, rec in enumerate(records):
values += records[i]
kstpkper_values += [kstpkper[i]] * nnodes
time_values += [times[i]] * nnodes
df = pd.DataFrame({'node': list(range(nnodes)) * len(records),
text: values,
'kstpkper': kstpkper_values,
'time': time_values})
df.loc[df[text] == hdry, text] = np.nan
print("finished in {:.2f}s\n".format(time.time() - ta))
return df[['time', 'kstpkper', 'node', text]]
[docs]
def read_mf6_stress_budget_output(mf6_stress_budget_output,
text='FLOW-JA-FACE',
kstpkper=None):
"""Reads budget output from any package that follows the imeth=6
structure (e.g. LAK, MAW, SFR, and UZF package(s); for example,
output from BUDGET FILEOUT in the options block of the .sfr6 file).
Parameters
----------
mf6_stress_budget_output : file path
Binary output file
text : str
Text identifying flow term (e.g. 'FLOW-JA-FACE')
Returns
-------
df : DataFrame
Table with flow results. Columns:
node : node number (e.g. stream reach; 1-based)
node2 : connecting node (e.g. up or downstream reach; 1-based)
q : flow values
FLOW-AREA : area of JA-FACE?
kstpkper : (timestep, stress period)
time : total time in model units
"""
print('reading {} from\n{}...'.format(text, mf6_stress_budget_output))
ta = time.time()
cbobj = bf.CellBudgetFile(mf6_stress_budget_output,
precision='double'
)
times = cbobj.get_times()
dfs = []
if kstpkper is None:
kstpkper = cbobj.get_kstpkper()
# returns a list of recarrays (length: nnodes);
# one for each timestep, stress period
records = cbobj.get_data(text=text)
else:
# otherwise, just get the results
# for specified timestep, stress periods
if not isinstance(kstpkper, list):
kstpkper = [kstpkper]
records = []
for ksp in kstpkper:
records += cbobj.get_data(text=text, kstpkper=ksp)
for i, rec in enumerate(records):
df = pd.DataFrame(rec)
df['kstpkper'] = [kstpkper[i]] * len(df)
df['time'] = [times[i]] * len(df)
dfs.append(df.copy())
df = pd.concat(dfs)
print("finished in {:.2f}s\n".format(time.time() - ta))
return df.sort_values(by=['time', 'node'])
[docs]
def read_sfr_output(mf2005_sfr_outputfile=None,
mf2005_SfrFile_instance=None,
mf6_sfr_stage_file=None,
mf6_sfr_budget_file=None,
mf6_package_data=None,
model=None, grid_type='structured'):
"""Read MF-2005 or MF-6 style SFR output; return as DataFrame.
"""
model_version = None
packagedata = None
if model is not None:
model_version = model.version
if model_version == 'mf6':
packagedata = pd.DataFrame(model.sfr.packagedata.array.copy())
elif mf6_package_data is not None:
model_version = 'mf6'
if isinstance(mf6_package_data, str) or isinstance(mf6_package_data, Path):
skiprows = 0
names = None
with open(mf6_package_data) as src:
for line in src:
if line.strip().startswith('#'):
names = line.strip().split()
skiprows += 1
else:
ncol = len(line.strip().split())
break
if names is None:
if grid_type == 'structured':
names = ['rno', 'k', 'i', 'j', 'rlen', 'rwid', 'rgrd', 'rtp', 'rbth', 'rhk',
'man', 'ncon', 'ustrf', 'ndv']
else:
names = ['rno', 'cellid', 'rlen', 'rwid', 'rgrd', 'rtp', 'rbth', 'rhk',
'man', 'ncon', 'ustrf', 'ndv']
for i, _ in enumerate(range(len(names), ncol)):
names.append(f'aux_col{i+1}')
else:
names[0] = names[0].strip('#')
# read the packagedata as a string to handle "none" values
with open(mf6_package_data) as src:
raw_pd = src.read()
raw_pd = raw_pd.lower().replace('none', '0 0 0')
packagedata = pd.read_csv(io.StringIO(raw_pd), names=names,
skiprows=skiprows, delim_whitespace=True)
for col in ['rno', 'ifno', 'k', 'i', 'j']:
if col in packagedata:
packagedata[col] -= 1
if 'cellid' in packagedata.columns:
if not isinstance(packagedata['cellid'][0], int):
packagedata['cellid'] = [(c[0]-1, c[1] -1, c[2] -1) for c in packagedata['cellid']]
else:
packagedata['cellid'] -=1
else:
# make the dataframe on the .array attribute for flopy objects
# or mf6_package_data is assumed to be array-like
packagedata = pd.DataFrame(getattr(mf6_package_data, 'array', mf6_package_data))
if model_version == 'mf6':
# get the budget output
df = aggregate_mf6_stress_budget(mf6_sfr_budget_file)
# get the stage data
if mf6_sfr_stage_file is not None:
stg = read_mf6_dependent_variable_output(mf6_sfr_stage_file,
text='stage')
df.sort_values(by=['kstpkper', 'node'], inplace=True)
stg.sort_values(by=['kstpkper', 'node'], inplace=True)
df.set_index(['kstpkper', 'node'], inplace=True)
stg.set_index(['kstpkper', 'node'], inplace=True)
na_reaches = np.isnan(df.time.values)
#df.loc[~na_reaches].to_csv('df.csv')
#stg.loc[~na_reaches].to_csv('stg.csv')
#assert np.allclose(df.time.values, stg.time.values)
assert np.allclose(df.loc[~na_reaches].time.values,
stg.loc[~na_reaches].time.values)
assert np.array_equal(df.index, stg.index)
df['stage'] = stg['stage']
df.reset_index(inplace=True)
else:
df['stage'] = np.nan
# get the row, column location of SFR cells;
# compute stream depths
if packagedata is not None:
rd = packagedata
rno_col = {'rno', 'ifno'}.intersection(rd.columns).pop()
# convert reach number to zero-based
if rd[rno_col].min() == 1:
rd[rno_col] -= 1
assert rd[rno_col].min() == 0
assert df.node.min() == 0
strtop_col = {'rtp', 'strtop'}.intersection(rd.columns).pop()
rno_strtop = dict(zip(rd[rno_col], rd[strtop_col]))
df['strtop'] = pd.to_numeric([rno_strtop[rno] for rno in df.node.values], errors='coerce')
# fill nan stages with their streambed tops
isna = df['stage'].isna()
df.loc[isna, 'stage'] = df.loc[isna, 'strtop']
df['depth'] = df['stage'] - df['strtop']
if 'cellid' not in rd.columns:
rd['cellid'] = list(zip(rd['k'], rd['i'], rd['j']))
rno_cellid = dict(zip(rd[rno_col], rd.cellid))
for i, dim in enumerate(['k', 'i', 'j']):
df[dim] = pd.to_numeric([rno_cellid[rno][i] for rno in df.node.values], errors='coerce')
df.dropna(subset=['k', 'i', 'j'], axis=0, inplace=True)
# can't convert to integers if nans are present
for dim in ['k', 'i', 'j']:
df[dim] = df[dim].astype(int)
assert 'int' in df[dim].dtype.name
else:
# SFR output
if mf2005_sfr_outputfile is not None:
sfrobj = SfrFile(mf2005_sfr_outputfile)
elif mf2005_SfrFile_instance is not None:
sfrobj = mf2005_SfrFile_instance
else:
print('Need path to SFR tabular budget output or FloPy SfrFile instance.')
df = sfrobj.df.copy()
df.sort_values(by=['segment', 'reach'], inplace=True)
return df