Source code for mfobs.modflow

"""
Functions specific to MODFLOW and flopy
"""
import json
import time
from pathlib import Path
from affine import Affine
import numpy as np
import pandas as pd
try:
    import flopy
except ImportError:
    flopy = False
from mfobs.fileio import write_insfile, read_csv
from mfobs.utils import set_period_start_end_dates


itmuni_text = {0: "undefined",
               1: "seconds",
               2: "minutes",
               3: "hours",
               4: "days",
               5: "years",
               }

def get_gwf_obs_input(gwf_obs_input_file, gwf_obs_block=None):
    """Read the first BEGIN continuous FILEOUT block of an input file
    to the MODFLOW-6 GWF observation utility.

    Parameters
    ----------
    gwf_obs_input_file : str
        Input file to MODFLOW-6 observation utility
        (contains layer information).
    gwf_obs_block : None or int
        Optional argument to read a specific observation block or all blocks
        from the GWF observation utility file. A value of None returns
        observations from all blocks. An integer value returns obs from a
        specific block, in (zero-based) order from top to bottom.
        For example, a value of 0 would return the first obs block,
        a value of 1 would return the second obs block, and so on.

    Note
    ----
    Modflow-setup writes all of the observation input to a single block,
    but observation input can be broken out into multiple blocks (one per file).

    This also doesn't work with open/close statements.
    """
    if gwf_obs_block is None:
        with open(gwf_obs_input_file) as src:
            for line in src:
                if 'BEGIN continuous' in line:
                    # read the whole file with pandas.read_csv,
                    # skipping over any lines that can't be read
                    # (MODFLOW 6 block headers and footers)
                    df = pd.read_csv(src, sep=r'\s+', header=None,
                                     on_bad_lines='skip')
        # remove the MODFLOW 6 block headers and footers
        # if they were included in the dataframe
        bad_lines = pd.to_numeric(df[2], errors='coerce').isna()
        df = df.loc[~bad_lines].copy()
        # enforce ints for index values
        # (pandas will cast to float if there are nans)
        df[[2, 3, 4]] = df[[2, 3, 4]].astype(int)
        if df.isna().any().any():
            raise ValueError(f"Nan values in {gwf_obs_input_file}; check input.")
    elif isinstance(gwf_obs_block, int):
        with open(gwf_obs_input_file) as src:
            begin_idxs = []
            end_idxs = []
            for i, line in enumerate(src):
                if 'BEGIN continuous' in line:
                    begin_idxs.append(i + 1)
                if 'END continuous' in line:
                    end_idxs.append(i - 1)
        df = pd.read_csv(gwf_obs_input_file,
                         skiprows=begin_idxs[gwf_obs_block],
                         skipfooter=(i - end_idxs[gwf_obs_block]),
                         sep=r'\s+', engine='python', header=None)
    else:
        raise ValueError("gwf_obs_block must be 'None' or an integer")
    df.dropna(axis=0, inplace=True)
    df.columns = ['obsname', 'obstype', 'k', 'i', 'j']
    for index_col in 'k', 'i', 'j':
        df[index_col] = df[index_col].astype(int) - 1
    return df
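
# A minimal usage sketch for get_gwf_obs_input (hypothetical file contents):
# for a GWF observation utility input file containing
#
#   BEGIN continuous FILEOUT model.head.obs.csv
#     site1  HEAD  1  10  10
#     site1  HEAD  2  10  10
#   END continuous
#
# the returned DataFrame has obsname and obstype columns, and zero-based
# k, i, j location columns:
#
# >>> get_gwf_obs_input('model.obs')
#   obsname obstype  k  i  j
# 0   site1    HEAD  0  9  9
# 1   site1    HEAD  1  9  9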
def get_ij(transform, x, y):
    """Return the row and column of a point or sequence of points
    in real-world coordinates. Uses the affine package. Basically,

    * to get an x, y: transform * (col, row)
    * the inverse, ~transform * (x, y), returns a fractional i, j location on the grid

    Parameters
    ----------
    transform : affine.Affine instance
        An `affine.Affine <https://github.com/sgillies/affine>`_ object describing the
        orientation of the model grid. Modflow-setup :class:`~mfsetup.grid.MFsetupGrid`
        have this attached via the :meth:`~mfsetup.grid.MFsetupGrid.transform` property.
        Example::

            modelgrid_transform=affine.Affine(1000.0, 0.0, 500955,
                                              0.0, -1000.0, 1205285)

        for a uniform spacing of 1000 and upper left corner of 500955, 1205285
        with a rotation of 45 degrees, counter-clockwise about the upper left corner::

            modelgrid_transform=affine.Affine(1000.0, 0.0, 500955,
                                              0.0, -1000.0, 1205285).rotation(45.)

        An ``affine.Affine`` instance can also be created from a
        `Modflow-setup <https://github.com/aleaf/modflow-setup>`_
        grid JSON file via the :func:`~mfobs.modflow.get_modelgrid_transform` function.
    x : scalar or sequence of x coordinates
    y : scalar or sequence of y coordinates

    Returns
    -------
    i : row or sequence of rows (zero-based)
    j : column or sequence of columns (zero-based)
    """
    if not np.isscalar(x) and not isinstance(x, np.ndarray):
        x = np.array(x)
    if not np.isscalar(y) and not isinstance(y, np.ndarray):
        y = np.array(y)
    j, i = ~transform * (x, y)
    # decrement so that points within a cell (pixel)
    # round to the cell center
    # (transform returns fractional row/column positions
    # relative to the upper left corner)
    i = np.round(i - 0.5, 0).astype(int)
    j = np.round(j - 0.5, 0).astype(int)
    return i, j
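
# A minimal usage sketch for get_ij (illustrative values, not from a real
# model): with a 1000-unit grid spacing and an upper left corner at
# (500955, 1205285), a point 545 units east and 485 units south of the
# corner falls in the first row and column:
#
# >>> from affine import Affine
# >>> transform = Affine(1000.0, 0.0, 500955, 0.0, -1000.0, 1205285)
# >>> get_ij(transform, 501500, 1204800)
# (0, 0)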
def get_kstp_kper(nstp):
    """Given a sequence of the number of timesteps in each stress period,
    return a sequence of timestep, period tuples (kstp, kper) used
    by various flopy methods.
    """
    kstp = []
    kper = []
    # use a separate loop variable so the nstp argument isn't shadowed
    for i, period_nstp in enumerate(nstp):
        for j in range(period_nstp):
            kstp.append(j)
            kper.append(i)
    return kstp, kper
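
# For example (illustrative values), a model with 1 timestep in the first
# stress period and 3 timesteps in the second yields
#
# >>> get_kstp_kper([1, 3])
# ([0, 0, 1, 2], [0, 1, 1, 1])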
def get_layer(column_name):
    """Pandas appends duplicate column names with a .*,
    where * is the number of duplicate names from left to right
    (e.g. obs, obs.1, obs.2, ...). Modflow-setup writes observation input
    for each model layer, at each site location, with the same observation
    prefix (site identifier). MODFLOW-6 reports the result for each layer
    with duplicate column names, with layers increasing from left to right
    (e.g. obs, obs, obs, ...).

    Parse the layer number from column_name, returning zero
    if there is no '.' separator.

    Notes
    -----
    This approach can't be used if the model includes inactive cells
    (including pinched layers) at the locations of observations,
    because it assumes that the layer numbers are consecutive, starting at 0.
    For example, a pinched layer 2 in a 4 layer model would result in the
    observation being in layers 0, 1, 3, which would be misinterpreted as 0, 1, 2.
    """
    if '.' not in column_name:
        return 0
    return int(column_name.split('.')[-1])
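
# For example, for the duplicate head columns that pandas renames on read:
#
# >>> [get_layer(c) for c in ['site1', 'site1.1', 'site1.2']]
# [0, 1, 2]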
def get_site_no(obsprefix, obsprefix_includes_variable=True):
    """Parse observation site number from an observation prefix string,
    which may include a variable at the end, delimited by a hyphen.
    Also split off any part of the string after a period ('.'),
    for example index instance numbers appended by pandas.

    Examples
    --------
    >>> get_site_no('15266500')
    '15266500'
    >>> get_site_no('15266500-flow')
    '15266500'
    >>> get_site_no('bc-east-fork-s12-flow')
    'bc-east-fork-s12'
    >>> get_site_no('bc-east-fork-s12', obsprefix_includes_variable=False)
    'bc-east-fork-s12'
    >>> get_site_no('15266500.1')
    '15266500'
    """
    parts = obsprefix.split('-')
    if len(parts) == 1 or not obsprefix_includes_variable:
        return obsprefix.split('.')[0]
    else:
        return '-'.join(parts[:-1])
def get_mf6_single_variable_obs(perioddata,
                                model_output_file,
                                gwf_obs_input_file=None,
                                variable=None,
                                names_include_variable=False,
                                abs=True,
                                gwf_obs_block=None):
    r"""Read raw MODFLOW-6 observation output from csv table with
    times along the row axis and observations along the column axis. Reshape
    (stack) results to be n times x n sites rows, with a single observation value
    in each row. If there is more than one time in a stress period, retain only
    the last time (so that there is one observation per stress period for each site).
    If an input file to the MODFLOW-6 observation utility is included,
    include the observation layer number in the output.

    Parameters
    ----------
    perioddata : str, pathlike or DataFrame
        Path to csv file or pandas DataFrame
        with start/end dates for stress periods. Must have the following columns:

        =================== =================================================================
        time                modflow simulation time, in days\ :sup:`1`
        start_datetime      start date for each stress period
        end_datetime        end date for each stress period
        =================== =================================================================

        1) Times in the `time` column must correspond to times in the
           `time` column of gage package output.

    model_output_file : str
        Path to MODFLOW-6 observation csv output (shape: n times rows x n obs columns).
    gwf_obs_input_file : str
        Input file to MODFLOW-6 observation utility (contains layer information).
    variable : str
        Variable name for observations. If supplied, observation prefixes
        will be named following the format <site number>-<variable>.
        If observations are already set up in MODFLOW with the format
        <site number>-<variable> *and* `names_include_variable=True`,
        then this is not needed.
        By default, None, in which case observation prefixes
        are the same as the site numbers.
    names_include_variable : bool
        Option to indicate that the names in the MODFLOW observation input
        have the format <site number>-<variable> (which allows for multiple
        observations of different variables at a site). In this case, observation
        prefixes will be named from the MODFLOW observation names, and any
        `variable` argument will be ignored.
        By default, False, in which case the MODFLOW observation names are assumed
        to be the site numbers. Observation prefixes will be formulated from the
        MODFLOW names and variable.
    abs : bool, optional
        Option to convert simulated values to absolute values.
        For example, downstream-flow observations, which by convention are reported
        as negative by MODFLOW 6, but will be compared to positive observed values.
        By default, True.
    gwf_obs_block : None or int
        Argument to read a specific observation block or all blocks
        from the GWF observation utility file. A value of None returns
        observations from all blocks. An integer value returns obs from a
        specific block, in (zero-based) order, from top to bottom.
        For example, a value of 0 would return the first obs block,
        a value of 1 would return the second obs block, and so on.
        Modflow-setup writes all of the observation input to a single block,
        but observation input can be broken out into multiple blocks (one per file).
        By default, None (all blocks).

    Returns
    -------
    results : DataFrame
        DataFrame with one head observation per row, with the following columns:

        =================== =============================================================
        datetime            pandas datetimes, based on stress period start date
        site_no             unique identifier for each site
        variable            MODFLOW variable name for observed value
        obsprefix           prefix of observation name (site identifier)
        sim_obsval          simulated values
        time                modflow simulation time, in days
        per                 modflow stress period\ :sup:`1`
        =================== =============================================================

        1) Stress period information is needed to differentiate between
           steady-state and transient observations that may have the same
           timestamp.
    """
    # read in/set up the perioddata table
    if not isinstance(perioddata, pd.DataFrame):
        perioddata = pd.read_csv(perioddata)
    else:
        perioddata = perioddata.copy()
    set_period_start_end_dates(perioddata)
    perioddata.index = perioddata['time']

    print('reading model output from {}...'.format(model_output_file))
    model_output = read_csv(model_output_file, dtype='float64')

    # convert all observation names to lower case
    model_output.columns = model_output.columns.str.lower()

    # add stress period information to model output
    # by having pandas match time floats in indices
    # the last time in each stress period is retained
    # (corresponding to the model end time listed in perioddata)
    model_output.index = model_output['time']
    perioddata.index = perioddata['time']
    model_output['per'] = perioddata['per']
    model_output['perioddata_time'] = perioddata['time']
    # TODO: backfill head so that all times have SP and time info
    # add timestep info as well
    model_output.dropna(subset=['per'], axis=0, inplace=True)
    model_output['per'] = model_output['per'].astype(int)
    assert np.allclose(model_output.time.values, model_output.perioddata_time.values)

    # reshape the model output from (nper rows, nsites columns) to nper x nsites rows
    periods = dict(zip(perioddata['time'], perioddata['per']))
    stacked = model_output.drop(['time', 'perioddata_time', 'per'],
                                axis=1).stack(level=0).reset_index()
    simval_col = 'sim_obsval'
    stacked.columns = ['time', 'obsprefix', simval_col]
    stacked['site_no'] = [get_site_no(
        s, obsprefix_includes_variable=names_include_variable)
        for s in stacked.obsprefix]
    if names_include_variable:
        variables = []
        for s in stacked['obsprefix'].str.split('-'):
            if len(s) == 1:
                raise ValueError('names_include_variable=True, '
                                 f'but observation prefix: {s} only has one component. '
                                 'When names_include_variable=True, observation prefixes '
                                 'are expected to be in the format <site number>-<variable>.')
            variables.append(s[-1])
        stacked['variable'] = variables
    else:
        stacked['variable'] = variable
    stacked['per'] = [periods[time] for time in stacked['time']]
    stacked.index = stacked['time']

    # optionally convert simulated values to absolute values
    if abs:
        stacked[simval_col] = stacked[simval_col].abs()

    # add dates
    stacked['datetime'] = perioddata['end_datetime']
    assert not stacked.drop('variable', axis=1).isna().any().any()

    # parse the layers from the column positions (prior to stacking)
    if gwf_obs_input_file is not None:
        gwf_obs_input = get_gwf_obs_input(gwf_obs_input_file,
                                          gwf_obs_block=gwf_obs_block)
        # Assign layer to each observation,
        # assuming that numbering in gwf_obs_input is repeated nper times
        ntimes = len(stacked['time'].unique())
        stacked['layer'] = gwf_obs_input['k'].tolist() * ntimes

    # reset the obsprefixes to be the same for different layers at a location
    stacked['obsprefix'] = [prefix.split('.')[0] for prefix in stacked.obsprefix]

    stacked.reset_index(drop=True, inplace=True)
    sort_cols = [c for c in ['obsprefix', 'datetime'] if c in stacked.columns]
    stacked.sort_values(by=sort_cols, inplace=True)
    stacked['obsprefix'] = [f"{sn}-{var}" if var is not None else str(sn)
                            for sn, var in zip(stacked['site_no'],
                                               stacked['variable'])]
    cols = ['datetime', 'site_no', 'variable', 'obsprefix',
            simval_col, 'layer', 'time', 'per']
    results = stacked[[c for c in cols if c in stacked.columns]]
    return results
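
# A minimal usage sketch (hypothetical file names): build the perioddata
# table first, then stack the MODFLOW-6 head observation output.
#
# >>> perioddata = get_perioddata('model.tdis', sto_file='model.sto')
# >>> results = get_mf6_single_variable_obs(
# ...     perioddata, 'model.head.obs.csv',
# ...     gwf_obs_input_file='model.obs', variable='head')
# >>> results.columns.tolist()
# ['datetime', 'site_no', 'variable', 'obsprefix', 'sim_obsval', 'layer', 'time', 'per']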
def read_mf_gage_package_output_files(gage_package_output_files, variable=None):
    """Read one or more gage package output files into a single table.
    If multiple files are specified, a single variable to return
    must be specified.

    Parameters
    ----------
    gage_package_output_files : str/pathlike, or list of pathlikes
        Path to gage package output file(s).
        Multiple files will be concatenated into one output table.
    variable : str
        Variable to return. Only required if multiple files
        are specified to gage_package_output_files.

    Returns
    -------
    df : DataFrame
        Gage package output, with times along the row axis,
        and variables or sites along the column axis.
    """
    if isinstance(gage_package_output_files, (str, Path)):
        gage_package_output_files = [gage_package_output_files]
    else:
        if variable is None:
            raise ValueError("mfobs.modflow.read_mf_gage_package_output_files: "
                             "variable must be specified when reading multiple files.")
    dfs = []
    for f in gage_package_output_files:
        print(f'reading model output from {f}...')
        # set the observation prefix (site name) from the file name
        obsprefix = Path(f).stem.replace('-', '').replace('_', '').lower()
        with open(f) as src:
            # skip the header line; parse column names from the second line
            header = next(src)
            names = next(src).strip().strip('"DATA:').strip()
            names = names.replace('.', '').lower().split()
            df = pd.read_csv(src, sep=r'\s+', header=None, names=names)
        df.index = df['time']
        if variable is not None:
            variable = variable.replace('.', '').lower()
            df = df[[variable]]  # keep as DataFrame
            col_name = f"{obsprefix}-{variable}"
            df.rename(columns={variable: col_name}, inplace=True)
        dfs.append(df)
    if len(dfs) > 0:
        df = pd.concat(dfs, axis=1)
    return df
def get_mf_gage_package_obs(perioddata,
                            gage_package_output_files,
                            gwf_obs_input_file=None,
                            variable='flow',
                            abs=True):
    r"""Read raw MODFLOW gage package text file output with
    times along the row axis and observations along the column axis. Reshape
    (stack) results to be n times x n sites rows, with a single observation value
    in each row.

    Observations can be aggregated from the raw model output to the stress period
    level, which is often desired for models with monthly stress periods.
    If there is more than one time in a stress period, only the last timestep
    is retained (so that there is one observation per stress period for each site).
    Alternatively, for models with daily stresses on daily timesteps (for example,
    GSFLOW), observations can be created for each timestep.

    If an input file to the MODFLOW observation package is specified,
    include the observation layer number in the output.

    Parameters
    ----------
    perioddata : str, pathlike or DataFrame
        Path to csv file or pandas DataFrame
        with start/end dates for stress periods. Must have the following columns:

        =================== =================================================================
        time                modflow simulation time, in days\ :sup:`1`
        start_datetime      start date for each stress period
        end_datetime        end date for each stress period
        =================== =================================================================

        1) Times in the `time` column must correspond to times in the
           `time` column of gage package output.

    gage_package_output_files : str/pathlike, or list of pathlikes
        Path to gage package output file(s).
        Multiple files will be concatenated into one output table.
    gwf_obs_input_file : str
        Input file to MODFLOW observations (e.g. hydmod or gage package);
        currently not used.
    variable : str or sequence
        Variable(s) to read from gage package output.
    abs : bool, optional
        Option to convert simulated values to absolute values

    Returns
    -------
    results : DataFrame
        DataFrame with one head observation per row, with the following columns:

        =================== =============================================================
        datetime            pandas datetimes, based on stress period start date
        site_no             unique identifier for each site
        variable            PRMS variable name
        obsprefix           prefix of observation name (site identifier)
        sim_obsval          simulated values
        time                modflow simulation time, in days
        per                 modflow stress period\ :sup:`1`
        =================== =============================================================

        1) Stress period information is needed to differentiate between
           steady-state and transient observations that may have the same
           timestamp.
    """
    # read in/set up the perioddata table
    if not isinstance(perioddata, pd.DataFrame):
        perioddata = pd.read_csv(perioddata)
    else:
        perioddata = perioddata.copy()
    set_period_start_end_dates(perioddata)
    perioddata.index = perioddata['time']

    if isinstance(gage_package_output_files, (str, Path)):
        gage_package_output_files = [gage_package_output_files]
    if isinstance(variable, str):
        variable = [variable]

    simval_col = 'sim_obsval'
    dfs = []
    for var in variable:
        output = read_mf_gage_package_output_files(gage_package_output_files, var)
        output = output.stack().reset_index()
        output.columns = ['time', 'obsprefix', simval_col]
        dfs.append(output)
    model_output = pd.concat(dfs)
    # (columns from read_mf_gage_package_output_files should all be lower case)

    # add stress period information to model output
    # by having pandas match time floats in indices
    # the last time in each stress period is retained
    # (corresponding to the model end time listed in perioddata)
    model_output.index = model_output['time']
    perioddata.index = perioddata['time']
    model_output['per'] = perioddata['per']
    model_output['perioddata_time'] = perioddata['time']
    model_output.dropna(subset=['per'], axis=0, inplace=True)
    model_output['per'] = model_output['per'].astype(int)
    model_output['site_no'] = [s[0] for s in model_output.obsprefix.str.split('-')]
    model_output['variable'] = [s[1] for s in model_output.obsprefix.str.split('-')]
    assert np.allclose(model_output.time.values, model_output.perioddata_time.values)

    stacked = model_output

    # optionally convert simulated values to absolute values
    if abs:
        stacked[simval_col] = stacked[simval_col].abs()

    # add dates
    stacked['datetime'] = perioddata['end_datetime']
    assert not stacked.isna().any().any()

    stacked.reset_index(drop=True, inplace=True)
    sort_cols = [c for c in ['obsprefix', 'datetime'] if c in stacked.columns]
    stacked.sort_values(by=sort_cols, inplace=True)
    results = stacked[['datetime', 'site_no', 'variable', 'obsprefix',
                       simval_col, 'time', 'per']]
    return results
def get_modflow_mass_balance(modroot, outfile=None, write_ins=True):
    """Read in the percent discrepancy for inset and parent models.

    Parameters
    ----------
    modroot : str
        root name of the model scenario
    outfile : str
        filepath for output
    write_ins : bool
        whether or not to write an instruction file
    """
    print('reading in the mass balance files')
    # make a list with which to concatenate results
    dfs = []
    # read in both inset and parent list files
    for cmod in ['inset', 'parent']:
        # read in the list file
        # (note: assumes 'rundir' is defined in the module namespace)
        mfl6 = flopy.utils.Mf6ListBudget("{0}{1}_{2}.list".format(rundir, modroot, cmod))
        # get all the budget information
        df, _ = mfl6.get_dataframes(start_datetime="1-1-2012")
        # construct the obsname with the date etc.
        df['obsnme'] = ['{0}_discrep_{1:d}{2:02d}'.format(cmod, i.year, i.month)
                        for i in df.index]
        # append on the max absolute percent discrepancy
        # (pd.concat instead of the deprecated DataFrame.append)
        max_discrep = pd.DataFrame(
            [{'obsnme': '{}_discrep_max'.format(cmod),
              'PERCENT_DISCREPANCY': df.PERCENT_DISCREPANCY.abs().max()}])
        df = pd.concat([df, max_discrep], ignore_index=True)
        dfs.append(df[['obsnme', 'PERCENT_DISCREPANCY']])
    outdf = pd.concat(dfs)
    outdf['group'] = 'percent_discrep'
    outdf['obsval'] = 0
    outdf.to_csv(outfile, index=False, sep=' ')
    print(f'wrote {len(outdf):,} observations to {outfile}')
    if write_ins:
        write_insfile(outdf, outfile + '.ins', obsnme_column='obsnme',
                      simulated_obsval_column='PERCENT_DISCREPANCY', index=False)
def get_modelgrid_transform(grid_json_file, shift_to_cell_centers=False):
    """Create an affine.Affine that describes the model grid from a json file.
    The affine package comes with rasterio as a dependency
    or can be installed separately.

    Parameters
    ----------
    grid_json_file : str
        Model grid json file produced by modflow-setup
    shift_to_cell_centers : bool
        By default, the transform reflects the upper left corner of the first
        cell in the model, and any conversions of x, y coordinates to pixels
        will be relative to upper left corners. If shift_to_cell_centers=True,
        x, y points will be referenced to the nearest cell centers.
    """
    with open(grid_json_file) as f:
        cfg = json.load(f)

    for dx in 'delr', 'delc':
        if not np.isscalar(cfg[dx]):
            cfg[dx] = cfg[dx][0]
    xul = cfg['xul']
    yul = cfg['yul']
    if shift_to_cell_centers:
        xul += 0.5 * cfg['delr']
        yul -= 0.5 * cfg['delc']
    transform = Affine(cfg['delr'], 0., xul,
                       0., -cfg['delc'], yul) * \
        Affine.rotation(-cfg['angrot'])
    return transform
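
# A minimal usage sketch (hypothetical file contents): a modflow-setup grid
# JSON file with delr, delc, xul, yul, and angrot entries, for example
#
#   {"delr": 1000.0, "delc": 1000.0, "xul": 500955.0,
#    "yul": 1205285.0, "angrot": 0.0}
#
# yields a transform whose origin is the upper left corner of the grid:
#
# >>> transform = get_modelgrid_transform('grid.json')
# >>> transform * (0, 0)
# (500955.0, 1205285.0)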
def read_mf6_lake_obs(f, perioddata, start_date='2012-01-01',
                      keep_only_last_timestep=True):
    """Read MODFLOW-6 lake observation csv output into a DataFrame
    with stress period and datetime information, optionally retaining
    only the last timestep in each stress period.
    """
    df = pd.read_csv(f)
    df.columns = df.columns.str.lower()

    # convert modflow time to actual elapsed time
    # (by subtracting off the day for the initial steady-state period)
    if df.time.iloc[0] == 1:
        df['time'] -= 1

    # get stress period information for each timestep recorded
    kstp, kper = get_kstp_kper(perioddata.nstp)
    # drop an initial extra record (e.g. from an initial steady-state solution)
    # so that the number of rows matches the number of timesteps
    if len(df) == len(kstp) + 1:
        df = df.iloc[1:].copy()
    df['kstp'] = kstp
    df['kper'] = kper
    if keep_only_last_timestep:
        df = df.groupby('kper').last().reset_index()
    start_ts = pd.Timestamp(start_date)
    df['datetime'] = pd.to_timedelta(df.time, unit='D') + start_ts
    df.index = df.datetime
    return df.drop('datetime', axis=1)
def get_transmissivities(heads, hk, top, botm,
                         i=None, j=None, x=None, y=None,
                         modelgrid_transform=None,
                         screen_top=None, screen_botm=None,
                         include_zero_open_intervals=True,
                         nodata=-999):
    """
    Computes transmissivity in each model layer at specified locations and
    open intervals. A saturated thickness is determined for each row, column
    or x, y location supplied, based on the open interval (screen_top, screen_botm),
    if supplied, otherwise the layer tops and bottoms and the water table are used.

    Parameters
    ----------
    heads : 2D array OR 3D array
        numpy array of shape nlay by n locations (2D) OR complete heads array
        of the model for one time (3D).
    hk : 3D numpy array
        horizontal hydraulic conductivity values.
    top : 2D numpy array
        model top elevations.
    botm : 3D numpy array
        layer botm elevations.
    i : 1D array-like of ints, of length n locations
        zero-based row indices (optional; alternately specify x, y)
    j : 1D array-like of ints, of length n locations
        zero-based column indices (optional; alternately specify x, y)
    x : 1D array-like of floats, of length n locations
        x locations in real world coordinates (optional).
        If x and y are specified, a modelgrid_transform must also be provided.
    y : 1D array-like of floats, of length n locations
        y locations in real world coordinates (optional).
        If x and y are specified, a modelgrid_transform must also be provided.
    modelgrid_transform : affine.Affine instance, optional
        An `affine.Affine <https://github.com/sgillies/affine>`_ object describing
        the orientation of the model grid. Only required for getting i, j
        if x and y are specified. Modflow-setup :class:`~mfsetup.grid.MFsetupGrid`
        have this attached via the :meth:`~mfsetup.grid.MFsetupGrid.transform`
        property. Example::

            modelgrid_transform=affine.Affine(1000.0, 0.0, 500955,
                                              0.0, -1000.0, 1205285)

        for a uniform spacing of 1000 and upper left corner of 500955, 1205285
        with a rotation of 45 degrees, counter-clockwise about the upper left corner::

            modelgrid_transform=affine.Affine(1000.0, 0.0, 500955,
                                              0.0, -1000.0, 1205285).rotation(45.)

        An ``affine.Affine`` instance can also be created from a
        `Modflow-setup <https://github.com/aleaf/modflow-setup>`_
        grid JSON file via the :func:`~mfobs.modflow.get_modelgrid_transform` function.
    screen_top : 1D array-like of floats, of length n locations
        open interval tops (optional; default is model top)
    screen_botm : 1D array-like of floats, of length n locations
        open interval bottoms (optional; default is model bottom)
    include_zero_open_intervals : bool
        In cases where the screen top and screen bottom elevations are equal,
        assign the model layer transmissivity for the cell containing
        the screen top/bottom.
    nodata : numeric
        optional; locations where heads=nodata will be assigned T=0

    Returns
    -------
    T : 2D array of same shape as heads (nlay x n locations)
        Transmissivities in each layer at each location
    """
    if i is not None and j is not None:
        pass
    elif x is not None and y is not None:
        # get row, col for observation locations
        i, j = get_ij(modelgrid_transform, x, y)
    else:
        raise ValueError('Must specify row, column or x, y locations.')

    # get k-values and botms at those locations
    # (make nlayer x n sites arrays)
    hk2d = hk[:, i, j]
    botm2d = botm[:, i, j]
    assert hk2d.shape == botm2d.shape, 'hk and bottom arrays must have consistent dimensions'

    if len(heads.shape) == 3:
        heads = heads[:, i, j]
    msg = 'Shape of heads array must be nlay x nhyd'
    assert heads.shape == botm2d.shape, msg

    # set open interval tops/bottoms to model top/bottom if None
    if screen_top is None:
        screen_top = top[i, j]
    if screen_botm is None:
        screen_botm = botm[-1, i, j]

    # make an nlayers x n sites array of layer tops
    tops = np.empty_like(botm2d, dtype=float)
    tops[0, :] = top[i, j]
    tops[1:, :] = botm2d[:-1]

    # expand top and bottom arrays to be same shape as botm, thickness, etc.
    # (so we have an open interval value for each layer)
    sctoparr = np.zeros(botm2d.shape)
    sctoparr[:] = screen_top
    scbotarr = np.zeros(botm2d.shape)
    scbotarr[:] = screen_botm

    # start with layer tops
    # set tops above heads to heads
    # set tops above screen top to screen top
    # (we only care about the saturated open interval)
    openinvtop = tops.copy()
    openinvtop[openinvtop > heads] = heads[openinvtop > heads]
    openinvtop[openinvtop > sctoparr] = sctoparr[openinvtop > sctoparr]

    # start with layer bottoms
    # set bottoms below screened interval to screened interval bottom
    # set screen bottoms below bottoms to layer bottoms
    openinvbotm = botm2d.copy()
    openinvbotm[openinvbotm < scbotarr] = scbotarr[openinvbotm < scbotarr]
    openinvbotm[scbotarr < botm2d] = botm2d[scbotarr < botm2d]

    # compute thickness of open interval in each layer
    thick = openinvtop - openinvbotm

    # assign open intervals above or below model to closest cell in column
    # (use a separate loop variable so the i row indices aren't shadowed)
    not_in_layer = np.sum(thick < 0, axis=0)
    not_in_any_layer = not_in_layer == thick.shape[0]
    for loc, n in enumerate(not_in_any_layer):
        if n:
            closest = np.argmax(thick[:, loc])
            thick[closest, loc] = 1.
    thick[thick < 0] = 0
    thick[heads == nodata] = 0  # exclude nodata cells
    thick[np.isnan(heads)] = 0  # exclude cells with no head value (inactive cells)

    # assign thickness of 1 to wells with no open interval
    # (screen top == screen bottom)
    if screen_top is not None and screen_botm is not None \
            and include_zero_open_intervals:
        # locations of open intervals that are within a single layer
        in_single_layer = (screen_top < tops) & (screen_botm > botm2d)
        zero_thickness = thick == 0
        thick[in_single_layer & zero_thickness] = 1.

    # compute transmissivities
    T = thick * hk2d
    return T
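
# A minimal numeric sketch (illustrative values): one location in a 2-layer
# model with hk of 10 and 5, layer bottoms at 90 and 80, a model top of 100,
# and a water level of 95. With no screen specified, the saturated thickness
# is 5 in layer 1 (95 down to 90) and 10 in layer 2 (90 down to 80), so the
# transmissivity is 50 in each layer:
#
# >>> heads = np.array([[95.], [95.]])
# >>> hk = np.array([[[10.]], [[5.]]])
# >>> top = np.array([[100.]])
# >>> botm = np.array([[[90.]], [[80.]]])
# >>> get_transmissivities(heads, hk, top, botm, i=[0], j=[0])
# array([[50.],
#        [50.]])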
def read_mf6_block(filename, blockname):
    """Read a named block of input from a MODFLOW-6 input file into a dict."""
    blockname = blockname.lower()
    data = {}
    read = False
    per = None
    with open(filename) as src:
        for line in src:
            line = line.lower()
            if 'begin' in line and blockname in line:
                if blockname == 'period':
                    per = int(line.strip().split()[-1])
                    data[per] = []
                elif blockname == 'continuous':
                    fname = line.strip().split()[-1]
                    data[fname] = []
                elif blockname == 'packagedata':
                    data['packagedata'] = []
                else:
                    blockname = line.strip().split()[-1]
                    data[blockname] = []
                read = blockname
                continue
            if 'end' in line and blockname in line:
                per = None
                read = False
                #break
            if read == 'options':
                line = line.strip().split()
                data[line[0]] = line[1:]
            elif read == 'packages':
                pckg, fname, ext = line.strip().split()
                data[pckg] = fname
            elif read == 'period':
                data[per].append(' '.join(line.strip().split()))
            elif read == 'continuous':
                data[fname].append(' '.join(line.strip().split()))
            elif read == 'packagedata':
                data['packagedata'].append(' '.join(line.strip().split()))
            elif read == blockname:
                data[blockname].append(' '.join(line.strip().split()))
    return data
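
# A minimal usage sketch (hypothetical file contents): for a TDIS file
# containing
#
#   BEGIN options
#     TIME_UNITS days
#     START_DATE_TIME 2012-01-01
#   END options
#
# read_mf6_block returns the keyword/value pairs from the block
# (along with an empty entry for the block name itself):
#
# >>> read_mf6_block('model.tdis', 'options')
# {'options': [], 'time_units': ['days'], 'start_date_time': ['2012-01-01']}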
def get_perioddata(tdis_file, sto_file=None, start_datetime=None,
                   end_datetime=None,
                   include_timesteps=False,
                   model_time_units=None):
    """Make the perioddata table required by other modflow-obs
    functions from a MODFLOW-6 Temporal Discretization (TDIS) file.

    Parameters
    ----------
    tdis_file : str or pathlike
        MODFLOW 6 TDIS, or MODFLOW-2005 style DIS Package file.
        Model version is determined from the extension
        ('.tdis' for MODFLOW 6; '.dis' for MODFLOW-2005)
    sto_file : str or pathlike
        MODFLOW 6 STO Package file
    start_datetime : str or pandas Timestamp
        Start date for model, if start date isn't specified in TDIS package file.
        Required for MODFLOW-2005 style models.
    end_datetime : str or pandas Timestamp (optional)
        End date of model, only required for GSFLOW models, where time-step
        specification is handled internally. end_datetime should be specified
        as midnight following the last full day simulated by the model.
        For example, if the model runs through 12/31/2019, end_datetime
        should be specified as 01/01/2020.
    include_timesteps : bool
        If True, return a perioddata table with one row for each time step,
        otherwise, return one row per stress period.
        By default, False
    model_time_units : str (optional)
        Time unit of model, in text understandable by pandas (e.g. "days").
        Specification of time units will override what is read from the
        TDIS or DIS package file.

    Returns
    -------
    perioddata : DataFrame
    """
    tdis_file = str(tdis_file)
    mf6 = True
    if tdis_file.endswith('.tdis'):
        options = read_mf6_block(tdis_file, 'options')
        perioddata = read_mf6_block(tdis_file, 'perioddata')
        steady_period_blocks = None
        if sto_file is not None:
            steady_period_blocks = read_mf6_block(sto_file, 'period')

        if start_datetime is None:
            start_datetime = options['start_date_time']
            if len(start_datetime) > 0:
                start_datetime = pd.Timestamp(start_datetime[0])
            else:
                msg = (f'No start_date_time in {tdis_file}; '
                       'start_date_time is needed to construct perioddata.')
                raise ValueError(msg)
        else:
            start_datetime = pd.Timestamp(start_datetime)
        if model_time_units is None:
            time_units = options['time_units']
            if len(time_units) > 0:
                time_units = time_units[0]
            else:
                msg = (f'No model_time_units specified, and no time_units in {tdis_file}; '
                       'time units are needed to construct perioddata.')
                raise ValueError(msg)
        else:
            time_units = model_time_units

        perlen = []
        nstp = []
        tsmult = []
        for row in perioddata['perioddata']:
            if row.strip().startswith('#'):
                continue
            rperlen, rnstp, rtsmult = row.split('#')[0].split()
            perlen.append(float(rperlen))
            nstp.append(int(rnstp))
            tsmult.append(float(rtsmult))
        if steady_period_blocks is not None:
            steady = [True if 'steady' in steady_period_blocks[i+1][0] else False
                      for i in range(len(perlen))]
        else:
            steady = True
    else:
        mf6 = False
        if start_datetime is None:
            raise ValueError('start_datetime required for MODFLOW-2005 style models')
        else:
            start_datetime = pd.Timestamp(start_datetime)
        with open(tdis_file) as src:
            for line in src:
                if line.strip().startswith('#'):
                    continue
                else:
                    dataset_1 = line.strip().split()
                    if model_time_units is None:
                        itmuni = int(dataset_1[4])
                        time_units = itmuni_text[itmuni]
                    else:
                        time_units = model_time_units
                    if model_time_units is None and itmuni == 0:
                        msg = (f'No model_time_units specified, and no time_units in {tdis_file}; '
                               'time units are needed to construct perioddata.')
                        raise ValueError(msg)
                    break
            perlen = []
            nstp = []
            tsmult = []
            steady = []
            for line in src:
                line = line.lower()
                if 'ss' in line or 'tr' in line:
                    if 'constant' not in line and 'open/close' not in line:
                        per_len, per_nstp, per_tsmult, sstr = line.strip().split()
                        perlen.append(float(per_len))
                        nstp.append(float(per_nstp))
                        tsmult.append(float(per_tsmult))
                        if sstr == 'ss':
                            steady.append(True)
                        elif sstr == 'tr':
                            steady.append(False)
                        else:
                            raise ValueError('Invalid input for DIS package '
                                             f'dataset 7, Ss/tr:\n{line}')

    perlen = np.array(perlen)
    actual_period_length = perlen.copy()
    if not np.isscalar(steady) and steady[0]:
        actual_period_length[0] = 0
    elapsed_time = np.cumsum(actual_period_length).tolist()
    start_elapsed_times = [0] + elapsed_time[:-1]
    start_datetimes = start_datetime + pd.to_timedelta(start_elapsed_times,
                                                       unit=time_units)
    # determine perlen and number of steps for a GSFLOW model
    # with a specified end date
    # (model is assumed to have time units of days and daily timesteps)
    if not mf6 and end_datetime is not None:
        end_datetime = pd.Timestamp(end_datetime)
        perlen[-1] = (end_datetime - start_datetimes[-1]).days
        nstp[-1] = perlen[-1]
    end_datetimes = start_datetimes + pd.to_timedelta(perlen - 1, unit='d')
    # modflow time (includes length of initial steady-state period)
    mf_time = np.cumsum(perlen)
    perioddata = pd.DataFrame({'start_datetime': start_datetimes.strftime('%Y-%m-%d'),
                               'end_datetime': end_datetimes.strftime('%Y-%m-%d'),
                               'time': mf_time,
                               'per': np.arange(len(perlen)),
                               'perlen': perlen,
                               'nstp': np.array(nstp, dtype=int),
                               'tsmult': tsmult,
                               'steady': steady
                               })
    if include_timesteps:
        perioddata = add_timesteps_to_perioddata(perioddata)
    return perioddata
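
# A minimal usage sketch (hypothetical file names): for a MODFLOW 6 model
# whose TDIS file includes start_date_time and time_units entries,
#
# >>> perioddata = get_perioddata('model.tdis', sto_file='model.sto')
# >>> perioddata.columns.tolist()
# ['start_datetime', 'end_datetime', 'time', 'per', 'perlen', 'nstp', 'tsmult', 'steady']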
def add_timesteps_to_perioddata(perioddata):
    """Expand a perioddata table from one row per stress period to one row
    per timestep, computing timestep lengths from nstp and tsmult.
    """
    # check that perlens are consistent with elapsed MODFLOW times
    calc_perlen = np.diff([0] + list(perioddata['time']))
    if not np.allclose(calc_perlen, perioddata['perlen']):
        msg = ('Specified perlen and times are inconsistent\n'
               f'times: {perioddata.time.values}\n'
               f'perlen: {perioddata.perlen.values}\n'
               )
        raise ValueError(msg)

    dfs = []
    for i, r in perioddata.iterrows():
        perlen = r['perlen']
        tsmult = r['tsmult']
        nstp = r['nstp']
        if tsmult == 1:
            ti = perlen / nstp
        else:
            # get the initial timestep size
            ti = perlen * (tsmult - 1) / (tsmult**nstp - 1)
        # remaining timestep sizes
        timesteps = [ti] + list(ti * np.cumprod([tsmult] * (nstp - 1)))
        assert np.allclose(np.sum(timesteps), perlen)
        if nstp > 1:
            # make a dataframe for the stress period
            df = pd.concat([pd.DataFrame(r).T] * nstp)
            end_datetimes = pd.Timestamp(r['start_datetime']) + \
                pd.to_timedelta(np.cumsum(timesteps), unit='d')
            start_datetimes = [pd.Timestamp(r['start_datetime'])] + list(end_datetimes[:-1])
            df['start_datetime'] = start_datetimes
            df['end_datetime'] = end_datetimes
            df['time'] = list(r['time'] - np.cumsum(timesteps[::-1])[::-1])[1:] + [r['time']]
            df['per'] = r['per']
            df['perlen'] = perlen
            df['nstp'] = nstp
            df['tsmult'] = tsmult
            df['timestep'] = list(range(nstp))
        else:
            df = pd.DataFrame(r).T
            df['timestep'] = 0
        dfs.append(df)
    timestepdata = pd.concat(dfs)

    for col in 'time', 'perlen', 'tsmult':
        timestepdata[col] = timestepdata[col].astype(float)
    for col in 'per', 'nstp', 'timestep':
        timestepdata[col] = timestepdata[col].astype(int)
    return timestepdata
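
# The initial timestep size above follows from the geometric series for a
# stress period with time step multiplier tsmult:
#
#   perlen = ti * (tsmult**nstp - 1) / (tsmult - 1)
#   ti = perlen * (tsmult - 1) / (tsmult**nstp - 1)
#
# For example (illustrative values), a 7-day stress period with 3 timesteps
# and tsmult=2 starts with a 1-day step:
#
# >>> perlen, nstp, tsmult = 7., 3, 2.
# >>> ti = perlen * (tsmult - 1) / (tsmult**nstp - 1)
# >>> [ti * tsmult**k for k in range(nstp)]
# [1.0, 2.0, 4.0]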