Source code for mfexport.listfile

"""
Functions for exporting results from the MODFLOW listing file
"""
from pathlib import Path
import os
import numpy as np
import pandas as pd
import flopy
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from mfexport.units import (get_figure_label_unit_text, parse_flux_units,
                            convert_volume_units, convert_time_units)
from mfexport.utils import make_output_folders


# for each MODFLOW-6 listfile budget term
# get the MF-2005 equivalent
mf2005_terms = {'STO-SS': 'STORAGE',
                'STO-SY': 'STORAGE',
                'WEL': 'WELLS',
                'RCH': 'RECHARGE',
                'SFR': 'STREAM_LEAKAGE',
                'LAK': 'LAKES',
                'CHD': 'CONSTANT_HEAD',
                'GHB': 'HEAD DEP BOUNDS'
                }
plotted = set()

[docs] def get_listfile_model_version(listfile): with open(listfile) as src: firstline = src.readline() if 'MODFLOW 6' in firstline: return 'mf6' elif 'MODFLOW-NWT' in firstline: return 'mfnwt' elif 'MODFLOW-2005' in firstline: return 'mf2005'
[docs] def get_budget_keys(listfile, key_suffix='BUDGET FOR ENTIRE MODEL'): keys = set() with open(listfile) as src: for line in src: if key_suffix in line: keys.add(line.split(' AT ')[0].strip()) return keys
[docs] def get_listfile_data(listfile, model_start_datetime=None, budgetkey=None): cls = flopy.utils.MfListBudget model_version = get_listfile_model_version(listfile) if model_version == 'mf6': cls = flopy.utils.Mf6ListBudget if budgetkey is not None: keys = get_budget_keys(listfile) if budgetkey not in keys: #budget_package = budgetkey.replace('BUDGET FOR ENTIRE MODEL', '').strip().split('_')[0] budget_package = budgetkey.strip().split(' ')[0].split('_')[0] budgetkey = [k for k in keys if budget_package in k] if len(budgetkey) > 0: budgetkey = budgetkey[0] else: return class PackageBudget(cls): """Export the a Package Budget from the listing file. """ def set_budget_key(self): self.budgetkey = budgetkey return cls = PackageBudget mfl = cls(listfile) budget = mfl.get_dataframes(start_datetime=model_start_datetime) if budget is not None: df_flux, df_vol = budget kstp, kper = zip(*mfl.get_kstpkper()) df_flux['kstp'] = kstp df_flux['kper'] = kper return df_flux
[docs] def plot_list_budget(listfile, model_name=None, model_start_datetime=None, output_path='postproc', model_length_units=None, model_time_units=None, secondary_axis_units=None, xtick_stride=6, plot_start_date=None, plot_end_date=None, plot_pcts=False, datetime_xaxis=True): pdfs_dir, _, _ = make_output_folders(output_path) if model_name is None: model_name = Path(listfile).stem if model_start_datetime is None: datetime_xaxis=False df_flux = get_listfile_data(listfile, model_start_datetime=model_start_datetime) df_flux_lake = get_listfile_data(listfile, model_start_datetime=model_start_datetime, budgetkey='LAK BUDGET FOR ENTIRE MODEL') df_flux_sfr = get_listfile_data(listfile, model_start_datetime=model_start_datetime, budgetkey='SFR BUDGET FOR ENTIRE MODEL') out_pdf = pdfs_dir / 'listfile_budget_summary.pdf' with PdfPages(out_pdf) as pdf: # plot summary with only net values for each term ax = plot_budget_summary(df_flux, title_prefix=model_name, term_nets=True, model_length_units=model_length_units, model_time_units=model_time_units, secondary_axis_units=secondary_axis_units, xtick_stride=xtick_stride, plot_start_date=plot_start_date, plot_end_date=plot_end_date, plot_pcts=plot_pcts) if ax is not None: pdf.savefig() plt.close() # plot summary showing in and out values for all terms ax = plot_budget_summary(df_flux, title_prefix=model_name, model_length_units=model_length_units, model_time_units=model_time_units, secondary_axis_units=secondary_axis_units, xtick_stride=xtick_stride, plot_start_date=plot_start_date, plot_end_date=plot_end_date, plot_pcts=plot_pcts) if ax is not None: pdf.savefig() plt.close() # plot summary of annual net means ax = plot_budget_summary(df_flux, title_prefix=model_name, term_nets=True, model_length_units=model_length_units, model_time_units=model_time_units, secondary_axis_units=secondary_axis_units, xtick_stride=xtick_stride, plot_start_date=plot_start_date, plot_end_date=plot_end_date, annual_sums=True, plot_pcts=plot_pcts) if ax is not None: pdf.savefig() plt.close() print(f'wrote {out_pdf}') pdf_outfile = pdfs_dir / 'listfile_budget_by_term.pdf' with PdfPages(pdf_outfile) as pdf: plotted = set() terms = [c for c in df_flux.columns if c not in {'kstp', 'kper'}] for term in terms: if term not in plotted: plot_budget_term(df_flux, term, title_prefix=model_name, #plotted=plotted, model_length_units=model_length_units, model_time_units=model_time_units, secondary_axis_units=secondary_axis_units, xtick_stride=xtick_stride, plot_start_date=plot_start_date, plot_end_date=plot_end_date, datetime_xaxis=datetime_xaxis) pdf.savefig() plt.close() if df_flux_lake is not None and len(df_flux_lake) > 0: plotted = set() terms = [c for c in df_flux_lake.columns if c not in {'kstp', 'kper'}] for term in terms: if term not in plotted: title_prefix = '{} Lake Package'.format(model_name) plot_budget_term(df_flux_lake, term, title_prefix=title_prefix, #plotted=plotted, model_length_units=model_length_units, model_time_units=model_time_units, secondary_axis_units=secondary_axis_units, plot_start_date=plot_start_date, plot_end_date=plot_end_date, datetime_xaxis=datetime_xaxis) pdf.savefig() plt.close() if df_flux_sfr is not None and len(df_flux_sfr) > 0: plotted = set() terms = [c for c in df_flux_sfr.columns if c not in {'kstp', 'kper'}] for term in terms: if term not in plotted: title_prefix = '{} SFR Package'.format(model_name) plot_budget_term(df_flux_sfr, term, title_prefix=title_prefix, #plotted=plotted, model_length_units=model_length_units, model_time_units=model_time_units, secondary_axis_units=secondary_axis_units, plot_start_date=plot_start_date, plot_end_date=plot_end_date, datetime_xaxis=datetime_xaxis) pdf.savefig() plt.close() print(f'wrote {pdf_outfile}')
[docs] def plot_budget_summary(df, title_prefix='', title_suffix='', date_index_fmt='%Y-%m', term_nets=False, model_length_units=None, model_time_units=None, secondary_axis_units=None, xtick_stride=6, plot_start_date=None, plot_end_date=None, plot_pcts=False, annual_sums=False, special_column_renames=None, ): """Plot a stacked bar chart summary of a MODFLOW listing file budget dataframe. Parameters ---------- df : DataFrame Table of listing file budget results produced by flopy; typically the flux (not volume) terms (see example below). The DataFrame needs to have a datetime index. title_prefix : str, optional Prefix to insert at the begining of the title, for example the model name. by default '' title_suffix : str, optional Suffix to insert at the end of the title, by default '' date_index_fmt : str, optional Date format for the plot x-axis, by default '%Y-%m' term_nets : bool, optional Option to only plot net quantities for each stress period. For example if the inflows and outflows for the WEL package were +10 and -10, a bar of zero height would be plotted. by default False model_length_units : str, optional Length units of the model, for labeling and conversion to secondary_axis_units, by default None model_time_units : str, optional Time units of the model, for labeling and conversion to secondary_axis_units, by default None secondary_axis_units : str, optional Option to include a secondary y-axis on the right with another unit, for example 'mgal/day' for million gallons per day. Requires `model_length_units` and `model_time_units`. by default None xtick_stride : int, optional Spacing between x-ticks. May be useful for models with many stress periods. by default 6 plot_start_date : str, optional Minimum date to plot on the x-axis, in a string format recognizable by pandas (if `df` has a datetime index) or a numeric value (if `df` has a numeric index). by default None (plot all dates) plot_end_date : str, optional Maximum date to plot on the x-axis, in a string format recognizable by pandas (if `df` has a datetime index) or a numeric value (if `df` has a numeric index). by default None (plot all dates) plot_pcts : bool Option to include the percentage of each flux. by default, False annual_sums : bool Option to summarize budget by year (e.g. using :py:meth:`pandas.DataFrame.groupby`). Requires that ``df`` have a valid datetime index. by default, False Returns ------- ax : matplotlib axes subplot instance Examples -------- .. code-block:: python from mfexport.listfile import get_listfile_data, plot_budget_summary df = get_listfile_data(listfile='model.list', model_start_datetime='2000-01-01') plot_budget_summary(df) """ # slice the dataframe to the specified time range (if any) df = df.copy() df = df.loc[slice(plot_start_date, plot_end_date)] if annual_sums: if isinstance(df.index, pd.DatetimeIndex): dfa = df.groupby(df.index.year).mean() dfa['kper'] = df.groupby(df.index.year).last()['kper'] dfa['kstp'] = df.groupby(df.index.year).last()['kstp'] df = dfa else: print('Skipping, annual_sums requires a datetime index.') return if len(df) < xtick_stride * 2: xtick_stride = 1 fig, ax = plt.subplots(figsize=(11, 8.5)) in_cols = [c for c in df.columns if '_IN' in c and 'TOTAL' not in c and '(net)' not in c] # Zone Budget 6 output in_cols += [c for c in df.columns if '-IN' in c and '(net)' not in c] zone_flux_cols = {c: c.replace(' ', '_') for c in df.columns if 'FROM ' in c or 'TO ' in c} if special_column_renames is not None: special_column_renames = {zone_flux_cols.get(k, k): v for k, v in special_column_renames.items()} if any(zone_flux_cols): for old_name, new_name in zone_flux_cols.items(): df.rename(columns={old_name: new_name}, inplace=True) in_cols += [c for c in zone_flux_cols.values() if 'FROM' in c] out_cols = [c for c in df.columns if '_OUT' in c and 'TOTAL' not in c] out_cols += [c for c in df.columns if '-OUT' in c] out_cols += [c for c in zone_flux_cols.values() if 'TO' in c] # rename the special columns to make the plot more understandable if special_column_renames is not None: for old_name, new_name in special_column_renames.items(): df.rename(columns={old_name: new_name}, inplace=True) # update the in and out columns too if old_name in in_cols: in_cols[in_cols.index(old_name)] = new_name elif old_name in out_cols: out_cols[out_cols.index(old_name)] = new_name if not term_nets: ax = df[in_cols].plot.bar(stacked=True, ax=ax,# width=20 ) df[out_cols] *= -1 ax = (df[out_cols]).plot.bar(stacked=True, ax=ax,# width=20 ) df_pcts = df.copy() # zone budget output doesn't have "TOTAL" columns if "TOTAL_IN" not in df.columns: df['TOTAL_IN'] = df[in_cols].sum(axis=1) # zone budget output doesn't have "TOTAL_IN" if "TOTAL_OUT" not in df.columns: df['TOTAL_OUT'] = df[out_cols].sum(axis=1) df_pcts[in_cols] = df[in_cols].div(df['TOTAL_IN'], axis=0) df_pcts[out_cols] = df[out_cols].div(df['TOTAL_OUT'], axis=0) else: pairs = list(zip(in_cols, out_cols)) net_cols = [] for in_col, out_col in pairs: net_col = f"{in_col.replace('_IN', '').replace('-IN', '')} (net)" df[net_col] = df[in_col] - df[out_col] net_cols.append(net_col) ax = df[net_cols].plot.bar(stacked=True, ax=ax,# width=20 ) net_sums = df[net_cols][df[net_cols] > 0].sum(axis=1) df_pcts = df[net_cols].div(net_sums, axis=0) if isinstance(df.index, pd.DatetimeIndex): ax.set_xticklabels(df.index.strftime(date_index_fmt)) elif annual_sums: ax.set_xlabel('Calendar year') else: ax.set_xlabel('Time since the start of the simulation, in model units') ax.axhline(0, zorder=100, lw=0.5, c='k') # create ylabel with model units, if provided if model_length_units is not None and model_time_units is not None: units_text = get_figure_label_unit_text(model_length_units, model_time_units, length_unit_exp=3) else: units_text = '$L^3/T$' ax.set_ylabel(f'Flow rate, in model units of {units_text}') # add commas to y axis values formatter = mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')) ax.get_yaxis().set_major_formatter(formatter) # optional 2nd y-axis with other units if secondary_axis_units is not None: length_units2, time_units2 = parse_flux_units(secondary_axis_units) vol_conversion = convert_volume_units(model_length_units, length_units2) time_conversion = convert_time_units(model_time_units, time_units2) def fconvert(x): return x * vol_conversion * time_conversion def rconvert(x): return x / (vol_conversion * time_conversion) secondary_axis_unit_text = get_figure_label_unit_text(length_units2, time_units2, length_unit_exp=3) secax = ax.secondary_yaxis('right', functions=(fconvert, rconvert)) secax.set_ylabel(f'Flow rate, in {secondary_axis_unit_text}') secax.get_yaxis().set_major_formatter(formatter) # add stress period info ymin, ymax = ax.get_ylim() xlocs = np.arange(len(df)) yloc = np.ones(len(df)) * (ymin + 0.03 * (ymax - ymin)) if xtick_stride is None: xtick_stride = int(np.round(len(df) / 10, 0)) xtick_stride = 1 if xtick_stride < 1 else xtick_stride kpers = set() for x, y in zip(xlocs[::xtick_stride], yloc[::xtick_stride]): kper = int(df.iloc[x]['kper']) # only make one line for each stress period if kper not in kpers: ax.axvline(x, lw=0.5, c='k', zorder=-2) ax.text(x, y, f" {kper}", transform=ax.transData, ha='left', va='top') kpers.add(kper) ax.text(0, # min(kpers), # (the x loc of the first bar is always 0) y + abs(0.06*y), ' model stress period:', transform=ax.transData, ha='left', va='top') title_text = 'budget summary' if term_nets: title_text += ' (net fluxes)' title_text = ' '.join((title_prefix, title_text, title_suffix)).strip() ax.set_title(title_text) ax.legend(ncol=2) # reduce x-tick density ticks = ax.xaxis.get_ticklocs() ticklabels = [l.get_text() for l in ax.xaxis.get_ticklabels()] ax.xaxis.set_ticks(ticks[::xtick_stride]) ax.xaxis.set_ticklabels(ticklabels[::xtick_stride]) # add percentages for smaller datasets if plot_pcts: ymin, ymax = ax.get_ylim() # max bar height for printing % height_cutoff_frac = 0.01 height_cutoff = ymax * height_cutoff_frac for p in ax.patches: height = p.get_height() if abs(height) > height_cutoff: width = p.get_width() xloc, yloc = p.get_xy() x_value = int(xloc + 0.5 * width) # get the percentage if term_nets: loc = df[net_cols].iloc[x_value] == height else: loc = df.iloc[x_value] == height pct = df_pcts.iloc[x_value][loc].values[0] y_center = yloc + 0.5 * height # plot the text ax.text(x_value, y_center, f'{abs(pct):.1%}', fontsize=8, color='black', transform=ax.transData, ha='center', va='center') return ax
[docs] def plot_budget_term(df, term, title_prefix='', title_suffix='', model_length_units=None, model_time_units=None, secondary_axis_units=None, xtick_stride=None, plot_start_date=None, plot_end_date=None, datetime_xaxis=True): """Make a timeseries plot of an individual MODFLOW listing file budget term. Parameters ---------- df : DataFrame Table of listing file budget results produced by flopy; typically the flux (not volume) terms (see example below). title_prefix : str, optional Prefix to insert at the begining of the title, for example the model name. by default '' title_suffix : str, optional Suffix to insert at the end of the title, by default '' model_length_units : str, optional Length units of the model, for labeling and conversion to secondary_axis_units, by default None model_time_units : str, optional Time units of the model, for labeling and conversion to secondary_axis_units, by default None secondary_axis_units : str, optional Option to include a secondary y-axis on the right with another unit, for example 'mgal/day' for million gallons per day. Requires `model_length_units` and `model_time_units`. by default None xtick_stride : int, optional Spacing between x-ticks. May be useful for models with many stress periods. by default 6 plot_start_date : str, optional Minimum date to plot on the x-axis, in a string format recognizable by pandas (if `df` has a datetime index) or a numeric value (if `df` has a numeric index). May be useful if the model has long spinup period(s) that would obscure later periods of interest when datetime_xaxis=True. by default None (plot all dates) plot_end_date : str, optional Maximum date to plot on the x-axis, in a string format recognizable by pandas (if `df` has a datetime index) or a numeric value (if `df` has a numeric index). by default None (plot all dates) datetime_xaxis : bool Plot budget values as a function of time. If False, plot as a function of stress period. by default, True Returns ------- ax : matplotlib axes subplot instance Examples -------- .. code-block:: python from mfexport.listfile import get_listfile_data, plot_budget_summary df = get_listfile_data(listfile='model.list', model_start_datetime='2000-01-01') plot_budget_term(df, 'WELLS') """ # slice the dataframe to the specified time range (if any) df = df.copy() df = df.loc[slice(plot_start_date, plot_end_date)] if not datetime_xaxis and 'datetime' in df.index.dtype.name: df['datetime'] = df.index df.index = df['kper'] if term not in {'IN-OUT', 'PERCENT_DISCREPANCY'}: # get the absolute quantity for the term if isinstance(term, list): series = df[term].sum(axis=1) out_term = [s.replace('_IN', '_OUT') for s in term] out_series = df[out_term].sum(axis=1) else: term = term.replace('_IN', '').replace('_OUT', '') in_term = f'{term}_IN' out_term = f'{term}_OUT' series = df[in_term] out_series = df[out_term] # get the net net_series = series - out_series # get the percent relative to the total budget pct_series = series/df['TOTAL_IN'] pct_out_series = out_series/df['TOTAL_OUT'] pct_net_series = pct_series - pct_out_series else: series = df[term] out_term = None out_series = None if model_length_units is not None and model_time_units is not None: units_text = get_figure_label_unit_text(model_length_units, model_time_units, length_unit_exp=3) else: units_text = '$L^3/T$' if out_series is not None: fig, axes = plt.subplots(2, 1, sharex=True, figsize=(11, 8.5)) axes = axes.flat ax = axes[0] series.plot(ax=ax, c='C0') ax.axhline(0, zorder=-1, lw=0.5, c='k') (-out_series).plot(ax=ax, c='C1') net_series.plot(ax=ax, c='0.5', zorder=-1) h, l = ax.get_legend_handles_labels() ax.legend(h, ['In', 'Out', 'Net']) ax.set_ylabel(f'Flow rate, in model units of {units_text}') # add commas to y axis values formatter = mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')) ax.get_yaxis().set_major_formatter(formatter) # optional 2nd y-axis with other units if secondary_axis_units is not None: length_units2, time_units2 = parse_flux_units(secondary_axis_units) vol_conversion = convert_volume_units(model_length_units, length_units2) time_conversion = convert_time_units(model_time_units, time_units2) def fconvert(x): return x * vol_conversion * time_conversion def rconvert(x): return x / (vol_conversion * time_conversion) secondary_axis_unit_text = get_figure_label_unit_text(length_units2, time_units2, length_unit_exp=3) secax = ax.secondary_yaxis('right', functions=(fconvert, rconvert)) secax.set_ylabel(f'Flow rate, in {secondary_axis_unit_text}') secax.get_yaxis().set_major_formatter(formatter) # plot the percentage of total budget on second axis ax2 = axes[1] pct_series.plot(ax=axes[1], c='C0') ax2.axhline(0, zorder=-1, lw=0.5, c='k') (-pct_out_series).plot(ax=ax2, c='C1') pct_net_series.plot(ax=ax2, c='0.5', zorder=-1) ax2.set_ylabel('Fraction of budget') single_subplot = False # add stress period info #ymin, ymax = ax.get_ylim() #yloc = np.ones(len(df)) * (ymin - 0.02 * (ymax - ymin)) #if xtick_stride is None: # xtick_stride = int(np.round(len(df) / 10, 0)) # xtick_stride = 1 if xtick_stride < 1 else xtick_stride #kpers = set() #for x, y in zip(df.index.values[::xtick_stride], yloc[::xtick_stride]): # kper = int(df.loc[x, 'kper']) # # only make one line for each stress period # if kper not in kpers: # ax.axvline(x, lw=0.5, c='k', zorder=-2) # ax2.axvline(x, lw=0.5, c='k', zorder=-2) # ax2.text(x, y, kper, transform=ax.transData, ha='center', va='top') # kpers.add(kper) #ax2.text(0.5, -0.07, 'Model Stress Period', ha='center', va='top', transform=ax.transAxes) else: fig, ax = plt.subplots(1, 1, sharex=True, figsize=(11, 8.5)) series.plot(ax=ax, c='C0') ax.axhline(0, zorder=-1, lw=0.5, c='k') ax2 = ax single_subplot = True # add stress period info ymin, ymax = ax.get_ylim() pad = 0.02 if single_subplot: pad = 0.1 yloc = np.ones(len(df)) * (ymin - pad * (ymax - ymin)) if xtick_stride is None: xtick_stride = int(np.round(len(df) / 10, 0)) xtick_stride = 1 if xtick_stride < 1 else xtick_stride kpers = set() for x, y in zip(df.index.values[::xtick_stride], yloc[::xtick_stride]): kper = int(df.loc[x, 'kper']) # only make one line for each stress period if kper not in kpers: ax.axvline(x, lw=0.5, c='k', zorder=-2) ax2.axvline(x, lw=0.5, c='k', zorder=-2) ax2.text(x, y, kper, transform=ax.transData, ha='center', va='top') kpers.add(kper) ax2.text(0.5, -0.07, 'Model Stress Period', ha='center', va='top', transform=ax.transAxes) title_text = ' '.join((title_prefix, term.split('_')[0], title_suffix)).strip() ax.set_title(title_text) # set x axis limits xmin, xmax = series.index.min(), series.index.max() if plot_start_date is not None: if not datetime_xaxis: loc = df.datetime >= plot_start_date xmin = df.datetime.loc[loc].index[0] else: xmin = pd.Timestamp(plot_start_date) if plot_end_date is not None: if not datetime_xaxis: loc = df.datetime <= plot_end_date xmax = df.datetime.loc[loc].index[-1] else: xmax = pd.Timestamp(plot_end_date) ax.set_xlim(xmin, xmax) if not datetime_xaxis: if 'datetime' in df.columns: xticks = ax2.get_xticks() datetime_labels = [] for i in xticks: if i in df.index: dt_label = df['datetime'].loc[int(i)].strftime('%Y-%m-%d') else: dt_label = '' datetime_labels.append(dt_label) ax2.set_xticklabels(datetime_labels, rotation=90) ax2.set_xlabel(None) if not isinstance(df.index, pd.DatetimeIndex): ax2.set_xlabel('Time since the start of the simulation, in model units') plotted.update({term, out_term})