"""
Miscellaneous utilities
"""
import inspect
import pprint
import numpy as np
import pandas as pd
[docs]
def fill_nats(df, perioddata):
    """Replace missing (NaT) observation dates with the start date
    of the stress period that each observation belongs to.

    Parameters
    ----------
    df : DataFrame
        Observation data. Must have a 'datetime' column
        with dates and a 'per' column with stress periods.
    perioddata : DataFrame
        Perioddata table produced by modflow-setup. Must have
        a 'per' column and a 'start_datetime' column.

    Returns
    -------
    None
        The 'datetime' column of `df` is overwritten in-place;
        `perioddata` is only read.
    """
    # lookup of stress period -> parsed period start date
    per_to_start = dict(zip(perioddata['per'],
                            pd.to_datetime(perioddata['start_datetime'])))
    filled = []
    for per, dt in zip(df['per'], df['datetime']):
        # only null dates are looked up, so periods absent from
        # perioddata are fine as long as their dates are present
        if pd.isnull(dt):
            dt = per_to_start[per]
        filled.append(dt)
    df['datetime'] = filled
[docs]
def set_period_start_end_dates(perioddata):
    """Ensure that the start and end dates for consecutive periods are consistent
    but don't overlap. For example, given two consecutive periods with start dates of
    3/1/2020 and 4/1/2020, the end dates should be 3/31/2020 and 4/30/2020,
    so that a selection in pandas with :meth:`pandas.DataFrame.loc` doesn't include
    values from the next period.

    Parameters
    ----------
    perioddata : DataFrame
        Perioddata table produced by modflow-setup. Must have
        'start_datetime' and 'end_datetime' columns (dates or date strings);
        the first row's 'end_datetime' sets the length of the first period.
        Elapsed times at the end of each period (in days) are read from
        a 'time' column or, if there is no such column, from an index
        named 'time'. If neither exists, a 'time' column is computed
        from 'end_datetime' and added to the table.
        An optional 'steady' column flags steady-state periods.

    Returns
    -------
    None
        Operates on perioddata in-place: rows are sorted by time, and the
        'start_datetime' and 'end_datetime' columns are overwritten
        with '%Y-%m-%d' strings.

    Raises
    ------
    ValueError
        If a 'steady' column is present and the end date following
        a transient (non-steady) period is less than one day later
        than that period's end date.

    Notes
    -----
    Assumes time units of days.
    """
    time_is_index = perioddata.index.name == 'time'
    if 'time' not in perioddata.columns and not time_is_index:
        # parse explicitly so that string-valued date columns work too
        elapsed = (pd.to_datetime(perioddata['end_datetime'])
                   - pd.to_datetime(perioddata['start_datetime']).min())
        perioddata['time'] = elapsed.dt.days + 1
    if time_is_index:
        perioddata.sort_index(inplace=True)
    else:
        perioddata.sort_values(by='time', inplace=True)

    # elapsed times may live in a column or (only) in the index
    if 'time' in perioddata.columns:
        times = perioddata['time']
    else:
        times = perioddata.index.to_series()

    # set new end dates based on start dates and time
    start_datetimes = pd.to_datetime(perioddata['start_datetime'].values)
    # base initial period or timestep length on difference between start and end datetime
    initial_end_datetime = pd.to_datetime(perioddata['end_datetime'].values[0])
    initial_perlen = np.max([1, (initial_end_datetime - start_datetimes[0]).days])
    perlen = np.array([initial_perlen] + times.diff().tolist()[1:])
    new_end_datetimes = start_datetimes + pd.to_timedelta(perlen - 1, unit='D')

    # fix any invalid end_datetimes resulting from the assumption above
    # (that perlen is between successive start dates)
    # there may be gaps between the periods,
    # for example in a successive steady-state simulation
    # (compare against the parsed start dates, not the raw column,
    # which may hold strings)
    next_start_datetimes = start_datetimes[1:]
    overlapping = np.asarray(new_end_datetimes[:-1] > next_start_datetimes)
    if overlapping.any():
        # end the period the day before the next one starts
        corrected_ends = next_start_datetimes[overlapping] - pd.Timedelta(1, 'D')
        # make boolean vector of full length
        # (including last end datetime, which can't be invalid)
        full_mask = overlapping.tolist() + [False]
        # cast to a Series so that values can be assigned via the mask
        new_end_datetimes = pd.Series(new_end_datetimes)
        new_end_datetimes[full_mask] = corrected_ends.tolist()
        # recast to DatetimeIndex for consistency with the no-overlap case
        new_end_datetimes = pd.DatetimeIndex(new_end_datetimes)

    perioddata['start_datetime'] = start_datetimes.strftime('%Y-%m-%d')
    perioddata['end_datetime'] = new_end_datetimes.strftime('%Y-%m-%d')

    # check for transient period end dates that overlap
    if 'steady' in perioddata.columns:
        transient = ~perioddata['steady'].values[:-1].astype(bool)
        # spacing between successive end dates
        end_spacing = new_end_datetimes[1:] - new_end_datetimes[:-1]
        overlapping_ends = transient & np.asarray(end_spacing < pd.Timedelta(1, 'D'))
        if np.any(overlapping_ends):
            bad_datetimes = perioddata.iloc[1:][overlapping_ends]
            raise ValueError("Overlapping stress periods or timesteps; "
                             f"check perioddata input.\n{bad_datetimes}")
[docs]
def print_item(k, v):
    """Print a key and an abbreviated representation of its value.

    Parameters
    ----------
    k : object
        Key or label; printed first, followed by a colon.
    v : object
        Value to summarize:

        * dict with more than one item: only the first item is shown
        * list with more than three items: only the first and last items
        * DataFrame: the result of ``head()``
        * ndarray: shape and dtype, plus min, mean and max
          if those can be computed and formatted
        * anything else (and short dicts/lists): printed in full

    Returns
    -------
    None
        Output is written to stdout.
    """
    print(f'{k}: ', end='')
    if isinstance(v, dict):
        if len(v) > 1:
            first_key, first_value = next(iter(v.items()))
            print(f'{{{first_key}: {first_value}\n ...\n}}')
        else:
            print(v)
    elif isinstance(v, list):
        if len(v) > 3:
            print(f'[{v[0]} ... {v[-1]}]')
        else:
            pprint.pprint(v, compact=True)
    elif isinstance(v, pd.DataFrame):
        print(v.head())
    elif isinstance(v, np.ndarray):
        txt = f'array: {v.shape}, {v.dtype}'
        try:
            txt += f', min: {v.min():g}, mean: {v.mean():g}, max: {v.max():g}'
        except Exception:
            # statistics are best-effort (empty or non-numeric arrays);
            # unlike a bare except, this lets KeyboardInterrupt
            # and SystemExit propagate
            pass
        print(txt)
    else:
        print(v)