Source code for mfobs.sep

"""Functions related to hydrograph separation
"""
import numpy as np
import pandas as pd
    
    
[docs] def ih_method(Qseries, block_length=5, tp=0.9, interp_semilog=True, freq='D', limit=100): """Baseflow separation using the Institute of Hydrology method, as documented in Institute of Hydrology (1980) and Wahl and Wahl (1988). Parameters ---------- Qseries : pandas Series Pandas time series (with datetime index) containing measured streamflow values. block_length : int N parameter in IH method. Streamflow is partitioned into N-day intervals; a minimum flow is recorded for each interval. tp : float f parameter in IH method. For each three N-day minima, if f * the central value is less than the adjacent two values, the central value is considered a turning point. Baseflow is interpolated between the turning points. interp_semilog : boolean If False, linear interpolation is used to compute baseflow between turning points (as documented in the IH method). If True, the base-10 logs of the turning points are interpolated, and the interpolated values are transformed back to linear space (producing a curved hydrograph). Semi-logarithmic interpolation as documented in Wahl and Wahl (1988), is used in the Base-Flow Index (BFI) fortran program. This method reassigns zero values to -2 in log space (0.01) for the interpolation. freq : str or DateOffset, default ā€˜D’ Any `pandas frequency alias <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases>`_ Regular time interval that forms the basis for base-flow separation. Input data are resampled to this frequency, and block lengths represent the number of time increments of the frequency. By default, days ('D'), which is what all previous BFI methods are based on. Note that this is therefore an experimental option; it is up to the user t o verify any results produced by other frequencies. limit : int Maximum number of timesteps allowed during linear interploation between baseflow ordinances. Must be greater than zero. Returns ------- Q : pandas DataFrame DataFrame containing the following columns: minima : N-day minima ordinate : selected turning points n : block number for each N-day minima QB : computed baseflow Q : discharge values Notes ----- Whereas this program only selects turning points following the methodology above, the BFI fortran program adds artificial turning points at the start and end of each calendar year. Therefore results for datasets consisting of multiple years will differ from those produced by the BFI program. References ---------- Institute of Hydrology, 1980b, Low flow studies report no. 3--Research report: Wallingford, Oxon, United Kingdom, Institute of Hydrology Report no. 3, p. 12-19 Wahl, K.L and Wahl, T.L., 1988. Effects of regional ground-water level declines on streamflow in the Oklahoma Panhandle. In Proceedings of the Symposium on Water-Use Data for Water Resources Management, American Water Resources Association. """ if len(Qseries) < 2 * block_length: raise ValueError('Input Series must be at ' 'least two block lengths\nblock_length: ' '{}\n{}'.format(block_length, Qseries)) # convert flow values to numeric if they are objects # (pandas will cast column as objects if there are strings such as "ICE") # coerce any strings into np.nan values if Qseries.dtype.name == 'object': Qseries = pd.to_numeric(Qseries, errors='coerce') # convert the series to a dataframe; resample to daily values # missing days will be filled with nan values df = pd.DataFrame(Qseries).resample(freq).mean() df.columns = ['Q'] # compute block numbers for grouping values on blocks nblocks = int(np.floor(len(df) / float(block_length))) # make list of ints, one per measurement, denoting the block # eg [1,1,1,1,1,2,2,2,2,2...] for block_length = 5 n = [] for i in range(nblocks): n += [i + 1] * block_length n += [np.nan] * (len(df) - len(n)) # pad any leftover values with nans df['n'] = n # compute the minimum for each block # create dataframe Q, which only has minimums for each block blocks = df[['Q', 'n']].reset_index(drop=True).dropna(axis=0).groupby('n') Q = blocks.min() Q = Q.rename(columns={'Q': 'block_Qmin'}) Q['n'] = Q.index # get the index position of the minimum Q within each block idx_Qmins = blocks.idxmin()['Q'].values.astype(int) # get the date associated with each Q minimum Q['datetime'] = df.index[idx_Qmins] # compute baseflow ordinates Q['ordinate'] = [np.nan] * len(Q) Qlist = Q.block_Qmin.tolist() Q['Qi-1'] = [np.nan] + Qlist[:-2] + [np.nan] Q['Qi'] = [np.nan] + Qlist[1:-1] + [np.nan] Q['Qi+1'] = [np.nan] + Qlist[2:] + [np.nan] isordinate = tp * Q.Qi < Q[['Qi-1', 'Qi+1']].min(axis=1) Q.loc[isordinate, 'ordinate'] = Q.loc[isordinate, 'block_Qmin'] # reset the index of Q to datetime Q.index = Q.datetime # expand Q dataframe back out to include row for each day Q = Q.dropna(subset=['datetime'], axis=0).resample(freq).mean() # reassign the original flow values back to Q Q['Q'] = df.Q.loc[Q.index] Q.loc[Q['Q'] == 0, 'ordinate'] = 0 # interpolate between baseflow ordinates if interp_semilog: iszero = Q.ordinate.values == 0 logQ = np.log10(Q.ordinate) # fill zero values for the purpose of semi-log interpolation # (all baseflow values coinciding with # total flow values of zero will be reset to zero) # use the 1% quantile for filling # otherwise semi-log interpolation might be unreasonable (too rapid) # for larger units like cubic feet per day fill_value = np.log10(df.Q.loc[df.Q > 0].quantile(0.01)) logQ[iszero] = fill_value QB = np.power(10.0, logQ.interpolate(limit=limit).values) else: QB = Q.ordinate.interpolate(limit=limit).values Q['QB'] = QB # in places where 'Q' is zero, set 'QB' to zero as well # (for example, zero-flow values past the interpolation limit) # otherwise, an inconsistent number of flow values can be returned for a site, # depending on how many zero values are simulated, # which for example would cause PEST to crash during parameter estimation Q.loc[Q['Q'] == 0, 'QB'] = 0 # ensure that no baseflow values are > Q measured QBgreaterthanQ = Q.QB.values > Q.Q.values Q.loc[QBgreaterthanQ, 'QB'] = Q.loc[QBgreaterthanQ, 'Q'] return Q