from collections import defaultdict
from pathlib import Path
import os
import re
import numpy as np
import pandas as pd
from shapely.geometry import Point
import yaml
from gisutils import df2shp, project, raster
from mfsetup.units import convert_length_units, convert_volume_units
from mapgwm.lookups import aq_codes_dict
from mapgwm.utils import cull_data_to_active_area
class Swuds:
""" Code for preprocessing non-agricultural water use information
into clean CSV input to MODFLOW setup. Class excludes AQ, IR, and TE
water use from a swuds excel dataset. Includes logic to fill missing data,
as data at many sites are limited to survey years (e.g. 2010 and 2015).
"""
def __init__(self, xlsx=None, sheet=None, csvfile=None,
site_no_col='SITE_NO',
x_coord_col='FROM_DEC_LONG_VA',
y_coord_col='FROM_DEC_LAT_VA',
start_date=None, end_date=None,
source_crs=4269, dest_crs=5070,
data_length_units='feet',
data_volume_units='mgal',
model_length_units='meters',
default_screen_len=20,
cols='default'):
""" Constructor for the swuds class. Class methods will help pre-process
water-use data for MAP models. Dataframe produced by constructor
has original SWUDS data, useful for debugging.
Parameters
----------
xlsx: str
Path to xlsx data to be read. If xlsx file is passed, then a
selected worksheet from it will be converted to a csv file unless
the csvfile parameter is specified as None.
csvfile: str
Path to csv file with data (if xlsx if None) or to a csvfile
that is created from the selected worksheet. If xlsx is None,
then csvfile must be provided.
sheet: str
Name of worksheet in xlsx to be read, ignored if xlsx is None.
cols: list of str
List of columns to read from xlsx or csv, if None all columns are read.
If 'default' (which is the default if nothing is specified) the default
list of columns coded in the script is read.
site_no_col : str, optional
Column name in data with site identifiers,
by default 'SITE_NO'
        x_coord_col : str, optional
            Column name in data with x-coordinates,
            by default 'FROM_DEC_LONG_VA'
        y_coord_col : str, optional
            Column name in data with y-coordinates,
            by default 'FROM_DEC_LAT_VA'
start_date: str
start time for simulation as string 'yyyy-mm-dd'
end_date: str
end time for simulation as string 'yyyy-mm-dd'
        source_crs : obj
            Coordinate reference system of the water-use site locations.
A Python int, dict, str, or :class:`pyproj.crs.CRS` instance
passed to :meth:`pyproj.crs.CRS.from_user_input`
Can be any of:
- PROJ string
- Dictionary of PROJ parameters
- PROJ keyword arguments for parameters
- JSON string with PROJ parameters
- CRS WKT string
- An authority string [i.e. 'epsg:4326']
- An EPSG integer code [i.e. 4326]
- A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
- An object with a `to_wkt` method.
- A :class:`pyproj.crs.CRS` class
By default, epsg:4269
dest_crs : obj
Coordinate reference system of the model. Same input types
as ``source_crs``.
By default, epsg:5070
data_length_units : str; 'meters', 'feet', etc.
Units of lengths in data (elevations, depths, etc.)
by default, 'feet'
        data_volume_units : str; 'mgal', 'ft3', etc.
            Volumetric unit of pumping rates,
            by default 'mgal' (million gallons)
model_length_units : str; 'meters', 'feet', etc.
Length units of model.
by default, 'meters'
Attributes
----------
df: pandas dataframe
pandas dataframe read from spreadsheet or csv. Manipulated by other
methods of the class
aquifer_names: dict
dictionary of aquifer names keyed by NWIS codes, read using import
statement from mapgwm.lookups
regional_aquifers: dict
dictionary of regional aquifers keyed by NWIS codes, read using import
statement from mapgwm.lookups
        source_crs: obj
            coordinate reference system of the input data, as passed to the constructor
        dest_crs: obj
            output (model) coordinate reference system, as passed to the constructor
monthly_cols: list of str
list of column names for monthly values
start_date: str
start time for simulation as string 'yyyy-mm-dd'
end_date: str
end time for simulation as string 'yyyy-mm-dd'
        default_screen_len: float
            default screen length, in model length units
locations: dict
dictionary of x,y locations keyed by SITE_NO, added in reproject method
depths: dict
dictionary of depth keyed by SITE_NO
well_elevations: dict
dictionary of well elevations keyed by SITE_NO
prod_zone_top: defaultdict(dict)
defaultdict with production zone top (in meters) for each well
First key is the production zone name, and second is the SITE_NO
For example self.prod_zone_top['lower_claiborne']['WEL001'] = top_elev
prod_zone_bot: defaultdict(dict)
defaultdict with production zone bottom (in meters) for each well
First key is the production zone name, and second is the SITE_NO
For example self.prod_zone_bot['lower_claiborne']['WEL001'] = bot_elev
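
        Example (a minimal sketch; the file and sheet names are hypothetical)::

            wu = Swuds(xlsx='swuds_export.xlsx', sheet='water_use',
                       csvfile='swuds_raw.csv',
                       start_date='2008-01-01', end_date='2017-12-31',
                       data_volume_units='mgal', model_length_units='meters')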
"""
# set some attributes
self.monthly_cols = ['JAN_VAL', 'FEB_VAL', 'MAR_VAL', 'APR_VAL', 'MAY_VAL',
'JUN_VAL', 'JUL_VAL', 'AUG_VAL', 'SEP_VAL', 'OCT_VAL',
'NOV_VAL', 'DEC_VAL']
self.aquifer_names = aq_codes_dict['aquifer_code_names']
self.regional_aquifers = aq_codes_dict['regional_aquifer']
self.start_date = start_date
self.end_date = end_date
self.default_screen_len = default_screen_len
self.source_crs = source_crs
self.dest_crs = dest_crs
self.data_length_units = data_length_units
self.data_volume_units = data_volume_units
self.model_length_units = model_length_units
self.prod_zone_top = defaultdict(dict)
self.prod_zone_bot = defaultdict(dict)
self.locations = dict()
self.site_no_col = site_no_col
# now read in excel or csv file
defaultcols=["SITE_NO","WATER_CD","FROM_DEC_LAT_VA","FROM_DEC_LONG_VA","FROM_WELL_DEPTH_VA",
"FROM_ALT_VA","FROM_NAT_WATER_USE_CD","FROM_NAT_AQFR_CD","FROM_NAT_AQFR_NM","FROM_AQFR_CD",
"FROM_AQFR_NM","YEAR","SALINITY_CD","JAN_VAL","FEB_VAL","MAR_VAL","APR_VAL","MAY_VAL",
"JUN_VAL","JUL_VAL","AUG_VAL","SEP_VAL","OCT_VAL","NOV_VAL","DEC_VAL","ANNUAL_VAL",
"FROM_STATE_NM","FROM_COUNTY_NM","FROM_CONSTRUCTION_DT","FROM_INVENTORY_DT"]
if isinstance(cols, list):
usecols = cols
        elif cols is not None:  # cols='default': use the standard SWUDS columns above
usecols = defaultcols
else:
usecols = None
if xlsx is None and csvfile is None:
            raise ValueError('xlsx and csvfile cannot both be None for a Swuds object')
elif xlsx is None:
self.df = pd.read_csv(csvfile, usecols=usecols)
else:
self.df = pd.read_excel(xlsx, sheet_name=sheet, usecols=usecols)
if csvfile is not None:
self.df.to_csv(csvfile, index=False)
self.df.columns = self.df.columns.str.upper()
# remove trailing spaces from code names
if 'FROM_AQFR_CD' in self.df.columns:
self.df["FROM_AQFR_CD"] = self.df["FROM_AQFR_CD"].str.strip()
# make depths floats
if 'FROM_WELL_DEPTH_VA' in self.df.columns:
self.df['SCREEN_BOT'] = pd.to_numeric(self.df['FROM_WELL_DEPTH_VA'], errors='coerce')
else:
self.df['SCREEN_BOT'] = np.nan
        length_conversion = convert_length_units(data_length_units, model_length_units)
        if 'FROM_ALT_VA' in self.df.columns:
            # convert elevations to model units in the dataframe itself, so that
            # values filled in later from a DEM stay in consistent units
            self.df['FROM_ALT_VA'] = pd.to_numeric(self.df['FROM_ALT_VA'],
                                                   errors='coerce') * length_conversion
        # make dictionaries keyed by site number, in model length units
        self.depths = dict(zip(self.df['SITE_NO'], self.df['SCREEN_BOT'] * length_conversion))
        self.well_elevations = dict(zip(self.df['SITE_NO'], self.df['FROM_ALT_VA']))
self.sort_sites(primarysort=site_no_col)
# Best to reproject on init so that we know the points are in the dest_crs
self.reproject(x_coord_col=x_coord_col, y_coord_col=y_coord_col, key=site_no_col)
    def sort_sites(self, primarysort='SITE_NO', secondarysort=None):
""" Sort the dataframe by site number and quantity, or passed parameter
Parameters
----------
primarysort: str
            column used to sort and group the dataframe; defaults to 'SITE_NO'
secondarysort: str
            column used to sort rows within each ``primarysort`` group; default is None
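
        Example (a sketch; ANNUAL_VAL is one of the default SWUDS columns)::

            # keep the row with the largest annual withdrawal for each site
            wu.sort_sites(primarysort='SITE_NO', secondarysort='ANNUAL_VAL')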
"""
if secondarysort is not None:
self.df = self.df.sort_values([primarysort, secondarysort]).groupby([primarysort]).last().reset_index()
else:
self.df = self.df.sort_values([primarysort]).groupby([primarysort]).last().reset_index()
    def reproject(self, x_coord_col='FROM_DEC_LONG_VA', y_coord_col='FROM_DEC_LAT_VA', key='SITE_NO'):
""" Reproject from self.source_crs to self.dest_crs using gisutils
Parameters
----------
        x_coord_col : str, optional
            Column name in data with x-coordinates,
            by default 'FROM_DEC_LONG_VA'
        y_coord_col : str, optional
            Column name in data with y-coordinates,
            by default 'FROM_DEC_LAT_VA'
key: str
            key for the locations dictionary that is built; defaults to 'SITE_NO'
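
        Example (a sketch using the default SWUDS coordinate columns)::

            # reproject NAD83 longitude/latitude to the model CRS
            wu.reproject(x_coord_col='FROM_DEC_LONG_VA',
                         y_coord_col='FROM_DEC_LAT_VA', key='SITE_NO')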
"""
x_reprj, y_reprj = project(zip(self.df[x_coord_col], self.df[y_coord_col]),
self.source_crs, self.dest_crs)
self.df['x'] = x_reprj
self.df['y'] = y_reprj
self.df['geometry'] = [Point(x, y) for x, y in zip(x_reprj, y_reprj)]
# drop entries if no location information
self.df.dropna(subset=['x', 'y'], axis=0, inplace=True)
# make dictionary of location
self.locations = dict(list(zip(self.df[key], list(zip(self.df['x'], self.df['y'])))))
    def assign_missing_elevs(self, top_raster, dem_units='meters'):
""" Use the top of model raster, or land-surface raster,
to assign the elevation for points where elevation is missing.
Parameters
----------
top_raster: str
path to raster data set with land surface or model top elevation, used
to assign missing values to water-use points
        dem_units: str, optional
            length units of the raster elevations; by default 'meters'
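
        Example (a sketch; the raster name is a hypothetical placeholder)::

            wu.assign_missing_elevs('dem_mean_elevs.tif', dem_units='meters')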
"""
no_elev = self.df['FROM_ALT_VA'].isnull()
x_no_elev = self.df.loc[no_elev, 'x'].values
y_no_elev = self.df.loc[no_elev, 'y'].values
elevs = raster.get_values_at_points(top_raster,
x=x_no_elev,
y=y_no_elev,
points_crs=self.dest_crs
)
elevs *= convert_length_units(dem_units, self.model_length_units)
self.df.loc[no_elev, 'FROM_ALT_VA'] = elevs
assert not self.df['FROM_ALT_VA'].isnull().any()
self.well_elevations = dict(zip(self.df['SITE_NO'], self.df['FROM_ALT_VA']))
    def make_production_zones(self, production_zones, default_elevation_units='feet'):
""" Make dictionary attributes for production zones.
These are used to assign individual wells to production zones.
The defaultdict is keyed by zone_name and then SITE_NO.
Parameters
----------
        production_zones: dict
            Dictionary of production zone information, keyed by zone name.
            Each value is a sequence of (zone_top, zone_bot) raster paths,
            optionally followed by the length units of the raster elevations:

            zone_name: str
                name assigned to the production zone (the dictionary key)
            zone_top: str
                path to raster with the top of the zone
            zone_bot: str
                path to raster with the bottom of the zone
        default_elevation_units: str, optional
            length units of raster elevations, used for zones that do not
            include their own units entry; by default 'feet'
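
        Example (a sketch with hypothetical raster paths; 'middle_claiborne'
        carries its own units, 'lower_claiborne' falls back to
        ``default_elevation_units``)::

            wu.make_production_zones(
                {'middle_claiborne': ('mcaq_surf.tif', 'mccu_surf.tif', 'feet'),
                 'lower_claiborne': ('lcaq_surf.tif', 'lccu_surf.tif')},
                default_elevation_units='feet')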
"""
key = self.site_no_col
# get tops and bottoms of estimated production intervals at each well
# make dictionaries to lookup by well
for name, info in production_zones.items():
top_raster, botm_raster, *units = info
units = units[0] if units else default_elevation_units
x = self.df['x'].values
y = self.df['y'].values
length_unit_conversion = convert_length_units(units, self.model_length_units)
top_elevations = raster.get_values_at_points(top_raster,
x=x, y=y) * length_unit_conversion
self.prod_zone_top[name] = dict(zip(self.df[key], top_elevations))
botm_elevations = raster.get_values_at_points(botm_raster,
x=x, y=y) * length_unit_conversion
self.prod_zone_bot[name] = dict(zip(self.df[key], botm_elevations))
self.df['{}_top'.format(name)] = top_elevations
self.df['{}_botm'.format(name)] = botm_elevations
    def assign_monthly_production(self, outfile='processed_swuds.csv'):
""" Assign production wells for water use, skipping IR (irrigation) and
TE (thermal electric) to production zones. If production zones are not
assigned or if the well bottom doesn't fall into a production zone, then
the screen_top and screen_bot are assigned using well_depth and the
default screen length.
Production is given in cubic m per day.
todo: add unit conversion parameter so other units can be used?
Parameters
----------
outfile: str
path to final processed monthly water-use file with production zone
information
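
        Example (a sketch; the monthly fill logic described above is applied
        to each site before writing)::

            wu.assign_monthly_production(outfile='processed_swuds.csv')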
"""
# fill in missing monthly values with annual value
for c in self.monthly_cols:
idx = self.df.loc[self.df[c].isnull()].index.values
self.df.loc[idx, c] = self.df.loc[idx, 'ANNUAL_VAL']
# pull out groundwater sites that are not IR, AQ or TE
        self.df = self.df.loc[(self.df['WATER_CD'] == 'GW') &
                              ~self.df['FROM_NAT_WATER_USE_CD'].isin(['IR', 'AQ', 'TE'])]
# reshape dataframe to have monthly values in same column
stacked = pd.DataFrame(self.df[self.monthly_cols].stack())
stacked.reset_index(inplace=True)
stacked.rename(columns={'level_1': 'month',
0: 'q_monthly'}, inplace=True)
stacked.index = stacked.level_0
stacked = stacked.join(self.df)
keep_cols = [c for c in stacked.columns if c not in self.monthly_cols]
stacked = stacked[keep_cols]
        # map monthly column names (JAN_VAL, ...) to month numbers 1-12
        month_numbers = {name: i + 1 for i, name in enumerate(self.monthly_cols)}
        dates = ['{}-{:02d}'.format(year, month_numbers[month_column_name])
                 for year, month_column_name in zip(stacked.YEAR, stacked.month)]
stacked['datetime'] = pd.to_datetime(dates)
stacked.sort_values(by=['SITE_NO', 'datetime'], inplace=True)
# set start and end dates if not already set
if self.start_date is None:
self.start_date = stacked.datetime.min()
if self.end_date is None:
self.end_date = stacked.datetime.max()
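        # For each site: reindex to continuous monthly dates spanning the
        # simulation period, then fill missing months from 2010 values,
        # monthly averages, and the site mean, in that order of preference.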
groups = stacked.groupby('SITE_NO')
all_groups = []
for site_no, group in groups:
group = group.copy()
group.index = pd.to_datetime(group['datetime'])
start_date = pd.Timestamp(self.start_date)
end_date = pd.Timestamp(self.end_date)
monthly_values_2010 = group.loc[group.datetime.dt.year == 2010]
monthly_values_2010 = dict(zip(monthly_values_2010.datetime.dt.month,
monthly_values_2010.q_monthly))
avg_monthly_values = group.groupby(group.index.month).mean().q_monthly.to_dict()
q_mean = group.q_monthly.mean()
# reindex the site data to include all months for simulation period
all_dates = pd.date_range(start_date, end_date, freq='MS')
group = group.reindex(all_dates)
# fill empty dates
q = []
for month, q_monthly in zip(group.index.month, group.q_monthly):
# try to use 2010 values if they exist
if np.isnan(q_monthly):
q_monthly = monthly_values_2010.get(month, np.nan)
# otherwise take the average value for each month
if np.isnan(q_monthly):
q_monthly = avg_monthly_values[month]
# fill missing months with the mean value for the site
if np.isnan(q_monthly):
q_monthly = q_mean
q.append(q_monthly)
# assume most values represent abstraction
# if sum is positive, invert so that output values are negative
if np.sum(q) > 0:
q = -np.array(q)
group['q'] = q
            # convert pumping rates from data volume units to model volume units per day
            group['q'] = group['q'] * convert_volume_units(self.data_volume_units,
                                                           self.model_length_units)
group['site_no'] = f'swuds_{site_no}'
group['well_elev'] = self.well_elevations[site_no]
group['depth'] = self.depths[site_no]
well_botm_depth = self.well_elevations[site_no] - self.depths[site_no]
            # collapse x, y to a single value per site (rows added by reindexing are NaN)
            group['x'] = np.nanmin(group['x'])
            group['y'] = np.nanmin(group['y'])
# assign a production zone from default dict. If the bottom of the
# well does not fall in a zone, or if the dictionary is empty; then
# the production zone is assigned 'unnamed'
production_zone = 'unnamed'
for prod_name in self.prod_zone_top.keys():
prod_zone_top = self.prod_zone_top[prod_name][site_no]
prod_zone_bot = self.prod_zone_bot[prod_name][site_no]
if np.isnan(prod_zone_top) or np.isnan(prod_zone_bot): # missing zone
group['screen_bot'] = self.well_elevations[site_no] - self.depths[site_no]
group['screen_top'] = self.well_elevations[site_no] - self.depths[site_no] + self.default_screen_len
group['open_int_method'] = 'well depth'
else:
if well_botm_depth < prod_zone_top and well_botm_depth > prod_zone_bot:
production_zone = prod_name
group['screen_bot'] = prod_zone_bot
group['screen_top'] = prod_zone_top
group['open_int_method'] = 'production zone'
else:
group['screen_bot'] = self.well_elevations[site_no] - self.depths[site_no]
group['screen_top'] = self.well_elevations[site_no] - self.depths[site_no] + self.default_screen_len
group['open_int_method'] = 'well depth'
group['production_zone'] = production_zone
            # add aquifer name, using the first non-null aquifer code for the site
            aqfr_cd = group['FROM_AQFR_CD'].dropna()
            group['aquifer_name'] = self.aquifer_names.get(
                aqfr_cd.values[0] if len(aqfr_cd) else None, 'unnamed')
            # columns to keep, including the production zone assignments
            cols = ['site_no', 'q', 'q_monthly', 'month', 'well_elev', 'depth',
                    'screen_bot', 'screen_top', 'x', 'y',
                    'production_zone', 'open_int_method', 'aquifer_name']
            all_groups.append(group[cols])
self.df = pd.concat(all_groups)
self.df['start_datetime'] = self.df.index # start date of each pumping period
if outfile is not None:
outfile = Path(outfile)
self.df.to_csv(outfile, index=False)
            print('processed SWUDS data written to {} and stored in the df attribute'.format(outfile))
self.df['geometry'] = [Point(x, y) for x, y in zip(self.df.x,
self.df.y)]
# write only unique pumping values to shapefile
to_shapefile = self.df.groupby(['site_no', 'q']).first().reset_index()
shapefile = outfile.with_suffix('.shp')
df2shp(to_shapefile, shapefile, crs=self.dest_crs)
    @classmethod
def from_yaml(cls, yamlfile):
""" Read input and output files from yaml file
and run all the processing steps in the class to
produce the processed csv file.
Parameters
----------
yamlfile: str
path to a yaml file containing input and output information
Returns
-------
wu: Swuds object
returns a Swuds object and also generates processed csv file
specified in the yaml file.
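
        Example (a sketch of the yaml layout implied by the keys read below;
        all paths and file names are hypothetical)::

            raw_data:
              data_path: d:/home/MAP/source_data
              swuds_input: swuds_export.xlsx
              worksheet: water_use
            output:
              outcsv: swuds_raw.csv
              processed_csv: processed_swuds.csv
            rasters:
              dem: dem_mean_elevs.tif
              mc_top: mcaq_surf.tif
              mc_bot: mccu_surf.tif
              lc_top: lcaq_surf.tif
              lc_bot: lccu_surf.tif
            shapefiles:
              extent: model_extent.shp
              wells_out: wu_points.shp
            zones:
              - [middle_claiborne, mcaq_surf.tif, mccu_surf.tif]
              - [lower_claiborne, lcaq_surf.tif, lccu_surf.tif]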
"""
with open(yamlfile, 'r') as inputs:
yaml_inputs = yaml.safe_load(inputs)
data_path = yaml_inputs['raw_data']['data_path']
data_path = Swuds.fix_path(data_path)
swuds_input = os.path.join(data_path, yaml_inputs['raw_data']['swuds_input'])
worksheet = yaml_inputs['raw_data']['worksheet']
outcsv = os.path.join(data_path, yaml_inputs['output']['outcsv'])
processed_csv = os.path.join(data_path, yaml_inputs['output']['processed_csv'])
dem = os.path.join(data_path,yaml_inputs['rasters']['dem'])
mc_top = os.path.join(data_path, yaml_inputs['rasters']['mc_top'])
mc_bot = os.path.join(data_path, yaml_inputs['rasters']['mc_bot'])
lc_top = os.path.join(data_path, yaml_inputs['rasters']['lc_top'])
lc_bot = os.path.join(data_path, yaml_inputs['rasters']['lc_bot'])
meras_shp = os.path.join(data_path, yaml_inputs['shapefiles']['extent'])
wu_shp = os.path.join(data_path, yaml_inputs['shapefiles']['wells_out'])
# make a swuds object
wu = cls(xlsx=swuds_input, sheet=worksheet, csvfile=outcsv)
# process
wu.sort_sites()
wu.reproject()
wu.apply_footprint(meras_shp, outshp=wu_shp)
wu.assign_missing_elevs(dem)
        # make_production_zones expects a dict keyed by zone name;
        # build one from the [zone_name, top_raster, bot_raster] lists in the yaml
        zones = {}
        for zn in yaml_inputs['zones']:
            zones[zn[0]] = (Swuds.fix_path(zn[1]), Swuds.fix_path(zn[2]))
        wu.make_production_zones(zones)
# write results to csv file and return object
wu.assign_monthly_production(processed_csv)
return wu
    @staticmethod
def fix_path(data_path):
""" Convert simple path string with forward slashes
to a path used by python using os.path.join(). This
function allows the user to specify a path in the
yaml file simply, for example:
d:/home/MAP/source_data/wateruse.csv
The string is split on '/' and resulting list
is passed to os.path.join(). If the first entry
has a colon, then os.path.join(entry[0], os.path.sep)
is used to specify a windows drive properly.
Parameters
----------
data_path: str
path read from yaml file
Returns
-------
data_path: str
path built using os.path.join()
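
        Example::

            data_path = Swuds.fix_path('d:/home/MAP/source_data/wateruse.csv')
            # on Windows: 'd:\\home\\MAP\\source_data\\wateruse.csv'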
"""
if re.search('/', data_path):
parts = re.split('/', data_path)
if re.search(':', parts[0]):
data_path = os.path.join(parts[0], os.path.sep)
del parts[0]
data_path = os.path.join(data_path, *parts)
else:
data_path = os.path.join(*parts)
        return data_path
def preprocess_swuds(swuds_input, worksheet, csv_input=None,
dem=None, dem_units='meters',
start_date=None,
end_date=None,
active_area=None,
active_area_id_column=None,
active_area_feature_id=None,
site_no_col='SITE_NO',
x_coord_col='FROM_DEC_LONG_VA',
y_coord_col='FROM_DEC_LAT_VA',
production_zones=None,
estimated_production_surface_units='feet',
source_crs=4269,
dest_crs=5070,
data_length_units='feet',
data_volume_units='mgal',
model_length_units='meters',
outfile=None):
"""Preprocess water use data from the USGS Site-Specific Water Use Database (SWUDS).
    * reproject data to a destination CRS (`dest_crs`)
    * cull data to an area of interest (`active_area`)
    * assign any missing wellhead elevations from a DEM
    * if the input data do not have information on the well screen intervals,
      sample screen tops and bottoms from raster surfaces bounding
      an estimated production zone (see `production_zones`). Well
      bottom information is used to discriminate between multiple production zones.
    * reindex the data to continuous monthly values extending from `start_date`
      to `end_date`. Typically, these would bracket the time period for which
      the pumping should be simulated in a model. For example, the earliest data
      may be from 2010, but if the model starts in 2008, it may be appropriate to
      begin using the 2010 rates then (``start_date='2008'``). If no start or end
      date are given, the first and last years of pumping in the source data are used.
    * fill empty months using 2010 data (the most complete survey year) if available,
      otherwise use the average value for the site
    * backfill any remaining empty months going back to the `start_date` in the same way
    * write processed data to a CSV file and shapefile of the same name
Parameters
----------
swuds_input: str
Excel spreadsheet of SWUDs data, in a format readable by
:func:`pandas.read_excel`. If xlsx file is passed, then a
selected worksheet from it will be converted to a csv file unless
the csvfile parameter is specified as None.
worksheet : str
Worksheet in `swuds_input` to read.
    csv_input: str, optional
        Path to a csv file with the data (if swuds_input is None), or to a
        csv file that is created from the selected worksheet. If swuds_input
        is None, then csv_input must be provided.
dem : str, optional
DEM raster of the land surface. Used for estimating missing wellhead elevations.
Any reprojection to dest_crs is handled automatically, assuming
the DEM raster has CRS information embedded (arc-ascii grids do not!)
By default, None.
dem_units : str, {'feet', 'meters', ..}
Units of DEM elevations, by default, 'meters'
start_date : str
Start date for pumping rates. If earlier than the dates in `data`,
pumping rates will be backfilled to this date.
end_date : str
End date for pumping rates. If later than the dates in `data`,
pumping rates will be forward filled to this date.
active_area : str
Shapefile with polygon to cull observations to. Automatically reprojected
to dest_crs if the shapefile includes a .prj file.
by default, None.
active_area_id_column : str, optional
Column in active_area with feature ids.
By default, None, in which case all features are used.
active_area_feature_id : str, optional
ID of feature to use for active area
By default, None, in which case all features are used.
site_no_col : str, optional
Column name in data with site identifiers,
by default 'SITE_NO'
x_coord_col : str, optional
Column name in data with x-coordinates,
by default 'FROM_DEC_LONG_VA'
y_coord_col : str, optional
Column name in data with y-coordinates,
by default 'FROM_DEC_LAT_VA'
production_zones : dict
Dictionary of production zone tops and bottoms, and optionally,
production zone surface elevation units, keyed by an abbreviated name.
Example::
production_zones={'mrva': (test_data_path / 'swuds/rasters/pz_MCAQP_top.tif',
test_data_path / 'swuds/rasters/pz_MCAQP_bot.tif',
),
'mcaq': (test_data_path / 'swuds/rasters/pz_MCAQP_top.tif',
test_data_path / 'swuds/rasters/pz_MCAQP_bot.tif',
'feet')
}
Where zone 'mrva' has a top and bottom raster, but no units assigned,
and zone 'mcaq' has a top and bottom raster and units. For zones
with no units, the `estimated_production_surface_units` will be used.
estimated_production_surface_units : str, {'meters', 'ft', etc.}
Length units of elevations in estimated production surface rasters.
by default, 'feet'
source_crs : obj
        Coordinate reference system of the water-use site locations.
A Python int, dict, str, or :class:`pyproj.crs.CRS` instance
passed to :meth:`pyproj.crs.CRS.from_user_input`
Can be any of:
- PROJ string
- Dictionary of PROJ parameters
- PROJ keyword arguments for parameters
- JSON string with PROJ parameters
- CRS WKT string
- An authority string [i.e. 'epsg:4326']
- An EPSG integer code [i.e. 4326]
- A tuple of ("auth_name": "auth_code") [i.e ('epsg', '4326')]
- An object with a `to_wkt` method.
- A :class:`pyproj.crs.CRS` class
By default, epsg:4269
dest_crs : obj
Coordinate reference system of the model. Same input types
as ``source_crs``.
By default, epsg:5070
    data_length_units : str; 'meters', 'feet', etc.
        Units of lengths in data (elevations, etc.),
        by default 'feet'
    data_volume_units : str; 'mgal', 'ft3', etc.
        Volumetric unit of pumping rates,
        by default 'mgal' (million gallons)
model_length_units : str; 'meters', 'feet', etc.
Length units of model.
by default, 'meters'
outfile : str
Path for output file. A shapefile of the same name is also written.
If None, no output file is written. By default, None
Returns
-------
wu : :class:`mapgwm.swuds.Swuds` instance
Notes
-----
* time units for SWUDs data and model are assumed to be days
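
    Example (a sketch of a typical call; all paths are hypothetical)::

        wu = preprocess_swuds('swuds_export.xlsx', 'water_use',
                              csv_input='swuds_raw.csv',
                              dem='dem_mean_elevs.tif', dem_units='meters',
                              active_area='model_extent.shp',
                              production_zones={'mcaq': ('mcaq_surf.tif',
                                                         'mccu_surf.tif')},
                              outfile='preprocessed/swuds_wu.csv')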
"""
# make a swuds object and process it
wu = Swuds(xlsx=swuds_input, sheet=worksheet, csvfile=csv_input,
start_date=start_date,
end_date=end_date,
site_no_col=site_no_col,
x_coord_col=x_coord_col,
y_coord_col=y_coord_col,
source_crs=source_crs,
dest_crs=dest_crs,
data_length_units=data_length_units,
data_volume_units=data_volume_units,
model_length_units=model_length_units,
)
# cull the data to the model area, if provided
if active_area is not None:
wu.apply_footprint(active_area=active_area,
active_area_id_column=active_area_id_column,
active_area_feature_id=active_area_feature_id)
    # assign missing wellhead elevations from the DEM, if one was provided
    if dem is not None:
        wu.assign_missing_elevs(dem, dem_units=dem_units)
if production_zones is not None:
wu.make_production_zones(production_zones,
default_elevation_units=estimated_production_surface_units)
wu.assign_monthly_production(outfile=outfile)
return wu
if __name__ == '__main__':
home = os.getcwd()
data_path = os.path.join(os.path.dirname(home), 'working_data')
yml_file = os.path.join(data_path, 'swuds_input.yml')
wu = Swuds.from_yaml(yml_file)