%matplotlib inline

import pytz
import matplotlib.pyplot as plt
import pandas as pd

import ulmo
from ulmo.util import convert_datetime
CUAHSI WaterOneFlow: ulmo, SOAP endpoint, and other general info

        `CUAHSI WaterOneFlow`_ web services
        .. _CUAHSI WaterOneFlow: http://his.cuahsi.org/wofws.html

# Show the names exposed by ulmo's WaterOneFlow module, skipping dunder names.
wof_names = [name for name in dir(ulmo.cuahsi.wof) if not name.startswith('__')]
print(wof_names)
    ['absolute_import', 'core', 'get_site_info', 'get_sites', 'get_values', 'get_variable_info']

# WaterML/WOF WSDL endpoint; passed as the first argument to the
# ulmo.cuahsi.wof requests made further down.
# NOTE(review): the URL is an empty string here -- presumably it was removed
# from this copy. Set it to the service's WSDL URL before running.
wsdlurl = ''  # WOF 1.0

# 'network code': joined with ':' in front of the site code and the variable
# code when building the identifiers sent in the requests below.
networkcd = 'mysqlodm2timeseries'

Get site information

This is one of the two sites in the Little Bear River (LBR) sample database.

# Site code; the request needs it qualified as 'network:site_code'.
sitecd = 'USU-LBR-Mendon'
siteinfo = ulmo.cuahsi.wof.get_site_info(
    wsdlurl,
    ':'.join([networkcd, sitecd]),
)
type(siteinfo), siteinfo.keys()
 ['code', 'name', 'series', 'notes', 'network', 'location', 'timezone_info'])
siteinfo['network'], siteinfo['code'], siteinfo['name']
 'Little Bear River at Mendon Road near Mendon, Utah')
    {'latitude': '41.718473', 'srs': 'EPSG:CUAHSI:4269', 'longitude': '-111.946402'}

type(siteinfo['series']), len(siteinfo['series']), siteinfo['series'].keys()
{'variable': {'code': 'USU33',
  'data_type': 'Average',
  'general_category': 'Water Quality',
  'id': '33',
  'name': 'Oxygen, dissolved percent of saturation',
  'no_data_value': '-9999.0000000000',
  'sample_medium': 'Unknown',
  'time': {},
  'units': {'abbreviation': '%',
   'code': '1',
   'name': 'percent',
   'type': 'Dimensionless'},
  'value_type': 'Unknown',
  'vocabulary': 'mysqlodm2timeseries'},
 '{http://www.cuahsi.org/water_ml/1.0/}_method': {'method_description': 'Dissolved oxygen measured using a Hydrolab MS5 Water Quality Multiprobe.',
  'method_id': '19',
  'method_link': 'http://www.hydrolab.com'},
 '{http://www.cuahsi.org/water_ml/1.0/}_quality_control_level': {'quality_control_level': '0',
  'quality_control_level_id': '0'},
 '{http://www.cuahsi.org/water_ml/1.0/}_source': {'organization': 'Utah State University Utah Water Research Laboratory',
  'source_description': 'Continuous water quality monitoring by Utah State University as part of the USDA CEAP Grant',
  'source_id': '1',
  'source_link': 'http://www.bearriverinfo.org'},
 '{http://www.cuahsi.org/water_ml/1.0/}value_count': {'value_count': '1440'},
 '{http://www.cuahsi.org/water_ml/1.0/}variable_time_interval': {'begin_date_time': '2007-09-01T00:00:00',
  'end_date_time': '2007-09-30T23:30:00',
  'variable_time_interval_type': 'TimeIntervalType'}}

Get Values

def site_series_values_to_df(series_values, variable_name):
    """Convert a WaterOneFlow values response into a time-indexed DataFrame.

    Parameters
    ----------
    series_values : dict
        Response dict holding a ``'values'`` list; each item is a dict with a
        ``'datetime'`` entry (ISO 8601 string) and a ``'value'`` entry
        (anything ``float()`` accepts).
    variable_name : str
        Name given to the single data column of the result.

    Returns
    -------
    pandas.DataFrame
        One float column named ``variable_name``, indexed by a UTC
        ``DatetimeIndex`` named ``'time'`` and sorted in ascending time
        order. An empty ``'values'`` list yields an empty frame with the
        same column and index name.
    """
    stamps = []
    readings = []
    for record in series_values['values']:
        stamp = pd.Timestamp(record['datetime'])
        # Timestamps without an offset are taken to be UTC; timestamps that
        # carry an offset are converted to UTC so the index has a single zone.
        if stamp.tzinfo is None:
            stamp = stamp.tz_localize('UTC')
        else:
            stamp = stamp.tz_convert('UTC')
        stamps.append(stamp)
        readings.append(float(record['value']))

    # utc=True keeps the index timezone-aware even when there are no records.
    ts_df = pd.DataFrame(
        {variable_name: readings},
        index=pd.to_datetime(stamps, utc=True),
    )
    ts_df.index.name = 'time'
    ts_df.sort_index(ascending=True, inplace=True)
    return ts_df
    ulmo.cuahsi.wof.get_values.__doc__.replace('<', '').replace('>', '')
        Retrieves site values from a WaterOneFlow service using a GetValues request.
        wsdl_url : str
            URL of a service's web service definition language (WSDL) description.
            All WaterOneFlow services publish a WSDL description and this url is the
            entry point to the service.
        site_code : str
            Site code of the site you'd like to get values for. Site codes MUST
            contain the network and be of the form network:site_code, as is
            required by WaterOneFlow.
        variable_code : str
            Variable code of the variable you'd like to get values for. Variable
            codes MUST contain the network and be of the form
            vocabulary:variable_code, as is required by WaterOneFlow.
        start : ``None`` or datetime (see :ref:`dates-and-times`)
            Start of a date range for a query. If both start and end parameters are
            omitted, the entire time series available will be returned.
        end : ``None`` or datetime (see :ref:`dates-and-times`)
            End of a date range for a query. If both start and end parameters are
            omitted, the entire time series available will be returned.
        suds_cache: ``None`` or tuple
            SOAP local cache duration for WSDL description and client object.
            Pass a cache duration tuple like ('days', 3) to set a custom duration.
            Duration may be in months, weeks, days, hours, or seconds.
            If unspecified, the default duration (1 day) will be used.
            Use ``None`` to turn off caching.
        site_values : dict
            a python dict containing values

'mysqlodm2timeseries:USU33' is ‘Oxygen, dissolved percent of saturation’

# Variable code; like the site code, it is sent qualified with the network code.
variablecd = 'USU33'

# No start/end is given, so the request is not restricted to a date range.
site_values = ulmo.cuahsi.wof.get_values(
    wsdlurl,
    ':'.join([networkcd, sitecd]),
    ':'.join([networkcd, variablecd]),
)
['sources', 'quality_control_levels', 'values', 'methods', 'variable', 'site']
sitevariable = site_values['variable']
{'code': 'USU33',
 'data_type': 'Average',
 'general_category': 'Water Quality',
 'id': '33',
 'name': 'Oxygen, dissolved percent of saturation',
 'no_data_value': '-9999.0000000000',
 'sample_medium': 'Unknown',
 'time': {'interval': '30',
  'units': {'abbreviation': 'min', 'name': 'minute', 'type': 'Time'}},
 'units': {'abbreviation': '%',
  'code': '1',
  'name': 'percent',
  'type': 'Dimensionless'},
 'value_type': 'Unknown',
 'vocabulary': 'mysqlodm2timeseries'}

site_values['values'] is a list of individual time series values (timestamp and data value)

type(site_values['values']), site_values['values'][0].keys()

Start and end timestamps of the series, in ISO 8601 format (local time with a time offset vs. UTC). Note that the strings shown below carry no time offset.

site_values['values'][0]['datetime'], site_values['values'][-1]['datetime']
('2007-09-01T00:00:00', '2007-09-30T23:30:00')

Set a nice, user-friendly variable name string.

variable_name = '%s (%s)' % (sitevariable['name'], sitevariable['value_type'])
'Oxygen, dissolved percent of saturation (Unknown)'
dtstr_last = site_values['values'][-1]['datetime']
datetime.datetime(2007, 9, 30, 23, 30, tzinfo=<UTC>)

ValueError: astimezone() cannot be applied to a naive datetime
ts_df = site_series_values_to_df(site_values, variable_name)
Oxygen, dissolved percent of saturation (Unknown)
2007-09-30 21:30:00+00:0094.99999
2007-09-30 22:00:00+00:0094.18334
2007-09-30 22:30:00+00:0093.28333
2007-09-30 23:00:00+00:0092.41666
2007-09-30 23:30:00+00:0091.58334
type(ts_df), ts_df.columns, ts_df.index.dtype, ts_df.index.min(), ts_df.index.max()
 Index([u'Oxygen, dissolved percent of saturation (Unknown)'], dtype='object'),
 datetime64[ns, UTC],
 Timestamp('2007-09-01 00:00:00+0000', tz='UTC'),
 Timestamp('2007-09-30 23:30:00+0000', tz='UTC'))
# Plot the single data column as a line, labelling the y axis with the
# column name followed by the variable's unit abbreviation.
fig, ax = plt.subplots(figsize=(10, 4))
varlabel = ts_df.columns[0]
units_abbrev = sitevariable['units']['abbreviation']
ts_df[varlabel].plot(ax=ax, style='-')
ax.set_ylabel(', '.join([varlabel, units_abbrev]));


