Updated:

Testing WOFpy LBR sample DB

Emilio Mayorga. Run on my conda environment uwapl_em_mc_1aui.
3/5,4/2017. Test Don’s Amazon cloud deployment

In [1]:
%matplotlib inline

import pytz
import matplotlib.pyplot as plt
import pandas as pd

import ulmo
from ulmo.util import convert_datetime
/home/filipe/miniconda3/envs/BiG-CZ/lib/python2.7/site- packages/ulmo/twc/kbdi/core.py:20: FutureWarning: pandas.tslib is deprecated and will be removed in a future version. You can access Timestamp as pandas.Timestamp CSV_SWITCHOVER = pandas.tslib.Timestamp('2016-10-01')

CUAHSI WaterOneFlow: ulmo, SOAP endpoint, and other general info

In [2]:
print(ulmo.cuahsi.wof.__doc__)
    
        ulmo.cuahsi.wof
        ~~~~~~~~~~~~~~~
    
        `CUAHSI WaterOneFlow`_ web services
    
        .. _CUAHSI WaterOneFlow: http://his.cuahsi.org/wofws.html
    

In [3]:
print([obj for obj in dir(ulmo.cuahsi.wof) if not obj.startswith('__')])
    ['absolute_import', 'core', 'get_site_info', 'get_sites', 'get_values', 'get_variable_info']

In [4]:
# WaterML/WOF WSDL endpoints
wsdlurl = 'http://54.186.36.247:8080/mysqlodm2timeseries/soap/cuahsi_1_0/.wsdl'  # WOF 1.0

# 'network code'
networkcd = 'mysqlodm2timeseries'

Get site information

one of two sites in the LBR sample DB

In [5]:
sitecd = 'USU-LBR-Mendon'
In [6]:
siteinfo = ulmo.cuahsi.wof.get_site_info(wsdlurl, networkcd+':'+sitecd)
In [7]:
type(siteinfo), siteinfo.keys()
(dict,
 ['code', 'name', 'series', 'notes', 'network', 'location', 'timezone_info'])
In [8]:
siteinfo['network'], siteinfo['code'], siteinfo['name']
('mysqlodm2timeseries',
 'USU-LBR-Mendon',
 'Little Bear River at Mendon Road near Mendon, Utah')
In [9]:
print(siteinfo['location'])
    {'latitude': '41.718473', 'srs': 'EPSG:CUAHSI:4269', 'longitude': '-111.946402'}

In [10]:
type(siteinfo['series']), len(siteinfo['series']), siteinfo['series'].keys()
(dict,
 18,
 ['mysqlodm2timeseries:USU7',
  'mysqlodm2timeseries:USU6',
  'mysqlodm2timeseries:USU5',
  'mysqlodm2timeseries:USU4',
  'mysqlodm2timeseries:USU3',
  'mysqlodm2timeseries:USU48',
  'mysqlodm2timeseries:USU47',
  'mysqlodm2timeseries:USU9',
  'mysqlodm2timeseries:USU8',
  'mysqlodm2timeseries:USU13',
  'mysqlodm2timeseries:USU10',
  'mysqlodm2timeseries:USU35',
  'mysqlodm2timeseries:USU34',
  'mysqlodm2timeseries:USU37',
  'mysqlodm2timeseries:USU36',
  'mysqlodm2timeseries:USU44',
  'mysqlodm2timeseries:USU33',
  'mysqlodm2timeseries:USU32'])
In [11]:
siteinfo['series']['mysqlodm2timeseries:USU33'].keys()
['{http://www.cuahsi.org/water_ml/1.0/}variable_time_interval',
 '{http://www.cuahsi.org/water_ml/1.0/}_source',
 '{http://www.cuahsi.org/water_ml/1.0/}_quality_control_level',
 'variable',
 '{http://www.cuahsi.org/water_ml/1.0/}_method',
 '{http://www.cuahsi.org/water_ml/1.0/}value_count']
In [12]:
siteinfo['series']['mysqlodm2timeseries:USU33']
{'variable': {'code': 'USU33',
  'data_type': 'Average',
  'general_category': 'Water Quality',
  'id': '33',
  'name': 'Oxygen, dissolved percent of saturation',
  'no_data_value': '-9999.0000000000',
  'sample_medium': 'Unknown',
  'time': {},
  'units': {'abbreviation': '%',
   'code': '1',
   'name': 'percent',
   'type': 'Dimensionless'},
  'value_type': 'Unknown',
  'vocabulary': 'mysqlodm2timeseries'},
 '{http://www.cuahsi.org/water_ml/1.0/}_method': {'method_description': 'Dissolved oxygen measured using a Hydrolab MS5 Water Quality Multiprobe.',
  'method_id': '19',
  'method_link': 'http://www.hydrolab.com'},
 '{http://www.cuahsi.org/water_ml/1.0/}_quality_control_level': {'quality_control_level': '0',
  'quality_control_level_id': '0'},
 '{http://www.cuahsi.org/water_ml/1.0/}_source': {'organization': 'Utah State University Utah Water Research Laboratory',
  'source_description': 'Continuous water quality monitoring by Utah State University as part of the USDA CEAP Grant',
  'source_id': '1',
  'source_link': 'http://www.bearriverinfo.org'},
 '{http://www.cuahsi.org/water_ml/1.0/}value_count': {'value_count': '1440'},
 '{http://www.cuahsi.org/water_ml/1.0/}variable_time_interval': {'begin_date_time': '2007-09-01T00:00:00',
  'end_date_time': '2007-09-30T23:30:00',
  'variable_time_interval_type': 'TimeIntervalType'}}

Get Values

In [13]:
def site_series_values_to_df(series_values, variable_name):
    # Create a clean timeseries list of (dt, val) tuples
    tsdt_tuplst = [
        (convert_datetime(valdict['datetime']).replace(tzinfo=pytz.utc),
         float(valdict['value'])) for valdict in series_values['values']
    ]

    dt, val = zip(*tsdt_tuplst)
    ts_df = pd.DataFrame({'time': dt, variable_name: val})
    ts_df.set_index('time', inplace=True)
    ts_df.sort_index(ascending=True, inplace=True)
    return ts_df
In [14]:
print(
    ulmo.cuahsi.wof.get_values.__doc__.replace('<', '').replace('>', '')
)
    
        Retrieves site values from a WaterOneFlow service using a GetValues request.
    
        Parameters
        ----------
        wsdl_url : str
            URL of a service's web service definition language (WSDL) description.
            All WaterOneFlow services publish a WSDL description and this url is the
            entry point to the service.
        site_code : str
            Site code of the site you'd like to get values for. Site codes MUST
            contain the network and be of the form network:site_code, as is
            required by WaterOneFlow.
        variable_code : str
            Variable code of the variable you'd like to get values for. Variable
            codes MUST contain the network and be of the form
            vocabulary:variable_code, as is required by WaterOneFlow.
        start : ``None`` or datetime (see :ref:`dates-and-times`)
            Start of a date range for a query. If both start and end parameters are
            omitted, the entire time series available will be returned.
        end : ``None`` or datetime (see :ref:`dates-and-times`)
            End of a date range for a query. If both start and end parameters are
            omitted, the entire time series available will be returned.
        suds_cache: ``None`` or tuple
            SOAP local cache duration for WSDL description and client object.
            Pass a cache duration tuple like ('days', 3) to set a custom duration.
            Duration may be in months, weeks, days, hours, or seconds.
            If unspecified, the default duration (1 day) will be used.
            Use ``None`` to turn off caching.
    
        Returns
        -------
        site_values : dict
            a python dict containing values
        

'odm2timeseries:USU33' is ‘Oxygen, dissolved percent of saturation’

In [15]:
variablecd = 'USU33'

site_values = ulmo.cuahsi.wof.get_values(wsdlurl, networkcd+':'+sitecd, networkcd+':'+variablecd)
In [16]:
site_values.keys()
['sources', 'quality_control_levels', 'values', 'methods', 'variable', 'site']
In [17]:
sitevariable = site_values['variable']
sitevariable
{'code': 'USU33',
 'data_type': 'Average',
 'general_category': 'Water Quality',
 'id': '33',
 'name': 'Oxygen, dissolved percent of saturation',
 'no_data_value': '-9999.0000000000',
 'sample_medium': 'Unknown',
 'time': {'interval': '30',
  'units': {'abbreviation': 'min', 'name': 'minute', 'type': 'Time'}},
 'units': {'abbreviation': '%',
  'code': '1',
  'name': 'percent',
  'type': 'Dimensionless'},
 'value_type': 'Unknown',
 'vocabulary': 'mysqlodm2timeseries'}

site_values['values'] is a list of individual time series values (timestamp and data value)

In [18]:
type(site_values['values']), site_values['values'][0].keys()
(list,
 ['method_id',
  'censor_code',
  'quality_control_level',
  'source_id',
  'value',
  'datetime'])

Start and end timestamps (local time with time offset vs utc; iso8601 format)

In [19]:
site_values['values'][0]['datetime'], site_values['values'][-1]['datetime']
('2007-09-01T00:00:00', '2007-09-30T23:30:00')

Set a nice, user-friendly variable name string.

In [20]:
variable_name = '%s (%s)' % (sitevariable['name'], sitevariable['value_type'])
variable_name
'Oxygen, dissolved percent of saturation (Unknown)'
In [21]:
dtstr_last = site_values['values'][-1]['datetime']
convert_datetime(dtstr_last).replace(tzinfo=pytz.utc)
/home/filipe/miniconda3/envs/BiG-CZ/lib/python2.7/site- packages/ipykernel/__main__.py:2: FutureWarning: to_datetime is deprecated. Use self.to_pydatetime() from ipykernel import kernelapp as app
datetime.datetime(2007, 9, 30, 23, 30, tzinfo=<UTC>)

Hmm, this failed:

convert_datetime(dtstr_last).astimezone(pytz.utc)
ValueError: astimezone() cannot be applied to a naive datetime
In [22]:
ts_df = site_series_values_to_df(site_values, variable_name)
ts_df.tail()
/home/filipe/miniconda3/envs/BiG-CZ/lib/python2.7/site- packages/ipykernel/__main__.py:5: FutureWarning: to_datetime is deprecated. Use self.to_pydatetime()
Oxygen, dissolved percent of saturation (Unknown)
time
2007-09-30 21:30:00+00:0094.99999
2007-09-30 22:00:00+00:0094.18334
2007-09-30 22:30:00+00:0093.28333
2007-09-30 23:00:00+00:0092.41666
2007-09-30 23:30:00+00:0091.58334
In [23]:
type(ts_df), ts_df.columns, ts_df.index.dtype, ts_df.index.min(), ts_df.index.max()
(pandas.core.frame.DataFrame,
 Index([u'Oxygen, dissolved percent of saturation (Unknown)'], dtype='object'),
 datetime64[ns, UTC],
 Timestamp('2007-09-01 00:00:00+0000', tz='UTC'),
 Timestamp('2007-09-30 23:30:00+0000', tz='UTC'))
In [24]:
fig, ax = plt.subplots(figsize=(10, 4))
varlabel = ts_df.columns[0]
ts_df[varlabel].plot(style='-', ax=ax)
ax.set_ylabel(varlabel + ', ' + sitevariable['units']['abbreviation']);

png


Right click and choose Save link as… to download this notebook, or click here to run a live instance of this notebook.