1+ """Functions for manipulating HAPI times (restricted ISO 8601 strings)."""
2+ import re
3+ import time
4+
5+ import pandas
6+ import isodate
7+ import numpy as np
8+
9+ from hapiclient .util import error , log
10+
def hapitime_reformat(form_to_match, given_form, logging=False):
    """Reformat a given HAPI time to match format of another HAPI time.

    ``hapitime_reformat(match, given)`` truncates or pads ``given`` so that it
    has the same format as ``match``.

    This function allows for efficient subsetting of arrays of HAPI time
    strings. For example, to select all time elements after a time of
    ``start``, first convert ``start`` so that it has the same format as the
    elements of ``data['Time']``

    ::

        start = hapitime_reformat(data['Time'][0], start)

    Then subset using

    ::

        data = data[data['Time'] >= start]

    This is much more efficient than converting ``data['Time']`` to
    ``datetime`` objects and using ``datetime`` comparison methods.

    Parameters
    ----------
    form_to_match : str
        HAPI time string whose layout (date style, time resolution, number of
        fractional-second digits, trailing Z) the result should have.
    given_form : str
        HAPI time string whose value is to be re-expressed.
    logging : bool
        Passed to ``log`` as ``{'logging': logging}``.

    Returns
    -------
    str
        The value of ``given_form`` written in the layout of
        ``form_to_match``.

    Examples
    --------
    ::

        hapitime_reformat('1989Z', '1989-01Z') # 1989Z
        hapitime_reformat('1989-001T00:00Z', '1999-01-21Z') # 1999-021T00:00Z

    """

    log('ref: {}'.format(form_to_match), {'logging': logging})
    log('given: {}'.format(given_form), {'logging': logging})

    if 'T' in given_form:
        dt_given = isodate.parse_datetime(given_form)
    else:
        # isodate.parse_date does not accept a date with a trailing Z (which
        # is valid ISO 8601), so remove it. Only strip a Z that is actually
        # present; blindly dropping the last character would eat a digit
        # when the caller passes a time without the trailing Z.
        if given_form.endswith('Z'):
            date_only = given_form[:-1]
        else:
            date_only = given_form
        dt_given = isodate.parse_date(date_only)

    # Get format string, e.g., %Y-%m-%dT%H
    format_ref = hapitime_format_str([form_to_match])

    if '%f' in format_ref:
        # strftime's %f always emits six digits, so fractional seconds are
        # handled as text: reformat the whole-second parts recursively, then
        # pad/truncate the fractional digits to the reference width.
        form_to_match = form_to_match.rstrip('Z')
        form_to_match, _, form_to_match_fractional = \
            form_to_match.rpartition('.')

        given_form_fractional = '000000000'
        given_form_fmt = hapitime_format_str([given_form])
        given_form = given_form.rstrip('Z')

        if '%f' in given_form_fmt:
            given_form, _, given_form_fractional = given_form.rpartition('.')

        converted = hapitime_reformat(form_to_match + 'Z', given_form + 'Z')
        converted = converted.rstrip('Z')

        # Left-align, zero-pad to the reference width and truncate to it.
        n_digits = len(form_to_match_fractional)
        converted_fractional = '{:0<{}.{}}'.format(given_form_fractional,
                                                   n_digits, n_digits)
        converted = converted + '.' + converted_fractional

        if 'Z' in format_ref:
            return converted + 'Z'

        return converted

    converted = dt_given.strftime(format_ref)

    # Guard against strftime output that is longer than the reference.
    if len(converted) > len(form_to_match):
        converted = converted[0:len(form_to_match) - 1] + "Z"

    log('converted: {}'.format(converted), {'logging': logging})
    log('ref fmt: {}'.format(format_ref), {'logging': logging})
    log('----', {'logging': logging})

    return converted
93+
94+
def hapitime_format_str(Time):
    """Build the ``strftime``-style format string for a HAPI time.

    Only ``Time[0]`` is inspected, so ``Time`` must be an indexable sequence
    (list or array) of time strings. The result mirrors the layout of that
    first element: ``%Y``, ``%Y-%j`` or ``%Y-%m[-%d]``, followed by as many of
    ``T%H``, ``:%M``, ``:%S`` as the string is long enough to contain, an
    optional ``.%f`` or bare ``.``, and a ``Z`` if the string ends with one.

    ``error`` is called when the first element matches none of the
    recognised date layouts.
    """
    first = Time[0]
    n_chars = len(first)

    # Technically HAPI ISO 8601 must have a trailing Z; see
    # https://github.com/hapi-server/data-specification/blob/master/
    # hapi-dev/HAPI-data-access-spec-dev.md#representation-of-time
    # When it is absent, every length threshold below is one shorter.
    has_trailing_z = re.match(r".*Z$", first)
    shift = 0 if has_trailing_z else 1

    # Number of clock fields present: 0 = none, 1 = hour, 2 = hour and
    # minute, 3 = hour, minute and second.
    n_clock_fields = 0

    if n_chars == 4 or (n_chars == 5 and first[-1] == "Z"):
        fmt = '%Y'
    elif re.match(r"[0-9]{4}-[0-9]{3}", first):
        # YYYY-DOY format
        fmt = "%Y-%j"
        n_clock_fields = sum(n_chars >= limit - shift
                             for limit in (12, 15, 18))
    elif re.match(r"[0-9]{4}-[0-9]{2}", first):
        # YYYY-MM or YYYY-MM-DD format
        fmt = "%Y-%m-%d" if n_chars > 8 else "%Y-%m"
        n_clock_fields = sum(n_chars >= limit - shift
                             for limit in (14, 17, 20))
    else:
        # TODO: Also check for invalid time string lengths. Use JSON schema
        # regular expressions for allowed versions of ISO 8601.
        # https://github.com/hapi-server/verifier-nodejs/tree/master/schemas
        error('First time value %s is not a valid HAPI Time' % first)

    fmt = fmt + "".join(("T%H", ":%M", ":%S")[:n_clock_fields])

    # Fractional seconds: digits after the dot, or a dangling dot.
    if re.match(r".*\.[0-9].*$", first):
        fmt += ".%f"
    if re.match(r".*\.$", first) or re.match(r".*\.Z$", first):
        fmt += "."

    if has_trailing_z:
        fmt += "Z"

    return fmt
156+
157+
def hapitime2datetime(Time, **kwargs):
    """Convert HAPI timestamps to Python datetimes.

    A HAPI-compliant server represents time as an ISO 8601 string
    (with several constraints - see the `HAPI specification
    <https://github.com/hapi-server/data-specification/blob/master/hapi-dev/HAPI-data-access-spec-dev.md#representation-of-time>`_)

    `hapi()` reads these time strings into a NumPy array of Python byte
    literals. This function converts these byte literals to Python datetime
    objects.

    Typical usage:

    ::

        data = hapi(...) # Get data
        DateTimes = hapitime2datetime(data['Time']) # Convert


    All HAPI time strings must have a trailing Z. The fast (pandas) path only
    checks the first element of Time for compliance; the slower fallback path
    checks every element.

    Parameters
    ----------
    Time:
        - A numpy array of HAPI timestamp byte literals
        - A numpy array of HAPI timestamp strings
        - A list of HAPI timestamp byte literals
        - A list of HAPI timestamp strings
        - A HAPI timestamp byte literal
        - A HAPI timestamp string
    **kwargs:
        Copied and passed as the options argument to ``log``.

    Returns
    -------
    A NumPy array of Python datetime objects (UTC) with the same shape as
    Time (length 1 for a scalar input).

    Raises
    ------
    ValueError
        If Time is a list containing elements that are not strings or byte
        literals.

    All other problems are reported through ``error`` from
    ``hapiclient.util``. NOTE(review): ``error`` is presumably expected to
    raise; the ``return`` statements following it are defensive.

    Examples
    --------
    All of the following return

    ::

        array([datetime.datetime(1970, 1, 1, 0, 0, tzinfo=<UTC>)], dtype=object)

    ::

        from hapiclient.time import hapitime2datetime
        import numpy as np

        hapitime2datetime(np.array([b'1970-01-01T00:00:00.000Z']))
        hapitime2datetime(np.array(['1970-01-01T00:00:00.000Z']))

        hapitime2datetime([b'1970-01-01T00:00:00.000Z'])
        hapitime2datetime(['1970-01-01T00:00:00.000Z'])

        hapitime2datetime(b'1970-01-01T00:00:00.000Z')
        hapitime2datetime('1970-01-01T00:00:00.000Z')
    """
    from datetime import datetime

    # Prefer pytz when it is installed (Python 2 has no datetime.timezone).
    # Otherwise use the standard-library UTC object; it lives on the
    # datetime *module*, not on the datetime class imported above.
    try:
        import pytz
        tzinfo = pytz.UTC
    except ImportError:
        from datetime import timezone
        tzinfo = timezone.utc

    opts = kwargs.copy()

    if isinstance(Time, list):
        Time = np.asarray(Time)
        if not all(isinstance(x, (np.str_, np.bytes_, str, bytes))
                   for x in Time):
            raise ValueError(
                'Time list elements must be strings or byte literals.')
    elif isinstance(Time, (str, bytes)):
        Time = np.asarray([Time])

    if not isinstance(Time, np.ndarray):
        error('Problem with time data.' + '\n ')
        return

    if Time.size == 0:
        error('Time array is empty.' + '\n ')
        return

    # Work on a 1-D view of the data and restore the caller's shape at the
    # end. Testing ndim (rather than shape[0] != size) also covers (N, 1)
    # and 0-d arrays.
    shape = Time.shape
    reshape = Time.ndim != 1
    if reshape:
        Time = Time.flatten()

    if isinstance(Time[0], np.bytes_):
        # Convert from byte literals to unicode strings.
        try:
            Time = Time.astype('U')
        except (ValueError, TypeError):
            error('Problem with time data. First value: ' + str(Time[0]) + '\n ')
            return

    tic = time.time()

    # Slicing (instead of indexing) keeps an empty string from raising
    # IndexError; it is reported as a missing Z like any other bad value.
    if Time[0][-1:] != "Z":
        error("HAPI Times must have trailing Z. First element of input " +
              "Time array does not have trailing Z.")

    # This is the fastest conversion option. But it will fail on YYYY-DOY
    # format and other valid ISO 8601 dates such as 2001-01-01T00:00:03.Z
    # When infer_datetime_format is used, a TimeStamp object is returned,
    # which is the reason for the to_pydatetime() call. (When format=... is
    # used, a datetime object is returned.)
    # Although all HAPI timestamps will have trailing Z, in some cases,
    # infer_datetime_format will not return a timezone-aware Timestamp. This
    # is the reason for the call to tz_convert(tzinfo).
    # NOTE: infer_datetime_format is deprecated as of pandas 2.0 (strict
    # inference is the default there); it is kept for older pandas versions.
    # TODO: Use hapitime_format_str() and pass this as format=...
    first = Time[0]
    try:
        pandasDateTime = pandas.to_datetime(
            Time, infer_datetime_format=True
        ).tz_convert(tzinfo).to_pydatetime()
    except Exception:
        # Deliberate best effort: any pandas failure falls through to the
        # slower element-by-element parser below. Time is left untouched.
        log("Pandas processing failed, first time = %s" % first, opts)
    else:
        if reshape:
            pandasDateTime = np.reshape(pandasDateTime, shape)
        toc = time.time() - tic
        log("Pandas processing time = %.4fs, first time = %s" % (toc, first),
            opts)
        return pandasDateTime

    # Convert from Python byte literals to unicode strings
    # https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.astype.html
    # https://www.b-list.org/weblog/2017/sep/05/how-python-does-unicode/
    Time = Time.astype('U')
    # The new Time variable requires 4x more memory.
    # Could save memory at cost of speed by decoding at each iteration below,
    # e.g. Time[i] -> Time[i].decode('utf-8')

    pythonDateTime = np.empty(len(Time), dtype=object)

    # Format is derived from the first element and applied to all elements.
    fmt = hapitime_format_str(Time)

    # TODO: Will using pandas.to_datetime here with fmt work?
    for i, value in enumerate(Time):
        if value[-1:] != "Z":
            error("HAPI Times must have trailing Z. Time[" + str(i) + "] = "
                  + value + " does not have trailing Z.")
            return
        try:
            parsed = datetime.strptime(value, fmt)
        except (ValueError, TypeError):
            error('Could not parse time value ' + value + ' using ' + fmt)
            return
        pythonDateTime[i] = parsed.replace(tzinfo=tzinfo)

    toc = time.time() - tic
    log("Manual processing time = %.4fs, Input = %s, fmt = %s" %
        (toc, Time[0], fmt), opts)

    if reshape:
        pythonDateTime = np.reshape(pythonDateTime, shape)

    return pythonDateTime
0 commit comments