##########################################################################################
# julian/iso_parsers.py
##########################################################################################
"""
===========
ISO Parsers
===========
"""
import numpy as np
from julian.calendar import day_from_ymd, day_from_yd
from julian.leap_seconds import seconds_on_day
from julian.time_of_day import sec_from_hms
from julian.utc_tai_tdb_tt import tai_from_day_sec, tdb_from_tai, time_from_time
from julian._exceptions import JulianParseException, JulianValidateFailure
def _count_white(string):
"""(number of leading blanks, number of trailing blanks)"""
lstring = len(string)
if not lstring:
return (0, 0) # pragma: no cover
# Count the leading blanks
for l0 in range(lstring): # pragma: no branch
if string[l0] != ' ':
break
# Count the trailing blanks
for l1 in range(lstring): # pragma: no branch
if string[~l1] != ' ': # "~l1" means counting from the end!
break
return (l0, l1)
# key = (stripped_length, dash_count); value = (y1, m0, d0, dlen, dash_locs)
_ISO_DATE_FORMAT_INFO = {
(10,2): (4, 5, 8, 2, (4,7)), # yyyy-mm-dd
( 8,1): (4, 0, 5, 3, (4,) ), # yyyy-ddd
( 8,2): (2, 3, 6, 2, (2,5)), # yy-mm-dd
( 6,1): (2, 0, 3, 3, (2,) ), # yy-ddd
( 8,0): (4, 4, 6, 2, () ), # yyyymmdd
( 7,0): (4, 0, 4, 3, () ), # yyyyddd
( 6,0): (2, 2, 4, 2, () ), # yymmdd
( 5,0): (2, 0, 2, 3, () ), # yyddd
}
[docs]
def day_from_iso(strings, *, validate=True, syntax=False, strip=False, proleptic=False):
"""Day number based on a parsing of a date string in the ISO 8601:1988 format.
Recognized calendar date formats are "yyyy-mm-dd", "yyyymmdd", "yy-mm-dd", and
"yymmdd". Supported ordinal date formats are "yyyy-ddd", "yyyyddd", "yy-ddd", and
"yyddd". A fractional day following a decimal point is also permitted.
This parser is much faster than the more general date parsing routines. It can also
process lists or arrays of date strings of arbitrary shape, provided that every
element uses the exact same format.
Because it can handle arrays of bytestrings, it is very efficient at processing raw
data extracted from a column of an ASCII table.
Parameters:
strings (str, bytes, or array-like):
String(s) to interpret.
validate (bool, optional):
True to validate the year/month/day values.
syntax (bool, optional):
True to check the string values more closely for conformance to the ISO
standard; raise JulianParseException (a ValueError subclass) on error.
strip (bool, optional):
True to skip over leading and trailing blanks.
proleptic (bool, optional):
True to interpret all dates according to the modern Gregorian calendar, even
those that occurred prior to the transition from the Julian calendar. False to
use the Julian calendar for earlier dates.
Returns:
int or array: Day number(s) relative to January 1, 2000.
Raises:
JulianValidateFailure:
If `validate` is True and a year, month, or day value is out of range.
"""
# Convert to bytestring if necessary, replace Unicode
strings = np.array(strings).astype('S')
first_index = len(strings.shape) * (0,)
first = strings[first_index].decode('latin8')
lfirst = len(first)
# Count characters to strip
w2 = strings.itemsize - lfirst # w2 = 0 or 1 trailing null
# w0 = number of blanks before
# w1 = number of blanks after
if strip:
(w0, w1) = _count_white(first)
else:
(w0, w1) = (0, 0)
test = first.replace('-', '').replace('.', '')
if not test.isdecimal():
raise JulianParseException(f'unrecognized ISO date format: "{first}"')
# Check for a dot
lstripped = lfirst - w0 - w1 # length without padding
kdot = max(0, first.find('.')) # 0 means no dot
kend = w0 + lstripped # index of the first char after the date
kints = kdot if kdot else kend # index of first char after all integers
# Identify the format
ndashes = len(str(first).split('-')) - 1
try:
(y1, m0, d0, dlen, dashes) = _ISO_DATE_FORMAT_INFO[(kints - w0, ndashes)]
except KeyError:
raise JulianParseException(f'unrecognized ISO date format: "{first}"')
# Construct the dtype dictionary
dtype_dict = {}
dtype_dict['y'] = (f'|S{y1}', w0)
dtype_dict['d'] = (f'|S{dlen}', w0 + d0) # d is just the integer part
if m0:
dtype_dict['m'] = ('|S2', w0 + m0)
if w0:
dtype_dict['white0'] = (f'|S{w0}', 0)
for i, dash in enumerate(dashes):
dtype_dict[f'dash{i}'] = ('|S1', w0 + dash)
if kdot:
dtype_dict['dot'] = ('|S1', kdot)
flen = kend - kdot - 1
if flen:
dtype_dict['f'] = (f'|S{flen}', kdot + 1)
if w1:
dtype_dict['white1'] = (f'|S{w1}', kend)
if w2:
dtype_dict['nulls'] = ('|S1', lfirst)
if syntax:
dtype_dict['data'] = (f'|S{kend-w0}', w0)
# Extract year, month, day, and fraction; JulianParseException on failure
strings = strings.view(np.dtype(dtype_dict))
try:
y = strings['y'].astype('int')
d = strings['d'].astype('int')
m = strings['m'].astype('int') if 'm' in dtype_dict else 0
f = strings['f'].astype('int') if 'f' in dtype_dict else 0
except ValueError as e:
raise JulianParseException(str(e))
# Validate syntax if necessary
if syntax:
if 'dash0' in dtype_dict and np.any(strings['dash0'] != b'-'):
raise JulianParseException('inconsistent dashes in ISO date')
if 'dash1' in dtype_dict and np.any(strings['dash1'] != b'-'):
raise JulianParseException('inconsistent dashes in ISO date')
if 'white0' in dtype_dict and np.any(strings['white0'] != w0 * b' '):
raise JulianParseException('inconsistent white space in ISO date')
if 'white1' in dtype_dict and np.any(strings['white1'] != w1 * b' '):
raise JulianParseException('inconsistent white space in ISO date')
if 'nulls' in dtype_dict and np.any(strings['nulls'] != b'\0'):
raise JulianParseException('inconsistent null termination in ISO date')
data = bytearray(strings['data'])
if b' ' in data:
raise JulianParseException('invalid blank character in ISO date')
for key in ('y', 'd', 'm', 'f'):
if key in dtype_dict:
if b'-' in bytearray(strings[key]):
raise JulianParseException('invalid negative value in ISO date')
# Convert to day
if m0:
day = day_from_ymd(y, m, d, validate=validate, proleptic=proleptic)
else:
day = day_from_yd(y, d, validate=validate, proleptic=proleptic)
# Add fraction if needed
if kdot:
if np.shape(day):
day = day + f/10.**(flen)
else:
day = day + float(f)/10.**(flen)
return day
########################################
[docs]
def sec_from_iso(strings, *, validate=True, leapsecs=True, strip=False, syntax=False):
"""Accumulated number of seconds into a day, based on a parsing of a time string in
ISO 8601:1988 "extended" format (but using a decimal point for fractional seconds
rather than a comma).
The format required is "hh:mm:ss[.s...][Z]". This parser is much faster than the more
general time parsing routines. It can also process lists or arrays of date strings of
arbitrary shape, provided that every element uses the exact same format.
Because it can handle arrays of bytestrings, it is very efficient at processing raw
data extracted from a column of an ASCII table.
Parameters:
strings (str, bytes, or array-like[str or bytes]):
Strings to interpret. If an array is provided, all values must use the same
format.
validate (bool, optional):
True to check the year/month/day values more carefully; raise
JulianValidateFailure (a ValueError subclass) on error.
syntax (bool, optional):
True to check the string values more closely for conformance to the ISO
standard; raise JulianParseException (a ValueError subclass) on error.
strip (bool, optional):
True to skip over leading and trailing blanks.
leapsecs (bool, optional):
True to tolerate leap second values during validation.
Returns:
int, float, or array:
Elapsed seconds since beginning of day. Values are integral the seconds value
is integral.
Raises:
JulianValidateFailure: If `validate` is True and an hour, minute, or second value
is out of range.
"""
# Convert to bytestring if necessary, replace Unicode
strings = np.array(strings).astype('S')
first_index = len(strings.shape) * (0,)
first = strings[first_index].decode('latin8')
lfirst = len(first)
# Count characters to strip
w2 = strings.itemsize - lfirst # w2 = 0 or 1 trailing null
# w0 = number of blanks before
# w1 = number of blanks after
if strip:
(w0, w1) = _count_white(first)
else:
(w0, w1) = (0, 0)
test = first.replace(':', '').replace('.', '').rstrip('Z')
if not test.isdecimal():
raise JulianParseException(f'unrecognized ISO time format: "{first}"')
# Check for "Z"
lstripped = lfirst - w0 - w1
wz = int(first[w0 + lstripped - 1] == 'Z') # wz = 0 or 1
lstripped -= wz # width of time string without extras
kend = w0 + lstripped # index of the first char after the time
# Locate colons and dots
first_array = np.array(list(first))
kcolons = np.where(first_array == ':')[0]
if kcolons.size > 2:
raise JulianParseException('unrecognized ISO time format; too many colons: '
f'"{first}"')
kdots = np.where(first_array == '.')[0]
if kdots.size > 1:
raise JulianParseException('unrecognized ISO time format; too many decimals: '
f'"{first}"')
kdot = 0
elif kdots.size == 1:
kdot = kdots[0]
else:
kdot = 0
kints = kdot if kdot else kend # index of first char after all integers
# Identify the h, m, s, and fraction field locations and widths
if kcolons.size:
kcolons = [w0-1] + list(kcolons) # colon locations plus fake one in front
khms = np.array(kcolons) + 1 # start locations of fields
khms1 = list(kcolons[1:]) + [kints] # end locations of all integer fields
widths = khms1 - khms # widths of fields
if np.any(widths != 2):
raise JulianParseException(f'invalid field width in ISO time: "{first}"')
else:
width = kints - w0
fields = width // 2
if fields > 3 or width != fields * 2:
raise JulianParseException('invalid text width in ISO time format: '
f'"{first}"')
khms = w0 + 2 * np.arange(fields) # start locations of fields
widths = fields * [2]
# Construct the dtype dictionary
dtype_dict = {}
for i, w in enumerate(widths):
key = 'hms'[i]
dtype_dict[key] = (f'|S{w}', khms[i])
if w0:
dtype_dict['white0'] = (f'|S{w0}', 0)
for i, kcolon in enumerate(kcolons[1:]): # skip fake colon in front
dtype_dict[f'colon{i}'] = ('|S1', kcolon)
if kdot:
dtype_dict['dot'] = ('|S1', kdot)
flen = kend - kdot - 1
if flen:
dtype_dict['f'] = (f'|S{flen}', kdot + 1)
if wz:
dtype_dict['z'] = ('|S1', kend)
if w1:
dtype_dict['white1'] = (f'|S{w1}', kend + wz)
if w2:
dtype_dict['nulls'] = ('|S1', lfirst)
if syntax:
dtype_dict['data'] = (f'|S{kend-w0}', w0)
# Extract hours, minutes, seconds; JulianParseException on failure
strings = strings.view(np.dtype(dtype_dict))
try:
h = strings['h'].astype('int')
m = strings['m'].astype('int') if 'm' in dtype_dict else 0
s = strings['s'].astype('int') if 's' in dtype_dict else 0
except ValueError as e:
raise JulianParseException(str(e))
if kdot:
if 'f' in dtype_dict:
f = strings['f'].astype('int') / 10.**flen
else:
f = 0.
if 's' in dtype_dict:
s = s + f
elif 'm' in dtype_dict:
m = m + f
else:
h = h + f
# Validate if necessary
if syntax:
if 'white0' in dtype_dict and np.any(strings['white0'] != w0 * b' '):
raise JulianParseException('inconsistent white space in ISO time')
if 'colon0' in dtype_dict and np.any(strings['colon0'] != b':'):
raise JulianParseException('inconsistent colons in ISO time')
if 'colon1' in dtype_dict and np.any(strings['colon1'] != b':'):
raise JulianParseException('inconsistent colons in ISO time')
if 'dot' in dtype_dict and np.any(strings['dot'] != b'.'):
raise JulianParseException('inconsistent decimal points in ISO time')
if 'z' in dtype_dict and np.any(strings['z'] != b'Z'):
raise JulianParseException('inconsistent "Z" usage in ISO time')
if 'white1' in dtype_dict and np.any(strings['white1'] != w1 * b' '):
raise JulianParseException('inconsistent white space in ISO time')
if 'nulls' in dtype_dict and np.any(strings['nulls'] != b'\0'):
raise JulianParseException('inconsistent null termination in ISO time')
data = bytearray(strings['data'])
if b' ' in data or b'-' in data:
raise JulianParseException('invalid blank character in ISO time')
return sec_from_hms(h, m, s, validate=validate, leapsecs=leapsecs)
########################################
[docs]
def day_sec_from_iso(strings, *, validate=True, syntax=False, strip=False,
proleptic=False):
"""Day and second based on a parsing of the string in ISO date-time format.
This function parses date-time strings in the fixed ISO format, using "yyyy-mm-dd"
or "yyyy-ddd" for the date, a single space or "T", and a time as "hh:mm:ss[.s...][Z]".
It is much faster than the more general date parsing routines. It can also process
lists or arrays of date strings of arbitrary shape, provided that every element uses
the exact same format.
Because it can handle arrays of bytestrings, it is very efficient at processing raw
data extracted from a column of an ASCII table.
Parameters:
strings (str, bytes, or array-like:
Strings to interpret. If an array is provided, all values must use the same
format.
validate (bool, optional):
True to validate the ranges of the year, month, and day values.
syntax (bool, optional):
True to check the string values more closely for conformance to the ISO
standard; raise JulianParseException (a ValueError subclass) on error.
strip (bool, optional):
True to skip over leading and trailing blanks.
leapsecs (bool, optional):
True to tolerate leap second values during validation.
proleptic (bool, optional):
True to interpret all dates according to the modern Gregorian calendar, even
those that occurred prior to the transition from the Julian calendar. False to
use the Julian calendar for earlier dates.
Returns:
tuple (day, sec):
- **day** (*int or array*): Day number(s) relative to January 1, 2000.
- **sec** (*int, float, or array*): Elapsed seconds since beginning of day. Values
are integral the seconds value is integral.
Raises:
JulianValidateFailure:
If `validate` is True and any numeric value is out of range.
"""
# Convert to an array of strings, replace Unicode
strings = np.array(strings).astype('S')
first_index = len(strings.shape) * (0,)
first = strings[first_index].decode('latin8')
lfirst = len(first)
# Check for a T or blank separator
csep = 'T'
isep = first.find(csep)
if isep == -1:
w0, w1 = _count_white(first)
csep = ' '
isep = first.find(csep, w0)
if isep == lfirst - w1:
isep = -1
# If no separator is found, it is just a date
if isep == -1:
return (day_from_iso(strings, validate=validate, strip=strip), 0)
# Otherwise, parse the date and time separately
dtype_dict = {'date': ('|S' + str(isep), 0),
'time': ('|S' + str(lfirst - isep - 1), isep + 1),
'sep' : ('|S1', isep)}
strings = strings.view(np.dtype(dtype_dict))
day = day_from_iso(strings['date'], validate=validate, syntax=syntax, strip=strip,
proleptic=proleptic)
sec = sec_from_iso(strings['time'], validate=validate, syntax=syntax, strip=strip,
leapsecs=True)
if syntax:
if np.any(strings['sep'] != csep.encode('latin8')):
raise JulianParseException('invalid ISO date-time punctuation')
if validate:
if np.any(sec >= seconds_on_day(day)):
raise JulianValidateFailure('seconds value is outside allowed range')
return (day, sec)
########################################
[docs]
def tai_from_iso(strings, *, validate=True, strip=False, proleptic=False):
"""TAI time given an ISO date or date-time string.
This is a shortcut for `time_from_iso()` with timesys='TAI'.
Parameters:
strings (str, bytes, or array-like):
Strings to interpret. If an array is provided, all values must use the same
format.
validate (bool, optional):
True to validate the date and time values.
strip (bool, optional):
True to skip over leading and trailing blanks.
proleptic (bool, optional):
True to interpret all dates according to the modern Gregorian calendar, even
those that occurred prior to the transition from the Julian calendar. False to
use the Julian calendar for earlier dates.
Returns:
int, float, or array: Time in seconds TAI.
Raises:
JulianValidateFailure:
If a value embedded in the date or time is out of range.
"""
(day, sec) = day_sec_from_iso(strings, validate=validate, strip=strip,
proleptic=proleptic)
return tai_from_day_sec(day, sec)
[docs]
def tdb_from_iso(strings, *, validate=True, strip=False, proleptic=False):
"""TDB time given an ISO date or date-time string.
This is a shortcut for `time_from_iso()` with timesys='TDB'.
Parameters:
strings (str, bytes, or array-like):
Strings to interpret. If an array is provided, all values must use the same
format.
validate (bool, optional):
True to validate the date and time values.
strip (bool, optional):
True to skip over leading and trailing blanks.
proleptic (bool, optional):
True to interpret all dates according to the modern Gregorian calendar, even
those that occurred prior to the transition from the Julian calendar. False to
use the Julian calendar for earlier dates.
Returns:
int, float, or array: Time in seconds TDB.
Raises:
JulianValidateFailure:
If a value embedded in the date or time is out of range.
"""
(day, sec) = day_sec_from_iso(strings, validate=validate, strip=strip,
proleptic=proleptic)
return tdb_from_tai(tai_from_day_sec(day, sec))
[docs]
def time_from_iso(strings, timesys='TAI', *, validate=True, strip=False, proleptic=False):
"""Time in a specified time system given an ISO date or date-time string.
Parameters:
strings (str, bytes, or array-like[str or bytes]):
Strings to interpret. If an array is provided, all values must use the same
format.
timesys (str):
Name of the time system, "UTC", "TAI", "TDB", or "TT".
validate (bool, optional):
True to validate the date and time values.
strip (bool, optional):
True to skip over leading and trailing blanks.
proleptic (bool, optional):
True to interpret all dates according to the modern Gregorian calendar, even
those that occurred prior to the transition from the Julian calendar. False to
use the Julian calendar for earlier dates.
Returns:
int, float, or array: Time in seconds in the specified time system.
Raises:
JulianValidateFailure:
If a value embedded in the date or time is out of range.
"""
tai = tai_from_iso(strings, validate=validate, strip=strip, proleptic=proleptic)
return time_from_time(tai, 'TAI', newsys=timesys)
##########################################################################################