Source code for julian.time_pyparser

##########################################################################################
# julian/time_pyparser.py
##########################################################################################
"""
=======================
Time pyparsing Grammars
=======================
"""

import numpy as np
from julian._TIMEZONES import TIMEZONES

from pyparsing import (
    CaselessLiteral,
    Combine,
    FollowedBy,
    Literal,
    OneOrMore,
    Optional,
    ParserElement,
    StringEnd,
    Suppress,
    White,
    Word,
    WordEnd,
    ZeroOrMore,
    alphanums,
    alphas,
    nums,
    one_of,
    srange,
)

##########################################################################################
# Begin grammar
##########################################################################################

# All whitespace is handled explicitly
ParserElement.set_default_whitespace_chars('')

# Useful definitions...
white     = Suppress(OneOrMore(White()))
opt_white = Suppress(ZeroOrMore(White()))
colon     = Suppress(Literal(':'))
not_colon = ~FollowedBy(Literal(':'))

alpha_end = WordEnd(alphas)
num_end   = WordEnd(nums)
word_end  = WordEnd(alphanums)

nonzero = srange('[1-9]')

def _action(name, value, s, l, t):
    return [(name, value), ('~', s.upper().index(t[0].upper(),l) + len(t[0]))]

def _actions(items, s, l, t):
    item_list = []
    for k in range(0, len(items), 2):
        item_list.append((items[k], items[k+1]))
    return item_list + [('~', s.upper().index(t[0].upper(),l) + len(t[0]))]

##########################################################################################
# Hours 0-23 or 1-12
##########################################################################################

# A number 0-23, zero-padded to two digits
zero_23_2digits = (
    Word('01', nums,  exact=2) |    # 00-19
    Word('2', '0123', exact=2)      # 20-23
)

# A number 0-23, possibly zero-padded or right-justified to two digits
zero_23 = (
    zero_23_2digits |               # 00-23
    Optional(Suppress(Literal(' '))) + Word(nums, exact=1)  # 0-9
)

hour = zero_23.copy()
hour.set_parse_action(lambda s,l,t: _action('HOUR', int(t[0]), s,l,t))

hour_strict = zero_23_2digits.copy()
hour_strict.set_parse_action(lambda s,l,t: _action('HOUR', int(t[0]), s,l,t))

hour_float = Combine(zero_23 + '.' + Optional(Word(nums)))
hour_float.set_parse_action(lambda s,l,t: _action('HOUR', float(t[0]), s,l,t))

hour_float_strict = Combine(zero_23_2digits + '.' + Optional(Word(nums)))
hour_float_strict.set_parse_action(lambda s,l,t: _action('HOUR', float(t[0]), s,l,t))

# A number 1-12, zero-padded to two digits
one_12_2digit = (
    Word('0', nonzero, exact=2) |   # 01-09
    Word('1', '012', exact=2)       # 10-12
)

# A number 1-12, possibly zero-padded or right-justified to two digits
one_12 = (
    one_12_2digit |                 # 01-12
    Optional(Suppress(Literal(' '))) + Word(nonzero, exact=1)   # 1-9
)

hour_am = one_12.copy()
hour_am.set_parse_action(lambda s,l,t: _action('HOUR', int(t[0]) % 12, s,l,t))
# 12 o'clock gets converted to 0, others unchanged

hour_pm = one_12.copy()
hour_pm.set_parse_action(lambda s,l,t: _action('HOUR', 12 + int(t[0]) % 12, s,l,t))
# 12 o'clock gets converted to 12, others to 13-23

hour_am_float = Combine(one_12.copy() + Literal('.') + Word(nums))
hour_am_float.set_parse_action(lambda s,l,t: _action('HOUR', float(t[0])%12, s,l,t))

hour_pm_float = Combine(one_12.copy() + Literal('.') + Word(nums))
hour_pm_float.set_parse_action(lambda s,l,t: _action('HOUR', 12 + float(t[0])%12, s,l,t))

##########################################################################################
# Minutes 0-59 or 0-1439
##########################################################################################

# A number 0-59, zero-padded to two digits
zero_59_2digits = Word('012345', nums, exact=2)

# A number 0-59, two digits but with a possible leading blank instead of zero
zero_59 = zero_59_2digits | Suppress(Literal(' ')) + Word(nums, exact=1)

minute = zero_59.copy()
minute.set_parse_action(lambda s,l,t: _action('MINUTE', int(t[0]), s,l,t))

minute_strict = zero_59_2digits.copy()
minute_strict.set_parse_action(lambda s,l,t: _action('MINUTE', int(t[0]), s,l,t))

# A floating-point number, 0.000-59.999
minute_float = Combine(zero_59 + Literal('.') + Optional(Word(nums)))
minute_float.set_parse_action(lambda s,l,t: _action('MINUTE', float(t[0]), s,l,t))

minute_float_strict = Combine(zero_59_2digits + Literal('.') + Optional(Word(nums)))
minute_float_strict.set_parse_action(lambda s,l,t: _action('MINUTE', float(t[0]), s,l,t))

# A number 0-1439, no leading zeros or white space
zero_1439 = (
    Combine('14' + Word('0123', nums, exact=2)) |   # 1400-1439
    Combine('1'  + Word('0123', nums, exact=3)) |   # 1000-1399
    Word(nonzero, nums, min=1, max=3) |             # 1-999
    Literal('0')                                    # 0
)

minute1439 = zero_1439.copy()
minute1439.set_parse_action(lambda s,l,t: _action('MINUTE', int(t[0]), s,l,t))

minute1439_float = Combine(zero_1439 + Literal('.') + Optional(Word(nums)))
minute1439_float.set_parse_action(lambda s,l,t: _action('MINUTE', float(t[0]), s,l,t))

##########################################################################################
# Seconds 0-59 or 0-86399
##########################################################################################

second = zero_59.copy()
second.set_parse_action(
    lambda s,l,t: _actions(['SECOND', int(t[0]), 'LEAPSEC', False], s,l,t))

second_strict = zero_59_2digits.copy()
second_strict.set_parse_action(
    lambda s,l,t: _actions(['SECOND', int(t[0]), 'LEAPSEC', False], s,l,t))

# A floating-point number, 0.000-59.999
second_float = Combine(zero_59 + Literal('.') + Optional(Word(nums)))
second_float.set_parse_action(
    lambda s,l,t: _actions(['SECOND', float(t[0]), 'LEAPSEC', False], s,l,t))

second_float_strict = Combine(zero_59_2digits + Literal('.') + Optional(Word(nums)))
second_float_strict.set_parse_action(
    lambda s,l,t: _actions(['SECOND', float(t[0]), 'LEAPSEC', False], s,l,t))

# A number 0-86399, no leading zeros or white space
zero_86399 = (
    Combine('86' + Word('0123',   nums, exact=3)) |     # 86000-86399
    Combine('8'  + Word('012345', nums, exact=4)) |     # 80000-85999
    Word('1234567', nums, exact=5)    |                 # 10000-79999
    Word(nonzero, nums, min=1, max=4) |                 # 1-9999
    Literal('0')                                        # 0
)

second86399 = zero_86399.copy()
second86399.set_parse_action(
    lambda s,l,t: _actions(['SECOND', int(t[0]), 'LEAPSEC', False], s,l,t))

second86399_float = Combine(zero_86399 + Literal('.') + Optional(Word(nums)))
second86399_float.set_parse_action(
    lambda s,l,t: _actions(['SECOND', float(t[0]), 'LEAPSEC', False], s,l,t))

##########################################################################################
# Leap seconds 0-69 or 0-86409
##########################################################################################

# A number 0-69, zero-padded to two digits
zero_69_2digits = Word('0123456', nums, exact=2)

# A number 0-69, two digits but with a possible leading blank instead of zero
zero_69 = zero_69_2digits | Suppress(Literal(' ')) + Word(nums, exact=1)

leapsec = zero_69.copy()
leapsec.set_parse_action(
    lambda s,l,t: _actions(['SECOND', int(t[0]), 'LEAPSEC', int(t[0]) >= 60], s,l,t))

leapsec_strict = zero_69_2digits.copy()
leapsec_strict.set_parse_action(
    lambda s,l,t: _actions(['SECOND', int(t[0]), 'LEAPSEC', int(t[0]) >= 60], s,l,t))

# A floating-point number, 0.000-69.999
leapsec_float = Combine(zero_69 + Literal('.') + Optional(Word(nums)))
leapsec_float.set_parse_action(
    lambda s,l,t: _actions(['SECOND', float(t[0]), 'LEAPSEC', float(t[0]) >= 60.], s,l,t))

leapsec_float_strict = Combine(zero_69_2digits + Literal('.') + Optional(Word(nums)))
leapsec_float_strict.set_parse_action(
    lambda s,l,t: _actions(['SECOND', float(t[0]), 'LEAPSEC', float(t[0]) >= 60.], s,l,t))

# A number 0-86409, no leading zeros or white space
zero_86409 = Combine(Literal('8640') + Word(nums, exact=1)) | zero_86399

leapsec86409 = zero_86409.copy()
leapsec86409.set_parse_action(
    lambda s,l,t: _actions(['SECOND', int(t[0]), 'LEAPSEC', int(t[0]) >= 86400], s,l,t))

leapsec86409_float = Combine(zero_86409 + Literal('.') + Optional(Word(nums)))
leapsec86409_float.set_parse_action(
    lambda s,l,t: _actions(['SECOND', float(t[0]), 'LEAPSEC', float(t[0]) >= 86400],
                           s,l,t))

##########################################################################################
# Time zones
##########################################################################################

# As a numeric offset
z_timezone = opt_white + CaselessLiteral('Z')
z_timezone.set_parse_action(
    lambda s,l,t: _actions(['TZ', 'Z', 'TZMIN', 0, 'TIMESYS', 'UTC'], s,l,t))

z_timezone_strict = Literal('Z')
z_timezone_strict.set_parse_action(
    lambda s,l,t: _actions(['TZ', 'Z', 'TZMIN', 0, 'TIMESYS', 'UTC'], s,l,t))

tz_hours = (Literal('-') | Literal('+')) + (
    Word('0', nums, exact=2) |      # 00-09
    Word('1', '01234', exact=2)     # 10-14
)
tz_minutes = one_of(['00', '15', '30', '45'])

def _tzmin(string):       # convert "-hh", "+hh", "-hh:mm", "+hh:mm" to minutes
    sign = -1 if string[0] == '-' else +1
    h = int(string[1:3])
    m = 0 if len(string) == 3 else int(string[3:].lstrip(':'))
    return sign * (60 * h + m)

hhmm_timezone = Combine(tz_hours + Optional(Optional(Literal(':')) + tz_minutes))
hhmm_timezone.set_parse_action(
    lambda s,l,t: _actions(['TZ', t[0], 'TZMIN', _tzmin(t[0]), 'TIMESYS', 'UTC'], s,l,t))

hhmm_tz = z_timezone | hhmm_timezone
opt_hhmm_tz = Optional(hhmm_tz)

# As an abbreviation
def _tzmin_lookup(string):
    return _tzmin(TIMEZONES[string])

named_tz = opt_white + one_of(TIMEZONES.keys(), caseless=True)
named_tz.set_parse_action(
    lambda s,l,t: _actions(['TZ', t[0].upper(), 'TZMIN', _tzmin_lookup(t[0]),
                            'TIMESYS', 'UTC'], s,l,t))
opt_named_tz = Optional(named_tz)

timezone = hhmm_tz | named_tz
opt_timezone = Optional(timezone)

iso_timezone = z_timezone_strict | hhmm_timezone
opt_iso_timezone = Optional(z_timezone_strict | hhmm_timezone)

##########################################################################################
# Time system suffix, e.g., "UTC", "TAI", "TDB", or "TDT"
##########################################################################################

timesys_et = CaselessLiteral('ET')
timesys_et.set_parse_action(lambda s,l,t: _action('TIMESYS', 'TDB', s,l,t))

timesys_tt = CaselessLiteral('TDT')
timesys_tt.set_parse_action(lambda s,l,t: _action('TIMESYS', 'TT', s,l,t))

timesys_utc = one_of(['UTC', 'UT1', 'UT'], caseless=True)
timesys_utc.set_parse_action(lambda s,l,t: _action('TIMESYS', 'UTC', s,l,t))

timesys_z = CaselessLiteral('Z')    # both a time system and a time zone!
timesys_z.set_parse_action(
    lambda s,l,t: _actions(['TIMESYS', 'UTC', 'TZ', 'Z', 'TZMIN', 0], s,l,t))

timesys_other = one_of(['TAI', 'TDB', 'TT'], caseless=True)
timesys_other.set_parse_action(lambda s,l,t: _action('TIMESYS', t[0].upper(), s,l,t))

timesys = opt_white + (timesys_other | timesys_utc| timesys_z | timesys_et | timesys_tt)
opt_timesys = Optional(timesys)
req_timesys = timesys       # because variable "timesys" can be used for other purposes

##########################################################################################
# TIME_PYPARSERS
##########################################################################################

am = opt_white + CaselessLiteral('AM')
pm = opt_white + CaselessLiteral('PM')
am.set_parse_action(lambda s,l,t: _actions([], s,l,t))
pm.set_parse_action(lambda s,l,t: _actions([], s,l,t))

H = CaselessLiteral('h')
M = CaselessLiteral('m')
S = CaselessLiteral('s')
H.set_parse_action(lambda s,l,t: _actions([], s,l,t))
M.set_parse_action(lambda s,l,t: _actions([], s,l,t))
S.set_parse_action(lambda s,l,t: _actions([], s,l,t))

# Seconds parser, index is [leapsec]
s_parsers = [second_float | second, leapsec_float | leapsec]

# Parser for h/m/s notation, index is [leapsec]
hm_parser = ( (hour_float       | hour)       + opt_white + H
            | (minute1439_float | minute1439) + opt_white + M)
hms_parsers = [hm_parser | (second86399_float  | second86399)  + opt_white + S,
               hm_parser | (leapsec86409_float | leapsec86409) + opt_white + S]

opt_timezone_timesys = Optional(timezone | timesys)

# Shape is [floating, leapsecs, timesys, timezones, ampm]
TIME_PYPARSERS = np.empty((2,2,2,2,2), dtype='object')

for l in (0,1):
    opt_colon_s = colon + s_parsers[l] | not_colon

    time = hour + colon + minute + opt_colon_s
    time_ampm = ( hour_am + Optional(colon + minute + opt_colon_s) + am
                | hour_pm + Optional(colon + minute + opt_colon_s) + pm)
    ftime = hour + colon + minute_float + not_colon
    ftime_ampm = ( hour_am + colon + minute_float + am | hour_am_float + am
                 | hour_pm + colon + minute_float + pm | hour_pm_float + pm)
    xtime      = ftime      | time
    xtime_ampm = ftime_ampm | time_ampm

    # [floating, ..., am/pm]
    TIME_PYPARSERS[0,l,0,0,0] = time
    TIME_PYPARSERS[0,l,0,0,1] = time_ampm | time

    TIME_PYPARSERS[1,l,0,0,0] = xtime
    TIME_PYPARSERS[1,l,0,0,1] = xtime_ampm | xtime

    # [floating, ..., time zones, am/pm]
    # Only named, not numeric time zones after am/pm.
    TIME_PYPARSERS[0,l,0,1,0] = time       + opt_timezone
    TIME_PYPARSERS[0,l,0,1,1] = time_ampm  + opt_named_tz | TIME_PYPARSERS[0,l,0,1,0]
    TIME_PYPARSERS[1,l,0,1,0] = xtime      + opt_timezone
    TIME_PYPARSERS[1,l,0,1,1] = xtime_ampm + opt_named_tz | TIME_PYPARSERS[1,l,0,1,0]

    # [floating, ..., time system, time zones, am/pm]
    # The time system can only be specified in the absence of a time zone or am/pm.
    TIME_PYPARSERS[0,l,1,0,0] = time + opt_timesys
    TIME_PYPARSERS[0,l,1,1,0] = time + opt_timezone_timesys
    TIME_PYPARSERS[0,l,1,0,1] = time + timesys | TIME_PYPARSERS[0,l,0,0,1]
    TIME_PYPARSERS[0,l,1,1,1] = time + timesys | TIME_PYPARSERS[0,l,0,1,1]

    TIME_PYPARSERS[1,l,1,0,0] = xtime + opt_timesys
    TIME_PYPARSERS[1,l,1,1,0] = xtime + opt_timezone_timesys
    TIME_PYPARSERS[1,l,1,0,1] = xtime + timesys | TIME_PYPARSERS[1,l,0,0,1]
    TIME_PYPARSERS[1,l,1,1,1] = xtime + timesys | TIME_PYPARSERS[1,l,0,1,1]

    # Augment the floating cases with h/m/s notation
    for s in (0,1):
      for z in (0,1):
        for a in (0,1):
            TIME_PYPARSERS[1,l,s,z,a] |= hms_parsers[l]

##########################################################################################
# ISO_TIME_PYPARSER
##########################################################################################

strict_s = second_float_strict | second_strict
colon_strict_s = colon + strict_s
opt_colon_strict_s = not_colon | Optional(colon_strict_s)

# Seconds parser, index is [leapsec]
s_parsers = [second_float_strict | second_strict, leapsec_float_strict | leapsec_strict]

# Index is [floating, leapsecs, timezones]
ISO_TIME_PYPARSERS = np.empty((2,2,2), dtype='object')

for l in (0,1):
    iso_time = hour_strict + ( colon + minute_strict + (colon + s_parsers[l] | not_colon)
                             | minute_strict + Optional(s_parsers[l])
                             | not_colon)
    iso_ftime = ( hour_float_strict
                | hour_strict + Optional(colon) + minute_float_strict) + not_colon

    ISO_TIME_PYPARSERS[0,l,0] = iso_time
    ISO_TIME_PYPARSERS[0,l,1] = iso_time + opt_iso_timezone
    ISO_TIME_PYPARSERS[1,l,0] = iso_ftime | iso_time
    ISO_TIME_PYPARSERS[1,l,1] = ISO_TIME_PYPARSERS[1,l,0] + opt_iso_timezone

##########################################################################################
# PyParser constructor function
##########################################################################################

[docs] def time_pyparser(*, leapsecs=False, ampm=False, timezones=False, timesys=False, floating=False, iso_only=False, padding=True, embedded=False): """A time pyparser. Parameters: leapsecs (bool, optional): True to recognize leap seconds. ampm (bool, optional): True to recognize "am" and "pm" suffixes. timezones (bool, optional): True to recognize and interpret time zones. If True, returned values are adjusted to UTC. timesys (bool, optional): True to recognize an embedded time system such as "UTC", "TAI", etc. floating (bool, optional): True to allow times specified using floating-point values of hours or minutes. iso_only (bool, optional): Require an ISO 8601:1988-compatible time string; ignore `ampm`, `timesys`, and `floating` options. padding (bool, optional): True to ignore leading or trailing white space. embedded (bool, optional): True to allow the time to be followed by additional text. Returns: pyparsing.ParserElement: A parser for the selected syntax. Calling the as_list() method on the returned ParseResult object returns a list containing some but not all of these tuples: * ("HOUR", hour): Hour if specified, 0-23, as an int or possibly a float. Hours am/pm are converted to the range 0-23 automatically. * ("MINUTE", minute): Minute if specified, integer or float. * ("SECOND", second): Second if specified, integer or float. * ("LEAPSEC", True): Present and True if this is a leap second. * ("TZ", tz_name): Name of the time zone if specified. * ("TZMIN", tzmin): Offset of the time zone in minutes. * ("TIMESYS", name): Time system if specified: "UTC", "TAI", "TDB", or "TDT". * ("~", number): The last occurrence of this tuple in the list contains the number of characters matched. """ if iso_only: pyparser = ISO_TIME_PYPARSERS[int(floating), int(leapsecs), int(timezones)] else: pyparser = TIME_PYPARSERS[int(floating), int(leapsecs), int(timesys), int(timezones), int(ampm)] if padding: pyparser = opt_white + pyparser if embedded: pyparser = pyparser + ~FollowedBy(alphanums) elif padding: pyparser = pyparser + opt_white + StringEnd() else: pyparser = pyparser + StringEnd() return pyparser
##########################################################################################