##########################################################################################
# julian/date_pyparser.py
##########################################################################################
"""
=======================
Date pyparsing Grammars
=======================
"""
import numpy as np
from julian.mjd_pyparser import mjd_pyparser
from pyparsing import (
CaselessLiteral,
Combine,
FollowedBy,
Literal,
OneOrMore,
Optional,
ParserElement,
StringEnd,
Suppress,
White,
Word,
WordEnd,
ZeroOrMore,
alphanums,
alphas,
nums,
one_of,
srange,
)
##########################################################################################
# Begin grammar
##########################################################################################
# All whitespace is handled explicitly
ParserElement.set_default_whitespace_chars('')
# Useful definitions...
white = Suppress(OneOrMore(White()))
opt_white = Suppress(ZeroOrMore(White()))
comma = Suppress(Literal(',') + ZeroOrMore(White()))
opt_comma = opt_white + Suppress(Literal(',')) + opt_white | white
alpha_end = WordEnd(alphas)
num_end = WordEnd(nums)
word_end = WordEnd(alphanums)
nonzero = srange('[1-9]')
def _action(name, value, s, l, t):
return [(name, value), ('~', s.upper().index(t[0].upper(), l) + len(t[0]))]
def _no_action(s, l, t):
return [('~', s.upper().index(t[0].upper(), l) + len(t[0]))]
##########################################################################################
# Year
##########################################################################################
year_4digit = Word(nums, exact=4)
year_4digit.set_parse_action(lambda s,l,t: _action('YEAR', int(t[0]), s,l,t))
year_strict = Word('12', nums, exact=4)
year_strict.set_parse_action(lambda s,l,t: _action('YEAR', int(t[0]), s,l,t))
# Year 00-49, assumed 2000 to 2049
year04_2digit = Word('01234', nums, exact=2)
year04_2digit.set_parse_action(lambda s,l,t: _action('YEAR', 2000 + int(t[0]), s,l,t))
# Year 50-99, assumed 1950 to 1999
year59_2digit = Word('56789', nums, exact=2)
year59_2digit.set_parse_action(lambda s,l,t: _action('YEAR', 1900 + int(t[0]), s,l,t))
year_2digit = year04_2digit | year59_2digit
year = year_4digit | year_2digit
##########################################################################################
# General year (negative, CE, BCE, etc.)
##########################################################################################
# Extended year format for the revised ISO 8601 standard
signed_year = Combine(one_of(['-', '+']) + Word(nums, min=4))
signed_year.set_parse_action(lambda s,l,t: _action('YEAR', int(t[0]), s,l,t))
# CE years
unsigned_ce_year = Word(nonzero, nums)
unsigned_ce_year.set_parse_action(lambda s,l,t: _action('YEAR', int(t[0]), s,l,t))
ce_suffix = one_of(['CE', 'AD'], caseless=True)
ce_suffix.set_parse_action(lambda s,l,t: _no_action(s,l,t))
ce_suffixed_year = unsigned_ce_year + opt_white + ce_suffix
ad_year = Suppress(CaselessLiteral('AD')) + opt_white + unsigned_ce_year
ce_year = ad_year | ce_suffixed_year
# BCE years
unsigned_bce_year = Word(nonzero, nums)
unsigned_bce_year.set_parse_action(lambda s,l,t: _action('YEAR', 1 - int(t[0]), s,l,t))
bce_suffix = one_of(['BCE', 'BC'], caseless=True)
bce_suffix.set_parse_action(lambda s,l,t: _no_action(s,l,t))
bce_year = unsigned_bce_year + opt_white + bce_suffix
suffixed_year = ce_suffixed_year | bce_year
extended_year = ce_year | bce_year | signed_year
##########################################################################################
# Month
##########################################################################################
# Full month names
FULL_MONTHS = {'JANUARY':1, 'FEBRUARY':2, 'MARCH':3, 'APRIL':4,'MAY':5, 'JUNE':6,
'JULY':7, 'AUGUST':8, 'SEPTEMBER':9, 'OCTOBER':10, 'NOVEMBER':11,
'DECEMBER':12}
full_month = one_of(FULL_MONTHS.keys(), caseless=True) + alpha_end
full_month.set_parse_action(lambda s,l,t: _action('MONTH', FULL_MONTHS[t[0].upper()],
s,l,t))
# Three-letter abbreviations
ABBREV_MONTHS = {'JAN':1, 'FEB':2, 'MAR':3, 'APR':4,'MAY':5, 'JUN':6, 'JUL':7, 'AUG':8,
'SEP':9, 'OCT':10, 'NOV':11, 'DEC':12}
abbrev_month = one_of(ABBREV_MONTHS.keys(), caseless=True) + alpha_end
abbrev_month.set_parse_action(lambda s,l,t: _action('MONTH', ABBREV_MONTHS[t[0].upper()],
s,l,t))
# Number 1-12 with zero padding to two digits
one_12_2digit = (
Word('0', nonzero, exact=2) | # 01-09
Word('1', '012', exact=2) # 10-12
)
# Number 1-12, optionally zero-padded or right justified to width 2
one_12 = (
one_12_2digit | # 01-12
Combine(Optional(Literal(' ')) + Word(nonzero, exact=1)) # 1-9
)
month_2digit = one_12_2digit.copy()
month_2digit.set_parse_action(lambda s,l,t: _action('MONTH', int(t[0]), s,l,t))
numeric_month = one_12.copy()
numeric_month.set_parse_action(lambda s,l,t: _action('MONTH', int(t[0]), s,l,t))
# Note: full_month must appear before abbrev_month!
month = full_month | abbrev_month + Suppress(Optional('.')) | numeric_month
month_strict = full_month | abbrev_month + Suppress(Optional('.'))
dotless_month = full_month | abbrev_month | numeric_month
##########################################################################################
# Date 1-31 or fractional date 1.000 to 31.999
##########################################################################################
# 01-31
one_31_2digit = (
Word('0', nonzero, exact=2) | # 01-09
Word('12', nums, exact=2) | # 10-29
Word('3', '01', exact=2) # 30-31
)
# 1-31, optionally zero-padded or right-justified to width two
one_31 = (
one_31_2digit | # 01-31
Combine(Optional(Suppress(Literal(' '))) + Word(nonzero, exact=1)) # 1-9
)
date = one_31.copy() + num_end
date.set_parse_action(lambda s,l,t: _action('DAY', int(t[0]), s,l,t))
date_2digit = one_31_2digit.copy() + num_end
date_2digit.set_parse_action(lambda s,l,t: _action('DAY', int(t[0]), s,l,t))
date_float = Combine(one_31.copy() + Literal('.') + Optional(Word(nums)))
date_float.set_parse_action(lambda s,l,t: _action('DAY', float(t[0]), s,l,t))
date_2digit_float = Combine(one_31_2digit.copy() + Literal('.') + Optional(Word(nums)))
date_2digit_float.set_parse_action(lambda s,l,t: _action('DAY', float(t[0]), s,l,t))
##########################################################################################
# Day of year 1-366 or fractional day of year 1.000 to 366.999
##########################################################################################
# 001-366
one_366_3digit = (
# 001-099
Combine(Literal('0') + (Word('0',nonzero,exact=2) | Word(nonzero,nums,exact=2))) |
# 100-299
Word('12', nums, exact=3) |
# 300-359 or 360-366
Combine(Literal('3') + (Word('012345',nums,exact=2) | Word('6','0123456',exact=2)))
)
# 1-366, optionally zero-padded or right-justified to width 3
one_366 = (
one_366_3digit |
# 10-99 with optional leading blank
Combine(Optional(Suppress(Literal(' '))) + Word(nonzero, nums, exact=2)) |
# 1-9, with zero or two leading blanks (must come after the above!)
Combine(Optional(Suppress(Literal(' '))) + Word(nonzero, exact=1))
)
doy = one_366.copy() + num_end
doy.set_parse_action(lambda s,l,t: _action('DAY', int(t[0]), s,l,t))
doy_3digit = one_366_3digit.copy() + num_end
doy_3digit.set_parse_action(lambda s,l,t: _action('DAY', int(t[0]), s,l,t))
doy_float = Combine(one_366.copy() + Literal('.') + Optional(Word(nums)))
doy_float.set_parse_action(lambda s,l,t: _action('DAY', float(t[0]), s,l,t))
doy_3digit_float = Combine(one_366_3digit.copy() + Literal('.') + Optional(Word(nums)))
doy_3digit_float.set_parse_action(lambda s,l,t: _action('DAY', float(t[0]), s,l,t))
##########################################################################################
# Weekday
##########################################################################################
# Full weekday names
full_weekday = one_of(['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY',
'THURSDAY', 'FRIDAY', 'SATURDAY'], caseless=True) + alpha_end
full_weekday.set_parse_action(lambda s,l,t: _action('WEEKDAY', t[0][:3].upper(), s,l,t))
# Three-letter abbreviations
abbrev_weekday = one_of(['SUN', 'MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT'],
caseless=True) + alpha_end
abbrev_weekday.set_parse_action(lambda s,l,t: _action('WEEKDAY', t[0].upper(), s,l,t))
# Note: full_weekday must appear before abbrev_weekday
weekday = full_weekday | abbrev_weekday + Optional(Suppress(Literal('.')))
opt_weekday = Optional(weekday + opt_comma)
##########################################################################################
# Date pyparsers
##########################################################################################
# Index order is [order][extended years, floating, strict]
# floating = 0: no decimal point
# floating = 1: decimal point optional
# floating = 2: decimal point required
ymd_parser = np.empty((2,3,2), dtype='object')
mdy_parser = np.empty((2,3,2), dtype='object')
dmy_parser = np.empty((2,3,2), dtype='object')
dash = opt_white + Suppress(Literal('-')) + opt_white
slash = opt_white + Suppress(Literal('/')) + opt_white
dot = opt_white + Suppress(Literal('.')) + opt_white
# [strict, extended years]
y_signed = [[year , signed_year | year ],
[year_strict, signed_year | year_strict]]
y_extended = [[year , extended_year | year ],
[year_strict, extended_year | year_strict]]
y_suffixed = [[year , suffixed_year | year ],
[year_strict, suffixed_year | year_strict]]
# [strict]
m_parsers = [month, month_strict]
# [floating, strict]
d_parsers = [[date , date_2digit ],
[date_float | date, date_2digit_float | date_2digit],
[date_float , date_2digit_float ]]
# Year-month-day order
for x in range(2):
for f in range(3):
for s in range(2):
dot_md = dot + dotless_month + dot + date # no period in month or date
punc_md = (dash + month + dash | slash + month + slash) + d_parsers[f][0]
space_ymd = y_signed[0][x] + white + m_parsers[s] + white + d_parsers[f][0]
ymd_parser[x,f,s] = (y_signed[0][x] + (dot_md | punc_md) | space_ymd)
# Augment non-strict YMD options with compressed formats
# Note compressed options must be split, 2-digit year before 4-digit year.
compressed_ymd = ( year_2digit + month_2digit + d_parsers[f][1]
| year_4digit + month_2digit + d_parsers[f][1])
if x == 1:
compressed_ymd |= signed_year + month_2digit + d_parsers[f][1]
ymd_parser[x,f,0] |= compressed_ymd
# Month-day-year order
for x in range(2):
for f in range(3):
for s in range(2):
dot_md = dotless_month + dot + date + dot # no period in month or date
punc_md = month + ( dash + d_parsers[f][0] + dash
| slash + d_parsers[f][0] + slash)
space_mdy = m_parsers[s] + white + d_parsers[f][0] + opt_comma + y_extended[0][x]
mdy_parser[x,f,s] = space_mdy | (dot_md | punc_md) + y_suffixed[0][x]
# Day-month-year order
for x in range(2):
for f in range(3):
for s in range(2):
dot_dm = date + dot + dotless_month + dot # no period in month or date
punc_dm = d_parsers[f][0] + (dash + month + dash | slash + month + slash)
space_dmy = d_parsers[f][0] + white + m_parsers[s] + opt_comma + y_extended[0][x]
dmy_parser[x,f,s] = space_dmy | (dot_dm | punc_dm) + y_suffixed[0][x]
# Convert required order to preference order; index is [order][extended, floating, strict]
# Also, require num_end
DATE_PYPARSERS = {}
DATE_PYPARSERS['YMD'] = np.empty((2,3,2), dtype='object')
DATE_PYPARSERS['MDY'] = np.empty((2,3,2), dtype='object')
DATE_PYPARSERS['DMY'] = np.empty((2,3,2), dtype='object')
for x in range(2):
for f in range(3):
for s in range(2):
DATE_PYPARSERS['YMD'][x,f,s] = (ymd_parser[x,f,s] | mdy_parser[x,f,s] |
dmy_parser[x,f,s]) + ~FollowedBy(nums)
DATE_PYPARSERS['MDY'][x,f,s] = (mdy_parser[x,f,s] | ymd_parser[x,f,s] |
dmy_parser[x,f,s]) + ~FollowedBy(nums)
DATE_PYPARSERS['DMY'][x,f,s] = (dmy_parser[x,f,s] | mdy_parser[x,f,s] |
ymd_parser[x,f,s]) + ~FollowedBy(nums)
##########################################################################################
# Year and day-of-year pyparsers
##########################################################################################
# [floating, strict]
doy_parsers = [[doy , doy_3digit ],
[doy_float | doy, doy_3digit_float | doy_3digit],
[doy_float , doy_3digit_float ]]
# Index order is [extended years, floating, strict]
YD_PYPARSERS = np.empty((2,3,2), dtype='object')
for x in range(2):
for f in range(3):
YD_PYPARSERS[x,f,0] = (( y_signed[0][x] + (dash|slash|white) + doy_parsers[f][0]
| y_signed[0][x] + dot + doy_parsers[0][0]
| year_2digit + doy_parsers[f][1] # compressed options
| year_4digit + doy_parsers[f][1])
+ ~FollowedBy(nums))
YD_PYPARSERS[x,f,1] = (y_signed[1][x] + (dash|slash) + doy_parsers[f][1]
+ ~FollowedBy(nums))
##########################################################################################
# ISO date pyparsers
##########################################################################################
dash_ = Suppress(Literal('-'))
# Index order is [extended years, floating, doy]
ISO_DATE_PYPARSERS = np.empty((2,3,2), dtype='object')
for f in range(3):
ISO_DATE_PYPARSERS[0,f,0] = ( # compressed 2- and 4-digit options must be split
year + dash_ + month_2digit + dash_ + d_parsers[f][1] |
year_2digit + month_2digit + d_parsers[f][1] |
year_4digit + month_2digit + d_parsers[f][1]
)
ISO_DATE_PYPARSERS[0,f,1] = (
year + dash_ + (doy_parsers[f][1] | month_2digit + dash_ + d_parsers[f][1]) |
year_2digit + (doy_parsers[f][1] | month_2digit + d_parsers[f][1]) |
year_4digit + (doy_parsers[f][1] | month_2digit + d_parsers[f][1])
)
ISO_DATE_PYPARSERS[1,f,0] = (
signed_year + dash_ + month_2digit + dash_ + d_parsers[f][1] |
ISO_DATE_PYPARSERS[0,f,0]
)
ISO_DATE_PYPARSERS[1,f,1] = (
signed_year + dash_ + (doy_parsers[f][1] |
month_2digit + dash_ + d_parsers[f][1]) |
ISO_DATE_PYPARSERS[0,f,1]
)
for x in range(2):
for f in range(3):
for d in range(2):
ISO_DATE_PYPARSERS[x,f,d] += ~FollowedBy(nums)
##########################################################################################
# PyParser constructor function
##########################################################################################
[docs]
def date_pyparser(order='YMD', *, strict=False, doy=False, mjd=False, weekdays=False,
floating=False, floating_only=False, extended=False, iso_only=False,
padding=True, embedded=False):
"""A date parser.
Parameters:
order (str):
One of "YMD", "MDY", or "DMY", defining the default order for day month, and
year in situations where it might be ambiguous.
strict (bool, optional):
True for a stricter parser, which is less likely to match strings that might
not actually represent dates.
doy (bool, optional):
True to recognize dates specified as year and day-of-year.
mjd (bool, optional):
True to recognize Modified Julian Dates.
weekdays (bool, optional):
True to allow dates including weekdays.
floating (bool, optional):
True to allow fractional days. If False, the mjd option only supports integer
MJD dates.
floating_only (bool, optional):
True to require the date to contain a decimal point.
extended (bool, optional):
True to support extended year values: signed (with at least four digits) and
those involving "CE", "BCE", "AD", "BC".
iso_only (bool, optional):
Require an ISO 8601:1988-compatible date string; ignore `order`, `strict`,
`mjd`, and `weekdays` options.
padding (bool, optional):
True to ignore leading or trailing white space.
embedded (bool, optional):
True to allow the time to be followed by additional text.
Returns:
pyparsing.ParserElement: A parser for the selected syntax. Calling the `as_list()`
method on the returned ParseResult object returns a list containing some but not
all of these tuples, depending on what appears in the parsed string:
* ("YEAR", year): Year if specified; two-digit years are converted to 1970-2069.
Alternatively, "MJD" or "JD" if the day number is to be interpreted as a Julian
or Modified Julian date.
* ("MONTH", month): Month if specified, 1-12.
* ("DAY", day); Day number: 1-31 if a month was specified; 1-366 if a day of year
was specified; otherwise, the MJD or JD day value.
* ("WEEKDAY", abbrev): Day of the week if provided, as an abbreviated uppercase
name: "MON", "TUE", etc.
* ("TIMESYS", name): "UTC" for an MJD or JD date; "TDB" for an MJED or JED date;
"TT" for an MJTD or JTD date.
* ("~", number): The last occurrence of this tuple in the list contains the number
of characters matched.
"""
ifloating = 2 if floating_only else 1 if floating else 0
if iso_only:
pyparser = ISO_DATE_PYPARSERS[int(extended), ifloating, int(doy)]
else:
pyparser = DATE_PYPARSERS[order][int(extended), ifloating, int(strict)]
if doy:
pyparser |= YD_PYPARSERS[int(extended), ifloating, int(strict)]
if weekdays:
pyparser = opt_weekday + pyparser
if mjd:
pyparser = mjd_pyparser(floating=floating, timesys=floating,
padding=False, embedded=True) | pyparser
if padding:
pyparser = opt_white + pyparser + opt_white
if not embedded:
pyparser = pyparser + StringEnd()
return pyparser
##########################################################################################