
# Adaptation of the patch presented here:
# http://code.google.com/p/pyiso8601/issues/detail?id=15
"""
Copyright (c) 2007 Michael Twomey

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

ISO 8601 date time string parsing

Basic usage:
>>> import iso8601
>>> iso8601.parse_date("2007-01-25T12:00:00Z")
datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.iso8601.Utc ...>)
>>>
"""
|
|
|
|
|
|
from datetime import datetime, timedelta, tzinfo
|
|
import re
|
|
|
|
from .compat import string_types
|
|
|
|
__all__ = ["parse_date", "ParseError", "Utc", "FixedOffset"]
|
|
|
|
# Adapted from http://delete.me.uk/2005/03/iso8601.html
|
|
# Pattern for a full date/time string. Named groups: year, month, day,
# separator, hour, minute, second, fraction, timezone. Everything after the
# year is optional at the regex level; the timezone can only match when a
# time-of-day is present because it sits inside the time group.
ISO8601_REGEX = re.compile(
    # Date part: YYYY[-M[M][-D[D] ...
    r"(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})"
    # Time part: any single separator character, then hh[:mm[:ss[.fraction]]]
    r"((?P<separator>.)(?P<hour>[0-9]{2})(:(?P<minute>[0-9]{2})(:(?P<second>[0-9]{2})(\.(?P<fraction>[0-9]+))?)?)?"
    # Timezone part: "Z" or a signed hh[[:]mm] offset; closes the open groups
    r"(?P<timezone>Z|(([-+])([0-9]{2})(:?([0-9]{2}))?))?)?)?)?"
)
|
|
# Pattern for a numeric UTC offset: a sign, two hour digits and, optionally,
# two minute digits with or without a ":" in between.
TIMEZONE_REGEX = re.compile(
    r"(?P<prefix>[+-])(?P<hours>[0-9]{2})(:?(?P<minutes>[0-9]{2}))?"
)
|
|
|
|
class ParseError(Exception):
    """Signals that a date or timezone string could not be parsed."""
|
|
|
|
# Yoinked from python docs
|
|
# Shared zero-length offset returned by the tzinfo classes in this module.
ZERO = timedelta(0)


class Utc(tzinfo):
    """tzinfo for UTC: zero offset, no DST, named "UTC"."""

    def utcoffset(self, dt):
        """Return the offset from UTC, which is always zero."""
        return ZERO

    def tzname(self, dt):
        """Return the fixed zone name "UTC"."""
        return "UTC"

    def dst(self, dt):
        """Return the DST adjustment, which is always zero."""
        return ZERO


# Module-level instance used as the default timezone by the parsers.
UTC = Utc()
|
|
|
|
class FixedOffset(tzinfo):
    """tzinfo with a constant offset from UTC, given in hours and minutes.

    The offset is stored as a single timedelta and the name is returned
    unchanged by tzname(). No DST adjustment is ever applied.
    """

    def __init__(self, offset_hours, offset_minutes, name):
        self.__name = name
        self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes)

    def __getinitargs__(self):
        # tzinfo.__reduce__ uses the type itself as the factory, so the
        # placeholder constructor arguments are supplied here instead of
        # being made defaults of __init__.
        return 0, 0, 'unknown'

    def utcoffset(self, dt):
        """Return the fixed offset from UTC as a timedelta."""
        return self.__offset

    def tzname(self, dt):
        """Return the name given at construction time."""
        return self.__name

    def dst(self, dt):
        """Return the DST adjustment, which is always zero."""
        return ZERO

    def __repr__(self):
        return "<FixedOffset %r>" % self.__name
|
|
|
|
def parse_timezone(tzstring, default_timezone=UTC):
    """Parses ISO 8601 time zone specs into tzinfo offsets

    tzstring is either "Z", None, or a signed offset such as "+01:00",
    "-0530" or "+01". default_timezone is the tzinfo returned when
    tzstring is None.

    Returns UTC for "Z", default_timezone for None, and otherwise a
    FixedOffset named after tzstring.

    Raises ParseError if tzstring does not start with a signed offset.
    """
    if tzstring == "Z":
        return UTC
    # This isn't strictly correct, but it's common to encounter dates without
    # timezones so I'll assume the default (which defaults to UTC).
    # Addresses issue 4.
    if tzstring is None:
        return default_timezone
    m = TIMEZONE_REGEX.match(tzstring)
    if m is None:
        # Without this guard an unparseable offset surfaced as an
        # AttributeError on m.group() instead of the module's own error.
        raise ParseError("Unable to parse timezone string %r" % (tzstring,))
    hours = int(m.group('hours'))
    # The minutes part is optional ("+01" is accepted) and defaults to zero.
    minutes = int(m.group('minutes') or 0)
    if m.group('prefix') == "-":
        # The sign applies to both components so that "-05:30" means
        # minus five and a half hours rather than minus four and a half.
        hours = -hours
        minutes = -minutes
    return FixedOffset(hours, minutes, tzstring)
|
|
|
|
def parse_date(datestring, default_timezone=UTC):
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    Year, month and day are required; hour, minute, second and the
    fractional second default to zero when absent. Fractional seconds are
    truncated to microsecond precision.

    Returns a timezone-aware datetime.

    Raises ParseError if datestring is not a string, does not match the
    ISO 8601 pattern, lacks a month or day, or holds out-of-range values.
    """
    if not isinstance(datestring, string_types):
        # Wrap in a 1-tuple: formatting a bare tuple argument with a single
        # %r raises TypeError instead of the intended ParseError.
        raise ParseError("Expecting a string %r" % (datestring,))
    # NOTE(review): match() only anchors at the start and the pattern has no
    # end anchor, so text after the recognised portion is ignored.
    m = ISO8601_REGEX.match(datestring)
    if not m:
        raise ParseError("Unable to parse date string %r" % datestring)
    groups = m.groupdict()
    # Validate the mandatory date parts before doing any further work.
    if any(groups[part] is None for part in ("year", "month", "day")):
        raise ParseError('Unable to parse date string %r' % datestring)
    tz = parse_timezone(groups["timezone"], default_timezone=default_timezone)

    fraction = groups["fraction"]
    if fraction is None:
        microsecond = 0
    else:
        # Keep the first six digits and right-pad with zeros. Staying in
        # integer/string arithmetic avoids the binary-float truncation that
        # int(float("0.<digits>") * 1e6) can suffer, which may come out one
        # microsecond low.
        microsecond = int(fraction[:6].ljust(6, "0"))
    try:
        return datetime(
            int(groups["year"]), int(groups["month"]), int(groups["day"]),
            # Matched time fields are non-empty digit strings, so "or 0"
            # only replaces the None of an absent field.
            int(groups["hour"] or 0), int(groups["minute"] or 0),
            int(groups["second"] or 0), microsecond, tz)
    except ValueError as e:
        # Out-of-range components (e.g. month 13) are reported as ParseError.
        raise ParseError(*e.args)
|