Skip to content

refactor: add typing to & do maintenance of periods #1223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,38 @@
# Changelog

# 42.0.0 [#1223](https://github.com/openfisca/openfisca-core/pull/1223)

#### Breaking changes

- Changes to `eternity` instants and periods
- Eternity instants are now `<Instant(-1, -1, -1)>` instead of
`<Instant(inf, inf, inf)>`
- Eternity periods are now `<Period(('eternity', <Instant(-1, -1, -1)>, -1))>`
instead of `<Period(('eternity', <Instant(inf, inf, inf)>, inf))>`
- The reason is to avoid mixing data types: `inf` is a float, periods and
instants are integers. Mixed data types make memory optimisations impossible.
- Migration should be straightforward. If you have a test that checks for
`inf`, you should update it to check for `-1` or use the `is_eternal` method.
- `periods.instant` no longer returns `None`
- Now, it raises `periods.InstantError`

#### New features

- Introduce `Instant.eternity()`
- This behaviour was previously duplicated across the code base
- Now it is encapsulated in a single method
- Introduce `Instant.is_eternal` and `Period.is_eternal`
- These methods return a `bool` indicating whether the instant or period is eternal.
- `periods.instant` now also parses ISO calendar strings (weeks)
- For instance, `2022-W01` is now a valid input

#### Technical changes

- Update `pendulum`
- Reduce code complexity
- Remove run-time type-checks
- Add typing to the periods module

### 41.5.7 [#1225](https://github.com/openfisca/openfisca-core/pull/1225)

#### Technical changes
Expand Down
5 changes: 4 additions & 1 deletion openfisca_core/commons/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ def empty_clone(original: object) -> object:

"""

def __init__(_: object) -> None: ...

Dummy = type(
"Dummy",
(original.__class__,),
{"__init__": lambda _: None},
{"__init__": __init__},
)

new = Dummy()
Expand Down Expand Up @@ -69,6 +71,7 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str:
"[<class 'list'>, {}, <function stringify_array...]"

"""

if array is None:
return "None"

Expand Down
Empty file.
55 changes: 44 additions & 11 deletions openfisca_core/periods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,59 @@
#
# See: https://www.python.org/dev/peps/pep-0008/#imports

from .config import ( # noqa: F401
DAY,
ETERNITY,
from . import types
from ._errors import InstantError, ParserError, PeriodError
from .config import (
INSTANT_PATTERN,
MONTH,
WEEK,
WEEKDAY,
YEAR,
date_by_instant_cache,
str_by_instant_cache,
year_or_month_or_day_re,
)
from .date_unit import DateUnit # noqa: F401
from .helpers import ( # noqa: F401
from .date_unit import DateUnit
from .helpers import (
instant,
instant_date,
key_period_size,
period,
unit_weight,
unit_weights,
)
from .instant_ import Instant # noqa: F401
from .period_ import Period # noqa: F401
from .instant_ import Instant
from .period_ import Period

# Module-level aliases for the `DateUnit` members, so that callers can write
# `periods.DAY` instead of `periods.DateUnit.DAY`.
WEEKDAY = DateUnit.WEEKDAY
WEEK = DateUnit.WEEK
DAY = DateUnit.DAY
MONTH = DateUnit.MONTH
YEAR = DateUnit.YEAR
ETERNITY = DateUnit.ETERNITY
# Aliases for the `DateUnit.isoformat` and `DateUnit.isocalendar` attributes.
# NOTE(review): both are defined in `date_unit`, outside this view; presumably
# they group the units used by ISO format and ISO calendar dates -- confirm.
ISOFORMAT = DateUnit.isoformat
ISOCALENDAR = DateUnit.isocalendar

__all__ = [
"DAY",
"DateUnit",
"ETERNITY",
"INSTANT_PATTERN",
"ISOCALENDAR",
"ISOFORMAT",
"Instant",
"InstantError",
"MONTH",
"ParserError",
"Period",
"PeriodError",
"WEEK",
"WEEKDAY",
"YEAR",
"date_by_instant_cache",
"instant",
"instant_date",
"key_period_size",
"period",
"str_by_instant_cache",
"types",
"unit_weight",
"unit_weights",
"year_or_month_or_day_re",
]
28 changes: 28 additions & 0 deletions openfisca_core/periods/_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pendulum.parsing.exceptions import ParserError


class InstantError(ValueError):
    """Signal that a value cannot be interpreted as an instant.

    The error message quotes the offending value and reminds the reader of
    the two accepted notations: ISO format and ISO calendar.

    Args:
        value: The rejected input, as a string.

    """

    def __init__(self, value: str) -> None:
        super().__init__(
            f"'{value}' is not a valid instant string. Instants are described "
            "using either the 'YYYY-MM-DD' format, for instance '2015-06-15', "
            "or the 'YYYY-Www-D' format, for instance '2015-W24-1'."
        )


class PeriodError(ValueError):
    """Signal that a value cannot be interpreted as a period.

    The error message lists a few legal examples, quotes the offending value
    and links to the OpenFisca documentation on period formats.

    Args:
        value: The rejected input, as a string.

    """

    def __init__(self, value: str) -> None:
        super().__init__(
            "Expected a period (eg. '2017', 'month:2017-01', 'week:2017-W01-1:3', "
            f"...); got: '{value}'. Learn more about legal period formats in "
            "OpenFisca: <https://openfisca.org/doc/coding-the-legislation/35_periods.html#periods-in-simulations>."
        )


__all__ = ["InstantError", "ParserError", "PeriodError"]
116 changes: 70 additions & 46 deletions openfisca_core/periods/_parsers.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,92 @@
from typing import Optional
"""To parse periods and instants from strings."""

import re
from __future__ import annotations

import datetime

import pendulum
from pendulum.datetime import Date
from pendulum.parsing import ParserError

from . import types as t
from ._errors import InstantError, ParserError, PeriodError
from .date_unit import DateUnit
from .instant_ import Instant
from .period_ import Period

invalid_week = re.compile(r".*(W[1-9]|W[1-9]-[0-9]|W[0-5][0-9]-0)$")

def parse_instant(value: str) -> t.Instant:
"""Parse a string into an instant.

Args:
value (str): The string to parse.

Returns:
An Instant built from the year, month and day of the parsed date.

Raises:
InstantError: When the string is not a valid ISO Calendar/Format.
ParserError: When the string couldn't be parsed.

Examples:
>>> parse_instant("2022")
Instant((2022, 1, 1))

>>> parse_instant("2022-02")
Instant((2022, 2, 1))

>>> parse_instant("2022-W02-7")
Instant((2022, 1, 16))

def _parse_period(value: str) -> Optional[Period]:
>>> parse_instant("2022-W013")
Traceback (most recent call last):
openfisca_core.periods._errors.InstantError: '2022-W013' is not a va...

>>> parse_instant("2022-02-29")
Traceback (most recent call last):
pendulum.parsing.exceptions.ParserError: Unable to parse string [202...

"""

# Reject anything that is not recognised as an instant string before handing
# it to pendulum.
# NOTE(review): `t.InstantStr` is defined in `types`, outside this view; it is
# used here as a runtime-checkable type -- confirm how it validates.
if not isinstance(value, t.InstantStr):
raise InstantError(str(value))

# NOTE(review): per the pendulum docs, `exact=True` returns the type matching
# the input (date, time or datetime) instead of always a datetime -- confirm.
date = pendulum.parse(value, exact=True)

# Anything that did not parse to a date is reported as unparseable.
if not isinstance(date, datetime.date):
msg = f"Unable to parse string [{value}]"
raise ParserError(msg)

return Instant((date.year, date.month, date.day))


def parse_period(value: str) -> t.Period:
"""Parses ISO format/calendar periods.

Such as "2012" or "2015-03".

Examples:
>>> _parse_period("2022")
>>> parse_period("2022")
Period((<DateUnit.YEAR: 'year'>, Instant((2022, 1, 1)), 1))

>>> _parse_period("2022-02")
>>> parse_period("2022-02")
Period((<DateUnit.MONTH: 'month'>, Instant((2022, 2, 1)), 1))

>>> _parse_period("2022-W02-7")
>>> parse_period("2022-W02-7")
Period((<DateUnit.WEEKDAY: 'weekday'>, Instant((2022, 1, 16)), 1))

"""
# If it's a complex period, next!
if len(value.split(":")) != 1:
return None

# Check for a non-empty string.
if not (value and isinstance(value, str)):
raise AttributeError
try:
instant = parse_instant(value)

# If it's negative, next!
if value[0] == "-":
raise ValueError
except InstantError as error:
raise PeriodError(value) from error

# If it's an invalid week, next!
if invalid_week.match(value):
raise ParserError

unit = _parse_unit(value)
date = pendulum.parse(value, exact=True)

if not isinstance(date, Date):
raise ValueError

instant = Instant((date.year, date.month, date.day))
unit = parse_unit(value)

return Period((unit, instant, 1))


def _parse_unit(value: str) -> DateUnit:
def parse_unit(value: str) -> t.DateUnit:
"""Determine the date unit of a date string.

Args:
Expand All @@ -66,32 +96,26 @@ def _parse_unit(value: str) -> DateUnit:
A DateUnit.

Raises:
ValueError when no DateUnit can be determined.
InstantError: when no DateUnit can be determined.

Examples:
>>> _parse_unit("2022")
>>> parse_unit("2022")
<DateUnit.YEAR: 'year'>

>>> _parse_unit("2022-W03-01")
>>> parse_unit("2022-W03-1")
<DateUnit.WEEKDAY: 'weekday'>

"""
length = len(value.split("-"))
isweek = value.find("W") != -1

if length == 1:
return DateUnit.YEAR
if not isinstance(value, t.InstantStr):
raise InstantError(str(value))

if length == 2:
if isweek:
return DateUnit.WEEK
length = len(value.split("-"))

return DateUnit.MONTH
if isinstance(value, t.ISOCalendarStr):
return DateUnit.isocalendar[-length]

if length == 3:
if isweek:
return DateUnit.WEEKDAY
return DateUnit.isoformat[-length]

return DateUnit.DAY

raise ValueError
__all__ = ["parse_instant", "parse_period", "parse_unit"]
16 changes: 7 additions & 9 deletions openfisca_core/periods/config.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
import re

from .date_unit import DateUnit
import pendulum

WEEKDAY = DateUnit.WEEKDAY
WEEK = DateUnit.WEEK
DAY = DateUnit.DAY
MONTH = DateUnit.MONTH
YEAR = DateUnit.YEAR
ETERNITY = DateUnit.ETERNITY
from . import types as t

# Matches "2015", "2015-01", "2015-01-01"
# Does not match "2015-13", "2015-12-32", "2015-01-02-03"
#
# The day group is nested inside the month group, so a day can only follow a
# month and at most three components are accepted. With two sibling optional
# groups (month, then month-day) a four-part string such as "2015-01-02-03"
# was wrongly accepted.
INSTANT_PATTERN = re.compile(
r"^\d{4}(-(0[1-9]|1[012])(-(0[1-9]|[12][0-9]|3[01]))?)?$",
)

date_by_instant_cache: dict = {}
str_by_instant_cache: dict = {}
date_by_instant_cache: dict[t.Instant, pendulum.Date] = {}
str_by_instant_cache: dict[t.Instant, t.InstantStr] = {}
# Matches a year from 1800 to 2099, optionally followed by "-month" and then
# "-day"; leading zeros on month and day are optional ("2015-1-5" matches).
# There is no start anchor, so the pattern is only anchored at the beginning
# when used with `.match()`.
# NOTE(review): the day part (`[0-2]?\d`) also accepts "0" and "00".
year_or_month_or_day_re = re.compile(
r"(18|19|20)\d{2}(-(0?[1-9]|1[0-2])(-([0-2]?\d|3[0-1]))?)?$",
)


__all__ = ["INSTANT_PATTERN", "date_by_instant_cache", "str_by_instant_cache"]
Loading
Loading