Source code for tidypolars_extra.lubridate

import polars as pl
from .utils import _col_expr

# Public API of this module: the names exported by a star-import.
# Every entry corresponds to a function defined further down in this file.
__all__ = [
    "as_date",
    "as_datetime",
    "ceiling_date",
    "days",
    "difftime",
    "floor_date",
    "hour",
    "hours",
    "make_date",
    "make_datetime",
    "mday",
    "microseconds",
    "milliseconds",
    "minute",
    "minutes",
    "month",
    "now",
    "quarter",
    "dt_round",
    "second",
    "seconds",
    "today",
    "wday",
    "week",
    "weeks",
    "yday",
    "year"
]

def as_date(x, fmt=None):
    """
    Parse a string column into a polars ``Date``.

    Parameters
    ----------
    x : Expr, Series
        String column to parse.
    fmt : str, optional
        strptime-style pattern handed to polars as ``format``
        (for example ``"%Y-%m-%d"``). ``None`` is forwarded unchanged.

    Returns
    -------
    Expr, Series
        The result of ``.str.strptime(pl.Date, ...)`` on the column.

    Examples
    --------
    >>> df = tp.tibble(x = ['2021-01-01', '2021-10-01'])
    >>> df.mutate(date_x = tp.as_date(col('x')))
    """
    column = _col_expr(x)
    parsed = column.str.strptime(pl.Date, format=fmt)
    return parsed
def as_datetime(x, fmt=None):
    """
    Parse a string column into a polars ``Datetime``.

    Parameters
    ----------
    x : Expr, Series
        String column to parse.
    fmt : str, optional
        strptime-style pattern handed to polars as ``format``
        (for example ``"%Y-%m-%d %H:%M:%S"``). ``None`` is forwarded
        unchanged.

    Returns
    -------
    Expr, Series
        The result of ``.str.strptime(pl.Datetime, ...)`` on the column.

    Examples
    --------
    >>> df = tp.tibble(x = ['2021-01-01', '2021-10-01'])
    >>> df.mutate(date_x = tp.as_datetime(col('x')))
    """
    column = _col_expr(x)
    parsed = column.str.strptime(pl.Datetime, format=fmt)
    return parsed
def hour(x):
    """
    Pull the hour component out of a datetime column.

    Parameters
    ----------
    x : Expr, Series
        Datetime column to read from.

    Returns
    -------
    Expr, Series
        Hour of each value, as produced by ``.dt.hour()``.

    Examples
    --------
    >>> df.mutate(hour = tp.hour(col('x')))
    """
    return _col_expr(x).dt.hour()
def mday(x):
    """
    Pull the day of the month (1 to 31) out of a date column.

    Parameters
    ----------
    x : Expr, Series
        Date or datetime column to read from.

    Returns
    -------
    Expr, Series
        Day of month of each value, as produced by ``.dt.day()``.

    Examples
    --------
    >>> df.mutate(monthday = tp.mday(col('x')))
    """
    return _col_expr(x).dt.day()
def make_date(year=1970, month=1, day=1):
    """
    Assemble a date from year, month and day components.

    Parameters
    ----------
    year : Expr, str, int
        Column or literal supplying the year.
    month : Expr, str, int
        Column or literal supplying the month.
    day : Expr, str, int
        Column or literal supplying the day.

    Returns
    -------
    Expr
        Whatever ``pl.date`` builds from the three components.

    Examples
    --------
    >>> df.mutate(date = tp.make_date(2000, 1, 1))
    """
    components = (year, month, day)
    return pl.date(*components)
def make_datetime(year=1970, month=1, day=1, hour=0, minute=0, second=0):
    """
    Assemble a datetime from its calendar and clock components.

    Parameters
    ----------
    year : Expr, str, int
        Column or literal supplying the year.
    month : Expr, str, int
        Column or literal supplying the month.
    day : Expr, str, int
        Column or literal supplying the day.
    hour : Expr, str, int
        Column or literal supplying the hour.
    minute : Expr, str, int
        Column or literal supplying the minute.
    second : Expr, str, int
        Column or literal supplying the second.

    Returns
    -------
    Expr
        Whatever ``pl.datetime`` builds from the six components.

    Examples
    --------
    >>> df.mutate(date = tp.make_datetime(2000, 1, 1))
    """
    return pl.datetime(
        year,
        month,
        day,
        hour=hour,
        minute=minute,
        second=second,
    )
def minute(x):
    """
    Pull the minute component out of a datetime column.

    Parameters
    ----------
    x : Expr, Series
        Datetime column to read from.

    Returns
    -------
    Expr, Series
        Minute of each value, as produced by ``.dt.minute()``.

    Examples
    --------
    >>> df.mutate(minute = tp.minute(col('x')))
    """
    return _col_expr(x).dt.minute()
def month(x):
    """
    Pull the month number out of a date column.

    Parameters
    ----------
    x : Expr, Series
        Date or datetime column to read from.

    Returns
    -------
    Expr, Series
        Month of each value, as produced by ``.dt.month()``.

    Examples
    --------
    >>> df.mutate(month = tp.month(col('x')))
    """
    return _col_expr(x).dt.month()
def quarter(x):
    """
    Derive the calendar quarter from a date column.

    The quarter is computed arithmetically from the month number:
    months 1-3 give 1, 4-6 give 2, 7-9 give 3 and 10-12 give 4.

    Parameters
    ----------
    x : Expr, Series
        Date or datetime column to read from.

    Returns
    -------
    Expr, Series
        Quarter number of each value.

    Examples
    --------
    >>> df.mutate(quarter = tp.quarter(col('x')))
    """
    # Shift months to 0..11 so floor division by 3 yields 0..3.
    zero_based_month = _col_expr(x).dt.month() - 1
    return (zero_based_month // 3) + 1
def dt_round(x, rule, n):
    """
    Round the datetime

    Parameters
    ----------
    x : Expr, Series
        Column to operate on
    rule : str
        Unit to round to. Any of ``"year"``, ``"month"``, ``"week"``,
        ``"day"``, ``"hour"``, ``"minute"``, ``"second"``. Any other value
        (for example a native polars suffix such as ``"d"`` or ``"mo"``)
        is passed through to polars unchanged.
    n : int
        Number of units (e.g. 5 "day", 15 "minute").

    Returns
    -------
    Expr, Series
        The column rounded with ``.dt.round()``.

    Examples
    --------
    >>> df.mutate(rounded = tp.dt_round(col('x'), 'minute', 15))
    """
    # polars duration strings use short suffixes ("15m", "1mo"); the long
    # unit names this function documents must be translated first, otherwise
    # a string like "15minute" is handed to polars.
    unit_suffixes = {
        'year': 'y',
        'month': 'mo',
        'week': 'w',
        'day': 'd',
        'hour': 'h',
        'minute': 'm',
        'second': 's',
    }
    x = _col_expr(x)
    # Unknown rules fall through untouched so callers that already pass a
    # polars suffix keep working.
    suffix = unit_suffixes.get(rule, rule)
    return x.dt.round(f"{n}{suffix}")
def second(x):
    """
    Pull the second component out of a datetime column.

    Parameters
    ----------
    x : Expr, Series
        Datetime column to read from.

    Returns
    -------
    Expr, Series
        Second of each value, as produced by ``.dt.second()``.

    Examples
    --------
    >>> df.mutate(second = tp.second(col('x')))
    """
    return _col_expr(x).dt.second()
def wday(x):
    """
    Extract the weekday from a date from sunday = 1 to saturday = 7.

    Parameters
    ----------
    x : Expr, Series
        Column to operate on

    Returns
    -------
    Expr, Series
        Weekday number of each value, with Sunday as 1 and Saturday as 7.

    Examples
    --------
    >>> df.mutate(weekday = tp.wday(col('x')))
    """
    x = _col_expr(x)
    # polars numbers weekdays the ISO way (Monday = 1 ... Sunday = 7).
    # Taking the value modulo 7 sends Sunday to 0 and leaves Monday..Saturday
    # at 1..6, so adding 1 yields Sunday = 1 ... Saturday = 7. A plain "+ 1"
    # would instead produce Monday = 2 ... Sunday = 8.
    return (x.dt.weekday() % 7) + 1
def week(x):
    """
    Pull the week number out of a date column.

    Parameters
    ----------
    x : Expr, Series
        Date or datetime column to read from.

    Returns
    -------
    Expr, Series
        Week number of each value, as produced by ``.dt.week()``.

    Examples
    --------
    >>> df.mutate(week = tp.week(col('x')))
    """
    return _col_expr(x).dt.week()
def yday(x):
    """
    Pull the day of the year (1 to 366) out of a date column.

    Parameters
    ----------
    x : Expr, Series
        Date or datetime column to read from.

    Returns
    -------
    Expr, Series
        Ordinal day of each value, as produced by ``.dt.ordinal_day()``.

    Examples
    --------
    >>> df.mutate(yearday = tp.yday(col('x')))
    """
    return _col_expr(x).dt.ordinal_day()
def year(x):
    """
    Pull the year out of a date column.

    Parameters
    ----------
    x : Expr, Series
        Date or datetime column to read from.

    Returns
    -------
    Expr, Series
        Year of each value, as produced by ``.dt.year()``.

    Examples
    --------
    >>> df.mutate(year = tp.year(col('x')))
    """
    return _col_expr(x).dt.year()
def today():
    """
    Wrap the current local date in a polars literal.

    The date is read once, at the moment this function is called.

    Returns
    -------
    Expr
        ``pl.lit`` of ``datetime.date.today()``.

    Examples
    --------
    >>> df.mutate(today = tp.today())
    """
    import datetime as _dt

    current_date = _dt.date.today()
    return pl.lit(current_date)
def now():
    """
    Wrap the current local datetime in a polars literal.

    The timestamp is read once, at the moment this function is called,
    via ``datetime.datetime.now()`` with no timezone argument.

    Returns
    -------
    Expr
        ``pl.lit`` of the captured timestamp.

    Examples
    --------
    >>> df.mutate(now = tp.now())
    """
    import datetime as _dt

    current_timestamp = _dt.datetime.now()
    return pl.lit(current_timestamp)
def difftime(x, y, units = 'days'):
    """
    Compute the time difference ``x - y`` in the specified units

    Parameters
    ----------
    x : Expr, str
        Date/datetime column to subtract from (the minuend)
    y : Expr, str
        Date/datetime column that is subtracted (the subtrahend)
    units : str
        Units for the result: 'days', 'hours', 'minutes', 'seconds', 'weeks'

    Returns
    -------
    Expr
        Numeric expression with the time difference. The sign follows
        ``x - y``: positive where ``x`` is later than ``y``.

    Raises
    ------
    ValueError
        If ``units`` is not one of the supported names.

    Examples
    --------
    >>> df.mutate(diff = tp.difftime('date1', 'date2', units='days'))
    """
    # Number of microseconds in one of each supported unit.
    divisors = {
        'seconds': 1_000_000,
        'minutes': 60_000_000,
        'hours': 3_600_000_000,
        'days': 86_400_000_000,
        'weeks': 604_800_000_000,
    }
    # Reject a bad unit before any expression is built, so the caller gets
    # the unit error regardless of what was passed for x and y.
    if units not in divisors:
        raise ValueError(f"`units` must be one of {list(divisors.keys())}")
    x = _col_expr(x)
    y = _col_expr(y)
    diff = (x - y).dt.total_microseconds()
    return diff / divisors[units]
def _unit_to_polars(unit):
    """Translate a long unit name into a one-unit polars duration string.

    Raises ValueError when ``unit`` is not one of the known names.
    """
    durations = {
        'year': '1y',
        'month': '1mo',
        'week': '1w',
        'day': '1d',
        'hour': '1h',
        'minute': '1m',
        'second': '1s',
    }
    duration = durations.get(unit)
    if duration is None:
        raise ValueError(f"`unit` must be one of {list(durations)}")
    return duration
def floor_date(x, unit='month'):
    """
    Truncate a date column down to the start of the given unit.

    Parameters
    ----------
    x : Expr, str
        Date/datetime column.
    unit : str
        Unit to truncate to: 'year', 'month', 'week', 'day', 'hour',
        'minute', 'second'.

    Returns
    -------
    Expr
        The column truncated with ``.dt.truncate()``.

    Raises
    ------
    ValueError
        If ``unit`` is not one of the supported names.

    Examples
    --------
    >>> df.mutate(month_start = tp.floor_date('date', 'month'))
    """
    column = _col_expr(x)
    every = _unit_to_polars(unit)
    return column.dt.truncate(every)
def ceiling_date(x, unit='month', change_on_boundary=False):
    """
    Round a date column up to the next boundary of the given unit.

    The upper boundary is obtained by offsetting each value by one unit
    and truncating the result to that unit.

    Parameters
    ----------
    x : Expr, str
        Date/datetime column.
    unit : str
        Unit to round to: 'year', 'month', 'week', 'day', 'hour',
        'minute', 'second'.
    change_on_boundary : bool
        When False (default), values that already equal their own
        truncation are returned unchanged. When True, every value is
        moved to the following boundary.

    Returns
    -------
    Expr
        Date/datetime rounded up.

    Raises
    ------
    ValueError
        If ``unit`` is not one of the supported names.

    Examples
    --------
    >>> df.mutate(month_end = tp.ceiling_date('date', 'month'))
    """
    column = _col_expr(x)
    every = _unit_to_polars(unit)
    lower = column.dt.truncate(every)
    upper = column.dt.offset_by(every).dt.truncate(every)
    if not change_on_boundary:
        # A value equal to its own truncation already sits on a boundary.
        on_boundary = column == lower
        return pl.when(on_boundary).then(column).otherwise(upper)
    return upper
def days(n=1):
    """
    Build a duration spanning ``n`` days.

    Parameters
    ----------
    n : int
        How many days the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(days=n)``.

    Examples
    --------
    >>> df.mutate(tomorrow = col('date') + tp.days(1))
    """
    span = pl.duration(days=n)
    return span
def weeks(n=1):
    """
    Build a duration spanning ``n`` weeks.

    Parameters
    ----------
    n : int
        How many weeks the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(weeks=n)``.

    Examples
    --------
    >>> df.mutate(next_week = col('date') + tp.weeks(1))
    """
    span = pl.duration(weeks=n)
    return span
def hours(n=1):
    """
    Build a duration spanning ``n`` hours.

    Parameters
    ----------
    n : int
        How many hours the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(hours=n)``.

    Examples
    --------
    >>> df.mutate(later = col('datetime') + tp.hours(2))
    """
    span = pl.duration(hours=n)
    return span
def minutes(n=1):
    """
    Build a duration spanning ``n`` minutes.

    Parameters
    ----------
    n : int
        How many minutes the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(minutes=n)``.

    Examples
    --------
    >>> df.mutate(later = col('datetime') + tp.minutes(30))
    """
    span = pl.duration(minutes=n)
    return span
def seconds(n=1):
    """
    Build a duration spanning ``n`` seconds.

    Parameters
    ----------
    n : int
        How many seconds the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(seconds=n)``.

    Examples
    --------
    >>> df.mutate(later = col('datetime') + tp.seconds(10))
    """
    span = pl.duration(seconds=n)
    return span
def milliseconds(n=1):
    """
    Build a duration spanning ``n`` milliseconds.

    Parameters
    ----------
    n : int
        How many milliseconds the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(milliseconds=n)``.

    Examples
    --------
    >>> df.mutate(later = col('datetime') + tp.milliseconds(250))
    """
    span = pl.duration(milliseconds=n)
    return span
def microseconds(n=1):
    """
    Build a duration spanning ``n`` microseconds.

    Parameters
    ----------
    n : int
        How many microseconds the duration covers.

    Returns
    -------
    Expr
        Result of ``pl.duration(microseconds=n)``.

    Examples
    --------
    >>> df.mutate(later = col('datetime') + tp.microseconds(500))
    """
    span = pl.duration(microseconds=n)
    return span