### Datetime classes

In [1]:
import datetime as dt

# A calendar date with no time-of-day part, spelled out field by field.
dt.date(year=2025, month=11, day=3)

datetime.date(2025, 11, 3)

In [3]:
# A full timestamp (date plus time of day), spelled out field by field.
nov3class = dt.datetime(year=2025, month=11, day=3, hour=12, minute=30, second=2)
# str() renders it as "YYYY-MM-DD HH:MM:SS".
str(nov3class)

'2025-11-03 12:30:02'

In [None]:
# strptime requires an explicit format string describing the layout of the
# input; here it matches "YYYY-MM-DD HH:MM:SS".
dt.datetime.strptime("2025-11-03 12:30:02", "%Y-%m-%d %H:%M:%S")

datetime.datetime(2025, 11, 3, 12, 30, 2)

In [9]:
# `parse` lives in the `dateutil.parser` submodule, so import that submodule
# explicitly. A bare `import dateutil` only exposes `dateutil.parser` when the
# installed release lazy-loads its submodules or when some other package has
# already imported it, which makes the cell depend on hidden state.
import dateutil.parser

# Unlike strptime, no format string is needed: the layout is inferred.
dateutil.parser.parse("2025-11-03 12:30:02")

datetime.datetime(2025, 11, 3, 12, 30, 2)

In [15]:
# Ambiguous input: the parser reads it month-first (MM/DD/YYYY), giving
# 11 March here, although the string could equally mean 3 November (DD/MM/YYYY).
dateutil.parser.parse("03/11/2025 12:30:02")

datetime.datetime(2025, 3, 11, 12, 30, 2)

In [16]:
# dayfirst=True makes the parser read the first field as the day, so the same
# string now gives 3 November.
dateutil.parser.parse("03/11/2025 12:30:02", dayfirst=True)

datetime.datetime(2025, 11, 3, 12, 30, 2)

### Polars

In [18]:
import polars as pl

# pl.date builds a date expression from year, month and day; the resulting
# column is named "date".
pl.select(pl.date(2025, 11, 3))

date
date
2025-11-03


In [19]:
# A plain Python datetime.date is accepted as well; the resulting column is
# then named "literal" and still has the date dtype.
pl.select(dt.date(2025, 11, 3))

literal
date
2025-11-03


In [21]:
# With no format argument, to_date infers the layout from the string
# (YYYY-MM-DD here).
pl.select(pl.lit("2025-11-03").str.to_date())

literal
date
2025-11-03


In [None]:
# Careful with slash-separated input: without a format it is read day-first
# (DD/MM/YYYY), so "11/03/2025" becomes 11 March rather than 3 November.
pl.select(pl.lit("11/03/2025").str.to_date())

literal
date
2025-03-11


In [24]:
# An explicit format removes the ambiguity: %m/%d/%Y reads the same string
# month-first, giving 3 November.
pl.select(pl.lit("11/03/2025").str.to_date("%m/%d/%Y"))

literal
date
2025-11-03


In [26]:
# Casting the parsed date to Int64 exposes the integer stored behind it
# (20395 for this date).
pl.select(pl.lit("11/03/2025").str.to_date("%m/%d/%Y").cast(pl.Int64))

literal
i64
20395


In [28]:
# The integer is the number of days since the Unix epoch: subtracting
# 20395 days from the date lands exactly on 1970-01-01.
pl.select(pl.date(2025, 11, 3) - pl.duration(days=20395))

date
date
1970-01-01


In [29]:
# Sample frame reused by the following cells: seven timestamps at 30-minute
# steps from 00:00 to 03:00 on 2021-12-16, plus a group label and a value.
df = pl.DataFrame(
    {
        "time": pl.datetime_range(
            start=pl.datetime(2021, 12, 16),
            end=pl.datetime(2021, 12, 16, 3),
            interval="30m",
            eager=True,  # return a Series right away instead of an expression
        ),
        "groups": ["a", "a", "a", "b", "b", "a", "a"],
        "values": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
    }
)
# An assignment displays nothing, so end the cell with the frame itself to
# render the table on a fresh run.
df


time,groups,values
datetime[μs],str,f64
2021-12-16 00:00:00,"""a""",1.0
2021-12-16 00:30:00,"""a""",2.0
2021-12-16 01:00:00,"""a""",3.0
2021-12-16 01:30:00,"""b""",4.0
2021-12-16 02:00:00,"""b""",5.0
2021-12-16 02:30:00,"""a""",6.0
2021-12-16 03:00:00,"""a""",7.0


In [32]:
# Shift every timestamp forward by a fixed one-hour duration. Written as a
# column expression evaluated on the frame, matching the other cells, instead
# of mixing an eager Series into a frame-less pl.select.
df.select(pl.col("time") + pl.duration(hours=1))

time
datetime[μs]
2021-12-16 01:00:00
2021-12-16 01:30:00
2021-12-16 02:00:00
2021-12-16 02:30:00
2021-12-16 03:00:00
2021-12-16 03:30:00
2021-12-16 04:00:00


In [36]:
# pl.duration also accepts expressions: add values**2 minutes to each
# timestamp, so every row gets its own offset, and store it as "time2".
df2 = df.with_columns(
    (pl.col("time") + pl.duration(minutes=pl.col("values") ** 2)).alias("time2")
)
# An assignment displays nothing, so end the cell with the frame itself to
# render the table on a fresh run.
df2

time,groups,values,time2
datetime[μs],str,f64,datetime[μs]
2021-12-16 00:00:00,"""a""",1.0,2021-12-16 00:01:00
2021-12-16 00:30:00,"""a""",2.0,2021-12-16 00:34:00
2021-12-16 01:00:00,"""a""",3.0,2021-12-16 01:09:00
2021-12-16 01:30:00,"""b""",4.0,2021-12-16 01:46:00
2021-12-16 02:00:00,"""b""",5.0,2021-12-16 02:25:00
2021-12-16 02:30:00,"""a""",6.0,2021-12-16 03:06:00
2021-12-16 03:00:00,"""a""",7.0,2021-12-16 03:49:00


In [39]:
# Subtracting one datetime column from another yields a duration column; the
# result keeps the name of the left-hand column ("time2").
df2.select(pl.col("time2") - pl.col("time"))

time2
duration[μs]
1m
4m
9m
16m
25m
36m
49m


In [40]:
# The .dt namespace extracts calendar components as integer columns
# (year comes back as i32, hour as i8).
df.with_columns(
    pl.col("time").dt.year().alias("year"), pl.col("time").dt.hour().alias("hour")
)

time,groups,values,year,hour
datetime[μs],str,f64,i32,i8
2021-12-16 00:00:00,"""a""",1.0,2021,0
2021-12-16 00:30:00,"""a""",2.0,2021,0
2021-12-16 01:00:00,"""a""",3.0,2021,1
2021-12-16 01:30:00,"""b""",4.0,2021,1
2021-12-16 02:00:00,"""b""",5.0,2021,2
2021-12-16 02:30:00,"""a""",6.0,2021,2
2021-12-16 03:00:00,"""a""",7.0,2021,3


In [41]:
# With no format argument the timestamps are rendered as
# "YYYY-MM-DD HH:MM:SS.ffffff" strings.
df.select(pl.col("time").dt.to_string())

time
str
"""2021-12-16 00:00:00.000000"""
"""2021-12-16 00:30:00.000000"""
"""2021-12-16 01:00:00.000000"""
"""2021-12-16 01:30:00.000000"""
"""2021-12-16 02:00:00.000000"""
"""2021-12-16 02:30:00.000000"""
"""2021-12-16 03:00:00.000000"""


### Ranges

In [45]:
# Monthly steps ("1mo") from 1 January to 1 March 2022; both end points are
# part of the result.
pl.select(pl.date_range(pl.date(2022, 1, 1), pl.date(2022, 3, 1), "1mo"))

date
date
2022-01-01
2022-02-01
2022-03-01


In [52]:
# The interval can also be a Python timedelta. The range is built from literal
# end points and reads no column, so evaluate it with pl.select rather than
# against an unrelated frame. The 18:05 end point is not on the 15-minute
# grid, so the last value produced is 18:00.
pl.select(
    pl.datetime_range(
        pl.datetime(2025, 11, 3, 16, 0),
        pl.datetime(2025, 11, 3, 18, 5),
        dt.timedelta(minutes=15),
    )
)

datetime
datetime[μs]
2025-11-03 16:00:00
2025-11-03 16:15:00
2025-11-03 16:30:00
2025-11-03 16:45:00
2025-11-03 17:00:00
2025-11-03 17:15:00
2025-11-03 17:30:00
2025-11-03 17:45:00
2025-11-03 18:00:00


In [50]:
# datetime_ranges builds one range per row, from "time" up to "time2" in
# 3-minute steps, and stores each range as a list. "time2" itself only
# appears in a list when it falls exactly on a step.
df2.with_columns(
    pl.datetime_ranges(pl.col("time"), pl.col("time2"), interval="3m").alias("ranges")
)

time,groups,values,time2,ranges
datetime[μs],str,f64,datetime[μs],list[datetime[μs]]
2021-12-16 00:00:00,"""a""",1.0,2021-12-16 00:01:00,[2021-12-16 00:00:00]
2021-12-16 00:30:00,"""a""",2.0,2021-12-16 00:34:00,"[2021-12-16 00:30:00, 2021-12-16 00:33:00]"
2021-12-16 01:00:00,"""a""",3.0,2021-12-16 01:09:00,"[2021-12-16 01:00:00, 2021-12-16 01:03:00, … 2021-12-16 01:09:00]"
2021-12-16 01:30:00,"""b""",4.0,2021-12-16 01:46:00,"[2021-12-16 01:30:00, 2021-12-16 01:33:00, … 2021-12-16 01:45:00]"
2021-12-16 02:00:00,"""b""",5.0,2021-12-16 02:25:00,"[2021-12-16 02:00:00, 2021-12-16 02:03:00, … 2021-12-16 02:24:00]"
2021-12-16 02:30:00,"""a""",6.0,2021-12-16 03:06:00,"[2021-12-16 02:30:00, 2021-12-16 02:33:00, … 2021-12-16 03:06:00]"
2021-12-16 03:00:00,"""a""",7.0,2021-12-16 03:49:00,"[2021-12-16 03:00:00, 2021-12-16 03:03:00, … 2021-12-16 03:48:00]"


In [53]:
# A positive shift moves the values two rows down, leaving nulls in the
# first two rows.
df2.with_columns(pl.col("time2").shift(2))

time,groups,values,time2
datetime[μs],str,f64,datetime[μs]
2021-12-16 00:00:00,"""a""",1.0,
2021-12-16 00:30:00,"""a""",2.0,
2021-12-16 01:00:00,"""a""",3.0,2021-12-16 00:01:00
2021-12-16 01:30:00,"""b""",4.0,2021-12-16 00:34:00
2021-12-16 02:00:00,"""b""",5.0,2021-12-16 01:09:00
2021-12-16 02:30:00,"""a""",6.0,2021-12-16 01:46:00
2021-12-16 03:00:00,"""a""",7.0,2021-12-16 02:25:00


In [54]:
# A negative shift moves the values two rows up, leaving nulls in the
# last two rows.
df2.with_columns(pl.col("time2").shift(-2))

time,groups,values,time2
datetime[μs],str,f64,datetime[μs]
2021-12-16 00:00:00,"""a""",1.0,2021-12-16 01:09:00
2021-12-16 00:30:00,"""a""",2.0,2021-12-16 01:46:00
2021-12-16 01:00:00,"""a""",3.0,2021-12-16 02:25:00
2021-12-16 01:30:00,"""b""",4.0,2021-12-16 03:06:00
2021-12-16 02:00:00,"""b""",5.0,2021-12-16 03:49:00
2021-12-16 02:30:00,"""a""",6.0,
2021-12-16 03:00:00,"""a""",7.0,


In [58]:
# pl.duration adds a fixed span of time: 93 days after 2021-12-16 is
# 2022-03-19.
df2.select(pl.col("time") + pl.duration(days=93))

time
datetime[μs]
2022-03-19 00:00:00
2022-03-19 00:30:00
2022-03-19 01:00:00
2022-03-19 01:30:00
2022-03-19 02:00:00
2022-03-19 02:30:00
2022-03-19 03:00:00


In [66]:
# pl.duration has no `months` argument, so this call raises a TypeError.
# The error is the point of the cell, so catch it and print the message: an
# uncaught exception would stop "Run All" at this cell.
try:
    df.select(pl.col("time") + pl.duration(months=3))
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: duration() got an unexpected keyword argument 'months'

In [65]:
# Calendar-aware alternative: "3mo" moves each timestamp to the same day of
# the month three months later (2022-03-16) instead of adding a fixed number
# of days.
df.select(pl.col("time").dt.offset_by("3mo"))

time
datetime[μs]
2022-03-16 00:00:00
2022-03-16 00:30:00
2022-03-16 01:00:00
2022-03-16 01:30:00
2022-03-16 02:00:00
2022-03-16 02:30:00
2022-03-16 03:00:00


In [64]:
# month_end moves each timestamp to the last day of its month and keeps the
# time of day unchanged.
df.select(pl.col("time").dt.month_end())

time
datetime[μs]
2021-12-31 00:00:00
2021-12-31 00:30:00
2021-12-31 01:00:00
2021-12-31 01:30:00
2021-12-31 02:00:00
2021-12-31 02:30:00
2021-12-31 03:00:00


In [88]:
# One range per row, from each timestamp to the end of its month. No interval
# is passed, and the resulting lists advance one day at a time.
df.select(pl.datetime_ranges(pl.col("time"), pl.col("time").dt.month_end()))

time
list[datetime[μs]]
"[2021-12-16 00:00:00, 2021-12-17 00:00:00, … 2021-12-31 00:00:00]"
"[2021-12-16 00:30:00, 2021-12-17 00:30:00, … 2021-12-31 00:30:00]"
"[2021-12-16 01:00:00, 2021-12-17 01:00:00, … 2021-12-31 01:00:00]"
"[2021-12-16 01:30:00, 2021-12-17 01:30:00, … 2021-12-31 01:30:00]"
"[2021-12-16 02:00:00, 2021-12-17 02:00:00, … 2021-12-31 02:00:00]"
"[2021-12-16 02:30:00, 2021-12-17 02:30:00, … 2021-12-31 02:30:00]"
"[2021-12-16 03:00:00, 2021-12-17 03:00:00, … 2021-12-31 03:00:00]"


### Time Zones

In [67]:
# Reference view before adding time zones: "time" is datetime[μs] with no
# time zone attached.
df

time,groups,values
datetime[μs],str,f64
2021-12-16 00:00:00,"""a""",1.0
2021-12-16 00:30:00,"""a""",2.0
2021-12-16 01:00:00,"""a""",3.0
2021-12-16 01:30:00,"""b""",4.0
2021-12-16 02:00:00,"""b""",5.0
2021-12-16 02:30:00,"""a""",6.0
2021-12-16 03:00:00,"""a""",7.0


In [69]:
# replace_time_zone attaches a zone without changing the wall-clock values:
# 00:00 stays 00:00 and is now labelled CST.
df.with_columns(pl.col("time").dt.replace_time_zone("America/Chicago"))

time,groups,values
"datetime[μs, America/Chicago]",str,f64
2021-12-16 00:00:00 CST,"""a""",1.0
2021-12-16 00:30:00 CST,"""a""",2.0
2021-12-16 01:00:00 CST,"""a""",3.0
2021-12-16 01:30:00 CST,"""b""",4.0
2021-12-16 02:00:00 CST,"""b""",5.0
2021-12-16 02:30:00 CST,"""a""",6.0
2021-12-16 03:00:00 CST,"""a""",7.0


In [70]:
# Same operation with UTC: the wall-clock values are unchanged and are now
# labelled UTC.
df.with_columns(pl.col("time").dt.replace_time_zone("UTC"))

time,groups,values
"datetime[μs, UTC]",str,f64
2021-12-16 00:00:00 UTC,"""a""",1.0
2021-12-16 00:30:00 UTC,"""a""",2.0
2021-12-16 01:00:00 UTC,"""a""",3.0
2021-12-16 01:30:00 UTC,"""b""",4.0
2021-12-16 02:00:00 UTC,"""b""",5.0
2021-12-16 02:30:00 UTC,"""a""",6.0
2021-12-16 03:00:00 UTC,"""a""",7.0


In [None]:
# convert_time_zone changes the wall-clock values to the target zone. The
# column has no zone here, so its values are treated as UTC: 00:00 becomes
# 18:00 CST on the previous day.
df.with_columns(pl.col("time").dt.convert_time_zone("America/Chicago"))

time,groups,values
"datetime[μs, America/Chicago]",str,f64
2021-12-15 18:00:00 CST,"""a""",1.0
2021-12-15 18:30:00 CST,"""a""",2.0
2021-12-15 19:00:00 CST,"""a""",3.0
2021-12-15 19:30:00 CST,"""b""",4.0
2021-12-15 20:00:00 CST,"""b""",5.0
2021-12-15 20:30:00 CST,"""a""",6.0
2021-12-15 21:00:00 CST,"""a""",7.0


In [81]:
# First declare the wall-clock values to be New York time, then express the
# same instants in Chicago time, which reads one hour earlier on the clock.
df.with_columns(
    pl.col("time")
    .dt.replace_time_zone("America/New_York")
    .dt.convert_time_zone("America/Chicago")
)

time,groups,values
"datetime[μs, America/Chicago]",str,f64
2021-12-15 23:00:00 CST,"""a""",1.0
2021-12-15 23:30:00 CST,"""a""",2.0
2021-12-16 00:00:00 CST,"""a""",3.0
2021-12-16 00:30:00 CST,"""b""",4.0
2021-12-16 01:00:00 CST,"""b""",5.0
2021-12-16 01:30:00 CST,"""a""",6.0
2021-12-16 02:00:00 CST,"""a""",7.0


In [85]:
# Read the same wall-clock values once as Chicago time and once as New York
# time, convert both to UTC and subtract: the Chicago instants are 1h later.
# The result keeps the name "time", so with_columns overwrites that column
# with the duration.
df.with_columns(
    pl.col("time").dt.replace_time_zone("America/Chicago").dt.convert_time_zone("UTC")
    - pl.col("time")
    .dt.replace_time_zone("America/New_York")
    .dt.convert_time_zone("UTC")
)

time,groups,values
duration[μs],str,f64
1h,"""a""",1.0
1h,"""a""",2.0
1h,"""a""",3.0
1h,"""b""",4.0
1h,"""b""",5.0
1h,"""a""",6.0
1h,"""a""",7.0


### Resampling

In [101]:
# Downsample: group the rows into 1-hour windows, labelled by the window
# start, and average "values" within each window.
df.group_by_dynamic("time", every="1h").agg(pl.col("values").mean())


time,values
datetime[μs],f64
2021-12-16 00:00:00,1.5
2021-12-16 01:00:00,3.5
2021-12-16 02:00:00,5.5
2021-12-16 03:00:00,7.0


In [104]:
# Upsample to a 15-minute grid; the rows that did not exist before have null
# "groups" and "values".
df.upsample("time", every="15m")

time,groups,values
datetime[μs],str,f64
2021-12-16 00:00:00,"""a""",1.0
2021-12-16 00:15:00,,
2021-12-16 00:30:00,"""a""",2.0
2021-12-16 00:45:00,,
2021-12-16 01:00:00,"""a""",3.0
…,…,…
2021-12-16 02:00:00,"""b""",5.0
2021-12-16 02:15:00,,
2021-12-16 02:30:00,"""a""",6.0
2021-12-16 02:45:00,,


In [105]:
# Forward-fill every column so that each new row repeats the last observed
# row before it.
df.upsample("time", every="15m").fill_null(strategy="forward")

time,groups,values
datetime[μs],str,f64
2021-12-16 00:00:00,"""a""",1.0
2021-12-16 00:15:00,"""a""",1.0
2021-12-16 00:30:00,"""a""",2.0
2021-12-16 00:45:00,"""a""",2.0
2021-12-16 01:00:00,"""a""",3.0
…,…,…
2021-12-16 02:00:00,"""b""",5.0
2021-12-16 02:15:00,"""b""",5.0
2021-12-16 02:30:00,"""a""",6.0
2021-12-16 02:45:00,"""a""",6.0


In [107]:
# Fill column by column: carry the group label forward, but interpolate the
# numeric values between the neighboring observations (1.0 -> 1.5 -> 2.0).
df.upsample("time", every="15m").with_columns(
    pl.col("groups").fill_null(strategy="forward"), pl.col("values").interpolate()
)

time,groups,values
datetime[μs],str,f64
2021-12-16 00:00:00,"""a""",1.0
2021-12-16 00:15:00,"""a""",1.5
2021-12-16 00:30:00,"""a""",2.0
2021-12-16 00:45:00,"""a""",2.5
2021-12-16 01:00:00,"""a""",3.0
…,…,…
2021-12-16 02:00:00,"""b""",5.0
2021-12-16 02:15:00,"""b""",5.5
2021-12-16 02:30:00,"""a""",6.0
2021-12-16 02:45:00,"""a""",6.5


### Windows

In [89]:
# Reference view of the input frame before the rolling-window examples.
df

time,groups,values
datetime[μs],str,f64
2021-12-16 00:00:00,"""a""",1.0
2021-12-16 00:30:00,"""a""",2.0
2021-12-16 01:00:00,"""a""",3.0
2021-12-16 01:30:00,"""b""",4.0
2021-12-16 02:00:00,"""b""",5.0
2021-12-16 02:30:00,"""a""",6.0
2021-12-16 03:00:00,"""a""",7.0


In [92]:
# Rolling minimum over the 1h30m leading up to and including each timestamp.
# The row exactly 1h30m earlier is not part of the window: at 01:30 the
# minimum is 2.0, not 1.0.
df.rolling("time", period="1h30m").agg(pl.col("values").min())

time,values
datetime[μs],f64
2021-12-16 00:00:00,1.0
2021-12-16 00:30:00,1.0
2021-12-16 01:00:00,1.0
2021-12-16 01:30:00,2.0
2021-12-16 02:00:00,3.0
2021-12-16 02:30:00,4.0
2021-12-16 03:00:00,5.0


In [93]:
# Same window with a sum: at 01:30 the result is 2 + 3 + 4 = 9, i.e. the
# three most recent rows.
df.rolling("time", period="1h30m").agg(pl.col("values").sum())

time,values
datetime[μs],f64
2021-12-16 00:00:00,1.0
2021-12-16 00:30:00,3.0
2021-12-16 01:00:00,6.0
2021-12-16 01:30:00,9.0
2021-12-16 02:00:00,12.0
2021-12-16 02:30:00,15.0
2021-12-16 03:00:00,18.0


In [96]:
# NOTE(review): this repeats the earlier rolling-minimum cell unchanged;
# presumably kept as the baseline for the offset example that follows.
df.rolling("time", period="1h30m").agg(pl.col("values").min())

time,values
datetime[μs],f64
2021-12-16 00:00:00,1.0
2021-12-16 00:30:00,1.0
2021-12-16 01:00:00,1.0
2021-12-16 01:30:00,2.0
2021-12-16 02:00:00,3.0
2021-12-16 02:30:00,4.0
2021-12-16 03:00:00,5.0


In [99]:
# offset="-45m" starts each 1h30m window 45 minutes before its timestamp, so
# the timestamp sits at the center of the window. At 01:00 the minimum is 2.0,
# taken over the 00:30, 01:00 and 01:30 rows.
df.rolling("time", period="1h30m", offset="-45m").agg(pl.col("values").min())

time,values
datetime[μs],f64
2021-12-16 00:00:00,1.0
2021-12-16 00:30:00,1.0
2021-12-16 01:00:00,2.0
2021-12-16 01:30:00,3.0
2021-12-16 02:00:00,4.0
2021-12-16 02:30:00,5.0
2021-12-16 03:00:00,6.0
