From a3d6db23b5dfc0d8e238e55a6ffadb4b5305e72c Mon Sep 17 00:00:00 2001
From: Christine Draper
Date: Thu, 23 May 2024 09:41:41 -0400
Subject: [PATCH 1/2] Fix calculate_num_batches

---
 .../diagnoser/converters/test_granularity.py  |  7 +++++
 .../diagnoser/converters/granularity.py       | 29 +++++--------------
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/tests/monitor/diagnoser/converters/test_granularity.py b/tests/monitor/diagnoser/converters/test_granularity.py
index 5e18a5c..deec22c 100644
--- a/tests/monitor/diagnoser/converters/test_granularity.py
+++ b/tests/monitor/diagnoser/converters/test_granularity.py
@@ -5,20 +5,27 @@ def test_calculate_num_batches_hourly():
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-01T03:30:00Z', 'hourly') == 3
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-03T01:00:00Z', 'hourly') == 49
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-02T00:00:00Z', 'hourly') == 24
+    assert calculate_num_batches('2022-11-19T00:00:00.000Z/2022-12-19T00:00:00.000Z', 'hourly') == 720
+    assert calculate_num_batches('2022-11-19T00:00:00.000Z/2023-12-19T00:00:00.000Z', 'hourly') == 9480
 
 
 def test_calculate_num_batches_daily():
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-02T00:00:00Z', 'daily') == 1
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-09T00:00:00Z', 'daily') == 8
+    assert calculate_num_batches('2022-11-19T00:00:00.000Z/2022-12-19T00:00:00.000Z', 'daily') == 30
+    assert calculate_num_batches('2022-11-19T00:00:00.000Z/2023-12-19T00:00:00.000Z', 'daily') == 395
 
 
 def test_calculate_num_batches_weekly():
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-02T00:00:00Z', 'weekly') == 0
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-09T00:00:00Z', 'weekly') == 1
+    assert calculate_num_batches('2022-11-19T00:00:00.000Z/2022-12-19T00:00:00.000Z', 'weekly') == 4
+    assert calculate_num_batches('2022-11-19T00:00:00.000Z/2023-12-19T00:00:00.000Z', 'weekly') == 56
 
 
 def test_calculate_num_batches_monthly():
     assert calculate_num_batches('2022-01-01T00:00:00Z/2022-02-02T00:00:00Z', 'monthly') == 1
+    assert calculate_num_batches('2022-01-01T00:00:00Z/2023-02-02T00:00:00Z', 'monthly') == 13
 
 
 def test_calculate_num_batches_duration():
diff --git a/whylabs_toolkit/monitor/diagnoser/converters/granularity.py b/whylabs_toolkit/monitor/diagnoser/converters/granularity.py
index 5be8d25..27bdd7c 100644
--- a/whylabs_toolkit/monitor/diagnoser/converters/granularity.py
+++ b/whylabs_toolkit/monitor/diagnoser/converters/granularity.py
@@ -1,21 +1,7 @@
-from dateutil.relativedelta import relativedelta
-from whylabs_toolkit.utils.granularity import Granularity
+from math import floor
 from isodate import parse_datetime, parse_duration, parse_date
 
 
-def batches_to_timedelta(time_period: str, batches: int) -> relativedelta:
-    if time_period == "PT1H":
-        return relativedelta(hours=batches)
-
-    if time_period == "P1W":
-        return relativedelta(weeks=batches)
-
-    if time_period == "P1M":
-        return relativedelta(months=batches)
-
-    return relativedelta(days=batches)
-
-
 def calculate_num_batches(interval: str, granularity: str) -> int:
     # Parse the ISO8601 interval string into a start and end datetime
     start, end = interval.split("/")
@@ -25,16 +11,17 @@ def calculate_num_batches(interval: str, granularity: str) -> int:
     except ValueError:
         end_date = start_date + parse_duration(end)
 
-    # Calculate the difference based on the granularity
+    # Calculate the (somewhat approximate) difference based on the granularity
+    # Truncates to whole batches, ignores leap seconds
     if granularity == "hourly":
-        difference = relativedelta(end_date, start_date).days * 24 + relativedelta(end_date, start_date).hours
+        difference = (end_date - start_date).total_seconds() / 3600
     elif granularity == "daily":
-        difference = relativedelta(end_date, start_date).days
+        difference = (end_date - start_date).total_seconds() / (3600 * 24)
     elif granularity == "weekly":
-        difference = relativedelta(end_date, start_date).weeks
+        difference = (end_date - start_date).total_seconds() / (3600 * 24 * 7)
     elif granularity == "monthly":
-        difference = relativedelta(end_date, start_date).months
+        difference = (end_date.year - start_date.year) * 12 + end_date.month - start_date.month
     else:
         raise ValueError(f"Unsupported granularity: {granularity}")
 
-    return difference
+    return floor(difference)

From cf430ad9fbfc9e0a6a97af46c88f24efbc261d1f Mon Sep 17 00:00:00 2001
From: Christine Draper
Date: Thu, 23 May 2024 12:40:02 -0400
Subject: [PATCH 2/2] Lint fix

---
 whylabs_toolkit/monitor/diagnoser/converters/granularity.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/whylabs_toolkit/monitor/diagnoser/converters/granularity.py b/whylabs_toolkit/monitor/diagnoser/converters/granularity.py
index 27bdd7c..a501d05 100644
--- a/whylabs_toolkit/monitor/diagnoser/converters/granularity.py
+++ b/whylabs_toolkit/monitor/diagnoser/converters/granularity.py
@@ -24,4 +24,5 @@ def calculate_num_batches(interval: str, granularity: str) -> int:
     else:
         raise ValueError(f"Unsupported granularity: {granularity}")
 
-    return floor(difference)
+    diff_as_int: int = floor(difference)
+    return diff_as_int