From eadb41fd8bc5b2d7b06b2721131002a435b2b14c Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 15 Nov 2024 13:54:45 -0500 Subject: [PATCH] [ENG-6435] Fix: duplicate reports when run for past years (#10800) --- osf/metrics/reports.py | 27 +++++++++++++++++------- osf/metrics/utils.py | 10 +++++++++ osf_tests/metrics/test_daily_report.py | 21 +++++++++++++++--- osf_tests/metrics/test_monthly_report.py | 5 +++++ osf_tests/metrics/test_yearmonth.txt | 18 ++++++++++++++++ 5 files changed, 70 insertions(+), 11 deletions(-) diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index d1e21db9c45..28ca6cdb964 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -30,6 +30,16 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) assert 'report_date' in cls.UNIQUE_TOGETHER_FIELDS, f'DailyReport subclasses must have "report_date" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})' + def save(self, *args, **kwargs): + if self.timestamp is None: + self.timestamp = datetime.datetime( + self.report_date.year, + self.report_date.month, + self.report_date.day, + tzinfo=datetime.UTC, + ) + super().save(*args, **kwargs) + class Meta: abstract = True dynamic = metrics.MetaField('strict') @@ -41,19 +51,15 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs, format='strict_year_month') def deserialize(self, data): - if isinstance(data, YearMonth): - return data - elif isinstance(data, str): - return YearMonth.from_str(data) - elif isinstance(data, (datetime.datetime, datetime.date)): - return YearMonth.from_date(data) - elif isinstance(data, int): + if isinstance(data, int): # elasticsearch stores dates in milliseconds since the unix epoch _as_datetime = datetime.datetime.fromtimestamp(data // 1000) return YearMonth.from_date(_as_datetime) elif data is None: return None - else: + try: + return YearMonth.from_any(data) + except ValueError: raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') def serialize(self, data): @@ -102,6 +108,11 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) assert 'report_yearmonth' in cls.UNIQUE_TOGETHER_FIELDS, f'MonthlyReport subclasses must have "report_yearmonth" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})' + def save(self, *args, **kwargs): + if self.timestamp is None: + self.timestamp = YearMonth.from_any(self.report_yearmonth).month_start() + super().save(*args, **kwargs) + @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py index 910b1f3104c..7c9fed2c6fb 100644 --- a/osf/metrics/utils.py +++ b/osf/metrics/utils.py @@ -46,6 +46,16 @@ def from_str(cls, input_str: str) -> YearMonth: else: raise ValueError(f'expected YYYY-MM format, got "{input_str}"') + @classmethod + def from_any(cls, data) -> YearMonth: + if isinstance(data, YearMonth): + return data + elif isinstance(data, str): + return YearMonth.from_str(data) + elif isinstance(data, (datetime.datetime, datetime.date)): + return YearMonth.from_date(data) + raise ValueError(f'cannot coerce {data} into YearMonth') + def __str__(self): """convert to string of "YYYY-MM" format""" return f'{self.year}-{self.month:0>2}' diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py index 3840f5dba21..46375184f95 100644 --- a/osf_tests/metrics/test_daily_report.py +++ b/osf_tests/metrics/test_daily_report.py @@ -1,4 +1,4 @@ -from datetime import date +import datetime from unittest import mock import pytest @@ -21,7 +21,13 @@ class UniqueByDate(DailyReport): class Meta: app_label = 'osf' - today = date(2022, 5, 18) + today = datetime.date(2022, 5, 18) + expected_timestamp = datetime.datetime( + today.year, + today.month, + today.day, + tzinfo=datetime.UTC, + ) reports = [ UniqueByDate(report_date=today), @@ -35,6 +41,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is report assert report.meta.id == expected_key + assert report.timestamp == expected_timestamp mock_save.reset_mock() def test_with_unique_together(self, mock_save): @@ -46,7 +53,13 @@ class UniqueByDateAndField(DailyReport): class Meta: app_label = 'osf' - today = date(2022, 5, 18) + today = datetime.date(2022, 5, 18) + expected_timestamp = datetime.datetime( + today.year, + today.month, + today.day, + tzinfo=datetime.UTC, + ) expected_blah = 'dca57e6cde89b19274ea24bc713971dab137a896b8e06d43a11a3f437cd1d151' blah_report = UniqueByDateAndField(report_date=today, uniquefield='blah') @@ -54,6 +67,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is blah_report assert blah_report.meta.id == expected_blah + assert blah_report.timestamp == expected_timestamp mock_save.reset_mock() expected_fleh = 'e7dd5ff6b087807efcfa958077dc713878f21c65af79b3ccdb5dc2409bf5ad99' @@ -62,6 +76,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is fleh_report assert fleh_report.meta.id == expected_fleh + assert fleh_report.timestamp == expected_timestamp mock_save.reset_mock() for _bad_report in ( diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index 23546eb1fb3..0c0302a7f08 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -23,6 +23,7 @@ class Meta: app_label = 'osf' yearmonth = YearMonth(2022, 5) + expected_timestamp = datetime.datetime(yearmonth.year, yearmonth.month, 1, tzinfo=datetime.UTC) reports = [ UniqueByMonth(report_yearmonth=yearmonth), @@ -36,6 +37,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is report assert report.meta.id == expected_key + assert report.timestamp == expected_timestamp mock_save.reset_mock() def test_with_unique_together(self, mock_save): @@ -48,6 +50,7 @@ class Meta: app_label = 'osf' yearmonth = YearMonth(2022, 5) + expected_timestamp = datetime.datetime(yearmonth.year, yearmonth.month, 1, tzinfo=datetime.UTC) expected_blah = '62ebf38317cd8402e27a50ce99f836d1734b3f545adf7d144d0e1cf37a0d9d08' blah_report = UniqueByMonthAndField(report_yearmonth=yearmonth, uniquefield='blah') @@ -55,6 +58,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is blah_report assert blah_report.meta.id == expected_blah + assert blah_report.timestamp == expected_timestamp mock_save.reset_mock() expected_fleh = '385700db282f6d6089a0d21836db5ee8423f548615e515b6e034bcc90a14500f' @@ -63,6 +67,7 @@ class Meta: assert mock_save.call_count == 1 assert mock_save.call_args[0][0] is fleh_report assert fleh_report.meta.id == expected_fleh + assert fleh_report.timestamp == expected_timestamp mock_save.reset_mock() for _bad_report in ( diff --git a/osf_tests/metrics/test_yearmonth.txt b/osf_tests/metrics/test_yearmonth.txt index 646c73c42f9..17d847f689b 100644 --- a/osf_tests/metrics/test_yearmonth.txt +++ b/osf_tests/metrics/test_yearmonth.txt @@ -24,6 +24,24 @@ YearMonth(year=1974, month=3) >>> YearMonth.from_str('2000-12') YearMonth(year=2000, month=12) +`from_any` constructor, accepts YearMonth, "YYYY-MM", or date/datetime +>>> YearMonth.from_any('2000-12') +YearMonth(year=2000, month=12) +>>> YearMonth.from_any(_) is _ +True +>>> YearMonth.from_any(datetime.date(1973, 1, 1)) +YearMonth(year=1973, month=1) +>>> YearMonth.from_any(datetime.datetime(1974, 3, 2)) +YearMonth(year=1974, month=3) +>>> YearMonth.from_any(None) +Traceback (most recent call last): + ... +ValueError: cannot coerce None into YearMonth +>>> YearMonth.from_any(7) +Traceback (most recent call last): + ... +ValueError: cannot coerce 7 into YearMonth + `__str__` method gives "YYYY-MM" format: >>> str(YearMonth(1491, 7)) '1491-07'