Skip to content

Commit

Permalink
[ENG-6435] Fix: duplicate reports when run for past years (CenterForO…
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb authored Nov 15, 2024
1 parent f67b86f commit eadb41f
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 11 deletions.
27 changes: 19 additions & 8 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@ def __init_subclass__(cls, **kwargs):
super().__init_subclass__(**kwargs)
assert 'report_date' in cls.UNIQUE_TOGETHER_FIELDS, f'DailyReport subclasses must have "report_date" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})'

def save(self, *args, **kwargs):
if self.timestamp is None:
self.timestamp = datetime.datetime(
self.report_date.year,
self.report_date.month,
self.report_date.day,
tzinfo=datetime.UTC,
)
super().save(*args, **kwargs)

class Meta:
abstract = True
dynamic = metrics.MetaField('strict')
Expand All @@ -41,19 +51,15 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs, format='strict_year_month')

def deserialize(self, data):
if isinstance(data, YearMonth):
return data
elif isinstance(data, str):
return YearMonth.from_str(data)
elif isinstance(data, (datetime.datetime, datetime.date)):
return YearMonth.from_date(data)
elif isinstance(data, int):
if isinstance(data, int):
# elasticsearch stores dates in milliseconds since the unix epoch
_as_datetime = datetime.datetime.fromtimestamp(data // 1000)
return YearMonth.from_date(_as_datetime)
elif data is None:
return None
else:
try:
return YearMonth.from_any(data)
except ValueError:
raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth')

def serialize(self, data):
Expand Down Expand Up @@ -102,6 +108,11 @@ def __init_subclass__(cls, **kwargs):
super().__init_subclass__(**kwargs)
assert 'report_yearmonth' in cls.UNIQUE_TOGETHER_FIELDS, f'MonthlyReport subclasses must have "report_yearmonth" in UNIQUE_TOGETHER_FIELDS (on {cls.__qualname__}, got {cls.UNIQUE_TOGETHER_FIELDS})'

def save(self, *args, **kwargs):
if self.timestamp is None:
self.timestamp = YearMonth.from_any(self.report_yearmonth).month_start()
super().save(*args, **kwargs)


@receiver(metrics_pre_save)
def set_report_id(sender, instance, **kwargs):
Expand Down
10 changes: 10 additions & 0 deletions osf/metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ def from_str(cls, input_str: str) -> YearMonth:
else:
raise ValueError(f'expected YYYY-MM format, got "{input_str}"')

@classmethod
def from_any(cls, data) -> YearMonth:
if isinstance(data, YearMonth):
return data
elif isinstance(data, str):
return YearMonth.from_str(data)
elif isinstance(data, (datetime.datetime, datetime.date)):
return YearMonth.from_date(data)
raise ValueError(f'cannot coerce {data} into YearMonth')

def __str__(self):
"""convert to string of "YYYY-MM" format"""
return f'{self.year}-{self.month:0>2}'
Expand Down
21 changes: 18 additions & 3 deletions osf_tests/metrics/test_daily_report.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date
import datetime
from unittest import mock

import pytest
Expand All @@ -21,7 +21,13 @@ class UniqueByDate(DailyReport):
class Meta:
app_label = 'osf'

today = date(2022, 5, 18)
today = datetime.date(2022, 5, 18)
expected_timestamp = datetime.datetime(
today.year,
today.month,
today.day,
tzinfo=datetime.UTC,
)

reports = [
UniqueByDate(report_date=today),
Expand All @@ -35,6 +41,7 @@ class Meta:
assert mock_save.call_count == 1
assert mock_save.call_args[0][0] is report
assert report.meta.id == expected_key
assert report.timestamp == expected_timestamp
mock_save.reset_mock()

def test_with_unique_together(self, mock_save):
Expand All @@ -46,14 +53,21 @@ class UniqueByDateAndField(DailyReport):
class Meta:
app_label = 'osf'

today = date(2022, 5, 18)
today = datetime.date(2022, 5, 18)
expected_timestamp = datetime.datetime(
today.year,
today.month,
today.day,
tzinfo=datetime.UTC,
)

expected_blah = 'dca57e6cde89b19274ea24bc713971dab137a896b8e06d43a11a3f437cd1d151'
blah_report = UniqueByDateAndField(report_date=today, uniquefield='blah')
blah_report.save()
assert mock_save.call_count == 1
assert mock_save.call_args[0][0] is blah_report
assert blah_report.meta.id == expected_blah
assert blah_report.timestamp == expected_timestamp
mock_save.reset_mock()

expected_fleh = 'e7dd5ff6b087807efcfa958077dc713878f21c65af79b3ccdb5dc2409bf5ad99'
Expand All @@ -62,6 +76,7 @@ class Meta:
assert mock_save.call_count == 1
assert mock_save.call_args[0][0] is fleh_report
assert fleh_report.meta.id == expected_fleh
assert fleh_report.timestamp == expected_timestamp
mock_save.reset_mock()

for _bad_report in (
Expand Down
5 changes: 5 additions & 0 deletions osf_tests/metrics/test_monthly_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Meta:
app_label = 'osf'

yearmonth = YearMonth(2022, 5)
expected_timestamp = datetime.datetime(yearmonth.year, yearmonth.month, 1, tzinfo=datetime.UTC)

reports = [
UniqueByMonth(report_yearmonth=yearmonth),
Expand All @@ -36,6 +37,7 @@ class Meta:
assert mock_save.call_count == 1
assert mock_save.call_args[0][0] is report
assert report.meta.id == expected_key
assert report.timestamp == expected_timestamp
mock_save.reset_mock()

def test_with_unique_together(self, mock_save):
Expand All @@ -48,13 +50,15 @@ class Meta:
app_label = 'osf'

yearmonth = YearMonth(2022, 5)
expected_timestamp = datetime.datetime(yearmonth.year, yearmonth.month, 1, tzinfo=datetime.UTC)

expected_blah = '62ebf38317cd8402e27a50ce99f836d1734b3f545adf7d144d0e1cf37a0d9d08'
blah_report = UniqueByMonthAndField(report_yearmonth=yearmonth, uniquefield='blah')
blah_report.save()
assert mock_save.call_count == 1
assert mock_save.call_args[0][0] is blah_report
assert blah_report.meta.id == expected_blah
assert blah_report.timestamp == expected_timestamp
mock_save.reset_mock()

expected_fleh = '385700db282f6d6089a0d21836db5ee8423f548615e515b6e034bcc90a14500f'
Expand All @@ -63,6 +67,7 @@ class Meta:
assert mock_save.call_count == 1
assert mock_save.call_args[0][0] is fleh_report
assert fleh_report.meta.id == expected_fleh
assert fleh_report.timestamp == expected_timestamp
mock_save.reset_mock()

for _bad_report in (
Expand Down
18 changes: 18 additions & 0 deletions osf_tests/metrics/test_yearmonth.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ YearMonth(year=1974, month=3)
>>> YearMonth.from_str('2000-12')
YearMonth(year=2000, month=12)

`from_any` constructor, accepts YearMonth, "YYYY-MM", or date/datetime
>>> YearMonth.from_any('2000-12')
YearMonth(year=2000, month=12)
>>> YearMonth.from_any(_) is _
True
>>> YearMonth.from_any(datetime.date(1973, 1, 1))
YearMonth(year=1973, month=1)
>>> YearMonth.from_any(datetime.datetime(1974, 3, 2))
YearMonth(year=1974, month=3)
>>> YearMonth.from_any(None)
Traceback (most recent call last):
...
ValueError: cannot coerce None into YearMonth
>>> YearMonth.from_any(7)
Traceback (most recent call last):
...
ValueError: cannot coerce 7 into YearMonth

`__str__` method gives "YYYY-MM" format:
>>> str(YearMonth(1491, 7))
'1491-07'
Expand Down

0 comments on commit eadb41f

Please sign in to comment.