Add representative time mapping #19

lixiliu · 2024-11-14T19:22:59Z

No description provided.

initial commit

remove stale code

daniel-thom · 2024-11-14T21:05:18Z

src/chronify/sqlalchemy/functions.py

            df[config.time_column] = df[config.time_column].dt.tz_convert("UTC")
        else:
            df[config.time_column] = df[config.time_column].dt.tz_localize("UTC")
-    pl.DataFrame(df).write_database(schema.name, connection=conn, if_table_exists="append")
+    pl.DataFrame(df).write_database(schema.name, connection=conn, if_table_exists="replace")


Why replace? It will delete whatever data currently exists. Is there a use case for this? If so, the parameter needs to be passed in. We definitely need append.

This is going to be passed in as an argument.

lixiliu · 2024-11-14T23:27:04Z

src/chronify/sqlalchemy/functions.py

@@ -18,6 +18,9 @@ def read_database(query: Selectable | str, conn: Connection, schema: TableSchema
            df[config.time_column] = pd.to_datetime(df[config.time_column], utc=True)
        else:
            df[config.time_column] = df[config.time_column].dt.tz_localize("UTC")
+    elif conn.engine.name == "sqlite" and isinstance(config, DatetimeRange):


Refactor so that SQLite-related processing is in one place. Suggest removing needs_utc_conversion.

lixiliu · 2024-11-14T23:34:38Z

src/chronify/time_configs.py

@@ -129,7 +131,7 @@ class TimeBasedDataAdjustment(ChronifyBaseModel):
 class TimeBaseModel(ChronifyBaseModel, abc.ABC):
    """Defines a base model common to all time dimensions."""

-    length: int
+    measurement_type: MeasurementType = MeasurementType.TOTAL


Do we need this?

lixiliu · 2024-11-14T23:42:16Z

src/chronify/time_configs.py

@@ -233,7 +235,8 @@ class AnnualTimeRange(TimeBaseModel):
    time_column: str = Field(description="Column in the table that represents time.")
    time_type: Literal[TimeType.ANNUAL] = TimeType.ANNUAL
    start: int
-    # TODO: measurement_type must be TOTAL
+    length: int
+    # TODO: measurement_type must be TOTAL, not necessarily right?


Add interval_type (base class)

lixiliu · 2024-11-14T23:45:39Z

src/chronify/time_configs.py


    def list_time_columns(self) -> list[str]:
-        return self.time_columns
+        match self.time_format:


Assign self._handler to make this polymorphic.

lixiliu · 2024-11-14T23:52:18Z

src/chronify/time_configs.py

+                return OneWeekdayDayAndWeekendDayPerMonthByHourHandler().iter_timestamps()
+
+    def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[Any]:
+        return df[self.list_time_columns()].drop_duplicates().apply(tuple, axis=1).to_list()


Use: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.itertuples.html

lixiliu · 2024-11-14T23:55:36Z

src/chronify/time_configs.py

+
+class OneWeekPerMonthByHourHandler(RepresentativeTimeFormatHandlerBase):
+    """Handler for format with hourly data that includes one week per month."""
+


Define the time column names here (e.g., as a TIME_COLUMNS attribute on the handler) instead of looking them up via
representative_period_columns[RepresentativePeriodFormat.ONE_WEEK_PER_MONTH_BY_HOUR]

lixiliu · 2024-11-14T23:58:41Z

src/chronify/time_configs.py

+    @staticmethod
+    def iter_timestamps() -> Generator[Any, None, None]:
+        for month in range(1, 13):
+            for is_weekday in sorted([False, True]):


remove sorted

lixiliu · 2024-11-15T00:00:35Z

src/chronify/time_configs.py

+        ]
+
+    @staticmethod
+    def iter_timestamps() -> Generator[Any, None, None]:


Generator[tuple[int, bool, int], None, None]
ditto

lixiliu · 2024-11-15T00:02:25Z

src/chronify/time_series_mapper.py

+    if isinstance(from_schema.time_config, RepresentativePeriodTimeRange) and isinstance(
+        to_schema.time_config, DatetimeRange
+    ):
+        return MapperRepresentativeTimeToDatetime(


lixiliu · 2024-11-15T00:02:32Z

src/chronify/time_series_mapper.py

-class TimeSeriesMapper:
-    """Maps time series data from one configuration to another."""
+def map_time(engine, metadata, from_schema, to_schema):
+    """Factory function to map time using the appropriate TimeSeriesMapper model."""


Remove factory

lixiliu · 2024-11-15T00:07:55Z

src/chronify/time_series_mapper_base.py

+class TimeSeriesMapperBase(abc.ABC):
+    """Maps time series data from one configuration to another."""
+
+    @abc.abstractmethod


not required, remove.

daniel-thom · 2024-11-15T02:01:32Z

src/chronify/time_series_mapper_representative.py

+        self._metadata = metadata
+        self.from_schema = from_schema
+        self.to_schema = to_schema
+        self.weekday_column = representative_weekday_column[from_schema.time_config.time_format]


There could be derived classes that have no concept of weekday.

daniel-thom · 2024-11-15T02:06:08Z

src/chronify/time_series_mapper_representative.py

+
+    def _check_source_table_has_time_zone(self) -> None:
+        if "time_zone" not in self.from_schema.time_array_id_columns:
+            msg = f"time_zone is required for representative time mapping and it is missing from source table: {self.from_schema.time_config.name}"


self.from_schema.name?

daniel-thom · 2024-11-15T02:08:19Z

src/chronify/time_series_mapper_representative.py

+
+    def _check_source_table_has_time_zone(self) -> None:
+        if "time_zone" not in self.from_schema.time_array_id_columns:
+            msg = f"time_zone is required for representative time mapping and it is missing from source table: {self.from_schema.time_config.name}"


Should the message include something about being time zone aware?

daniel-thom · 2024-11-15T02:10:53Z

src/chronify/time_series_mapper_representative.py

+            pd.Series(self.to_schema.time_config.list_timestamps()).rename(to_time_col).to_frame()
+        )
+
+        # Apply checks


Comments like these are not necessary.

daniel-thom · 2024-11-15T02:13:18Z

src/chronify/time_series_mapper_representative.py

+
+        # Ingest mapping into db
+        time_array_id_columns = [
+            x for x in self.from_schema.time_config.list_time_columns() if x != "hour"


x != "hour" feels out of place in this class, which should be generic to any representative time.

daniel-thom · 2024-11-15T03:10:07Z