Skip to content

Commit

Permalink
revise code to ingest-csv fixes
Browse files Browse the repository at this point in the history
remove stale code
  • Loading branch information
lixiliu committed Nov 14, 2024
1 parent 03c6b81 commit af399b6
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 13 deletions.
12 changes: 1 addition & 11 deletions src/chronify/time_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,19 +297,9 @@ class RepresentativePeriodTimeRange(TimeBaseModel):

time_type: Literal[TimeType.REPRESENTATIVE_PERIOD] = TimeType.REPRESENTATIVE_PERIOD
time_format: RepresentativePeriodFormat
# time_columns: list[str] = Field(description="Columns in the table that represent time.")
measurement_type: MeasurementType = MeasurementType.TOTAL
interval_type: TimeIntervalType = TimeIntervalType.PERIOD_ENDING

# @model_validator(mode="after")
# def check_columns(self) -> "RepresentativePeriodTimeRange":
# expected = representative_period_columns[self.time_format]

# if set(self.time_columns) != set(expected):
# msg = f"Incorrect {self.time_columns=} for {self.time_format=}, {expected=}"
# raise InvalidParameter(msg)
# return self

def list_time_columns(self) -> list[str]:
match self.time_format:
case RepresentativePeriodFormat.ONE_WEEK_PER_MONTH_BY_HOUR:
Expand All @@ -324,7 +314,7 @@ def iter_timestamps(self) -> Generator[int, None, None]:
case RepresentativePeriodFormat.ONE_WEEKDAY_DAY_AND_ONE_WEEKEND_DAY_PER_MONTH_BY_HOUR:
return OneWeekdayDayAndWeekendDayPerMonthByHourHandler().iter_timestamps()

def list_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[Any]:
def list_distinct_timestamps_from_dataframe(self, df: pd.DataFrame) -> list[Any]:
return df[self.list_time_columns()].drop_duplicates().apply(tuple, axis=1).to_list()


Expand Down
2 changes: 1 addition & 1 deletion src/chronify/time_series_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _run_timestamp_checks_on_tmp_table(self, table_name: str) -> None:
result3 = self._conn.execute(text(query3)).fetchone()
assert result3 is not None
actual_count = result3[0]
expected_count = len(schema.time_config.list_timestamps())
expected_count = len(self._schema.time_config.list_timestamps())
if actual_count != expected_count:
msg = f"Time arrays must have length={expected_count}. Actual = {actual_count}"
raise InvalidTable(msg)
1 change: 0 additions & 1 deletion tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ def test_ingest_csv(iter_engines: Engine, tmp_path, generators_schema, use_time_
)
store.ingest_from_csv(new_file, src_schema2, dst_schema)
df = store.read_table(dst_schema)
breakpoint()
assert len(df) == 8784 * 3 * 2
all(df.timestamp.unique() == dst_schema.time_config.list_timestamps())

Expand Down

0 comments on commit af399b6

Please sign in to comment.