diff --git a/api/alembic/versions/07007f659064_partition_weather_station_model_.py b/api/alembic/versions/07007f659064_partition_weather_station_model_.py
new file mode 100644
index 000000000..b6d812556
--- /dev/null
+++ b/api/alembic/versions/07007f659064_partition_weather_station_model_.py
@@ -0,0 +1,63 @@
+"""partition weather_station_model_predictions part 1
+
+Revision ID: 07007f659064
+Revises: c5bea0920d53
+Create Date: 2024-11-04 10:41:31.466124
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "07007f659064"
+down_revision = "c5bea0920d53"
+branch_labels = None
+depends_on = None
+
+### Adapted from pgslice "prep" command
+# BEGIN;
+# CREATE TABLE "public"."weather_station_model_predictions_intermediate" (LIKE "public"."weather_station_model_predictions" INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS INCLUDING STATISTICS INCLUDING GENERATED INCLUDING COMPRESSION) PARTITION BY RANGE ("prediction_timestamp");
+# CREATE UNIQUE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (station_code, prediction_model_run_timestamp_id, prediction_timestamp);
+# CREATE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (id);
+# CREATE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (prediction_model_run_timestamp_id);
+# CREATE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (prediction_timestamp);
+# CREATE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (station_code);
+# CREATE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (update_date);
+# CREATE INDEX ON "public"."weather_station_model_predictions_intermediate" USING btree (prediction_timestamp, station_code);
+# ALTER TABLE "public"."weather_station_model_predictions_intermediate" ADD FOREIGN KEY (prediction_model_run_timestamp_id) REFERENCES prediction_model_run_timestamps(id);
+# COMMENT ON TABLE "public"."weather_station_model_predictions_intermediate" IS 'column:prediction_timestamp,period:month,cast:timestamptz,version:3';
+# COMMIT;
+
+
+def upgrade():
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_intermediate" (LIKE "public"."weather_station_model_predictions" INCLUDING DEFAULTS INCLUDING CONSTRAINTS INCLUDING STORAGE INCLUDING COMMENTS INCLUDING STATISTICS INCLUDING GENERATED INCLUDING COMPRESSION) PARTITION BY RANGE ("prediction_timestamp");'
+    )
+    op.execute(
+        'CREATE UNIQUE INDEX wsmp_unique_record_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (station_code, prediction_model_run_timestamp_id, prediction_timestamp);'
+    )
+    op.execute('CREATE INDEX wsmp_id_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (id);')
+    op.execute('CREATE INDEX wsmp_prediction_model_run_timestamp_id_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (prediction_model_run_timestamp_id);')
+    op.execute('CREATE INDEX wsmp_prediction_timestamp_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (prediction_timestamp);')
+    op.execute('CREATE INDEX wsmp_station_code_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (station_code);')
+    op.execute('CREATE INDEX wsmp_update_date_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (update_date);')
+    op.execute('CREATE INDEX wsmp_prediction_station_code_idx ON "public"."weather_station_model_predictions_intermediate" USING btree (prediction_timestamp, station_code);')
+    op.execute(
+        'ALTER TABLE "public"."weather_station_model_predictions_intermediate" ADD CONSTRAINT wsmp_id_fk FOREIGN KEY (prediction_model_run_timestamp_id) REFERENCES prediction_model_run_timestamps(id);'
+    )
+    op.execute('COMMENT ON TABLE "public"."weather_station_model_predictions_intermediate" IS \'column:prediction_timestamp,period:month,cast:timestamptz,version:3\';')
+
+
+def downgrade():
+    op.execute('COMMENT ON TABLE "public"."weather_station_model_predictions_intermediate" IS NULL;')
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_intermediate" DROP CONSTRAINT wsmp_id_fk;')
+    op.execute("DROP INDEX wsmp_prediction_station_code_idx;")
+    op.execute("DROP INDEX wsmp_update_date_idx;")
+    op.execute("DROP INDEX wsmp_station_code_idx;")
+    op.execute("DROP INDEX wsmp_prediction_timestamp_idx;")
+    op.execute("DROP INDEX wsmp_prediction_model_run_timestamp_id_idx;")
+    op.execute("DROP INDEX wsmp_id_idx;")
+    op.execute("DROP INDEX wsmp_unique_record_idx;")
+    op.execute('DROP TABLE "public"."weather_station_model_predictions_intermediate"')
diff --git a/api/alembic/versions/362d268606f3_partition_weather_station_model_.py b/api/alembic/versions/362d268606f3_partition_weather_station_model_.py
new file mode 100644
index 000000000..3a8fb6967
--- /dev/null
+++ b/api/alembic/versions/362d268606f3_partition_weather_station_model_.py
@@ -0,0 +1,107 @@
+"""partition weather_station_model_predictions part 2
+
+Revision ID: 362d268606f3
+Revises: 07007f659064
+Create Date: 2024-11-04 11:02:57.501656
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "362d268606f3"
+down_revision = "07007f659064"
+branch_labels = None
+depends_on = None
+
+### Adapted from pgslice "add_partitions" command; creates partitions for the previous 6 months, the current month, and the next 3 months
+# BEGIN;
+# CREATE TABLE "public"."weather_station_model_predictions_202405" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-05-01 00:00:00 UTC') TO ('2024-06-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202405" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202406" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-06-01 00:00:00 UTC') TO ('2024-07-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202406" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202407" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-07-01 00:00:00 UTC') TO ('2024-08-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202407" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202408" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-08-01 00:00:00 UTC') TO ('2024-09-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202408" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202409" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-09-01 00:00:00 UTC') TO ('2024-10-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202409" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202410" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-10-01 00:00:00 UTC') TO ('2024-11-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202410" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202411" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-11-01 00:00:00 UTC') TO ('2024-12-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202411" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202412" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2024-12-01 00:00:00 UTC') TO ('2025-01-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202412" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202501" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2025-01-01 00:00:00 UTC') TO ('2025-02-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202501" ADD PRIMARY KEY ("id");
+
+# CREATE TABLE "public"."weather_station_model_predictions_202502" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM ('2025-02-01 00:00:00 UTC') TO ('2025-03-01 00:00:00 UTC');
+# ALTER TABLE "public"."weather_station_model_predictions_202502" ADD PRIMARY KEY ("id");
+# COMMIT;
+
+
+def upgrade():
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202405" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-05-01 00:00:00 UTC\') TO (\'2024-06-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202405" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202406" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-06-01 00:00:00 UTC\') TO (\'2024-07-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202406" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202407" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-07-01 00:00:00 UTC\') TO (\'2024-08-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202407" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202408" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-08-01 00:00:00 UTC\') TO (\'2024-09-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202408" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202409" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-09-01 00:00:00 UTC\') TO (\'2024-10-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202409" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202410" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-10-01 00:00:00 UTC\') TO (\'2024-11-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202410" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202411" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-11-01 00:00:00 UTC\') TO (\'2024-12-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202411" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202412" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2024-12-01 00:00:00 UTC\') TO (\'2025-01-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202412" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202501" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2025-01-01 00:00:00 UTC\') TO (\'2025-02-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202501" ADD PRIMARY KEY ("id");')
+    op.execute(
+        'CREATE TABLE "public"."weather_station_model_predictions_202502" PARTITION OF "public"."weather_station_model_predictions_intermediate" FOR VALUES FROM (\'2025-02-01 00:00:00 UTC\') TO (\'2025-03-01 00:00:00 UTC\');'
+    )
+    op.execute('ALTER TABLE "public"."weather_station_model_predictions_202502" ADD PRIMARY KEY ("id");')
+
+
+def downgrade():
+    op.execute("DROP TABLE weather_station_model_predictions_202502;")
+    op.execute("DROP TABLE weather_station_model_predictions_202501;")
+    op.execute("DROP TABLE weather_station_model_predictions_202412;")
+    op.execute("DROP TABLE weather_station_model_predictions_202411;")
+    op.execute("DROP TABLE weather_station_model_predictions_202410;")
+    op.execute("DROP TABLE weather_station_model_predictions_202409;")
+    op.execute("DROP TABLE weather_station_model_predictions_202408;")
+    op.execute("DROP TABLE weather_station_model_predictions_202407;")
+    op.execute("DROP TABLE weather_station_model_predictions_202406;")
+    op.execute("DROP TABLE weather_station_model_predictions_202405;")
diff --git a/openshift/pgslice/docker/Dockerfile b/openshift/pgslice/docker/Dockerfile
new file mode 100644
index 000000000..efc6ceb2b
--- /dev/null
+++ b/openshift/pgslice/docker/Dockerfile
@@ -0,0 +1,2 @@
+FROM ankane/pgslice:v0.6.1
+COPY fill_partition_data.sh .
\ No newline at end of file
diff --git a/openshift/pgslice/docker/fill_partition_data.sh b/openshift/pgslice/docker/fill_partition_data.sh
new file mode 100755
index 000000000..8b264a3a7
--- /dev/null
+++ b/openshift/pgslice/docker/fill_partition_data.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# usage example:
+# PG_PASSWORD=wps PG_HOSTNAME=localhost PG_PORT=5432 PG_USER=wps PG_DATABASE=wps TABLE=table ./fill_partition_data.sh
+
+# variable checks
+if [ -z ${PG_PASSWORD+0} ]
+then
+    echo "PG_PASSWORD not specified"
+    echo "Specify a postgres password"
+    exit 1
+fi
+
+if [ -z ${PG_HOSTNAME+0} ]
+then
+    echo "PG_HOSTNAME not specified"
+    echo "Specify a postgres hostname"
+    exit 1
+fi
+
+if [ -z ${PG_PORT+0} ]
+then
+    echo "PG_PORT not specified"
+    echo "Specify a postgres port"
+    exit 1
+fi
+
+if [ -z ${PG_USER+0} ]
+then
+    echo "PG_USER not specified"
+    echo "Specify a postgres user"
+    exit 1
+fi
+
+if [ -z ${PG_DATABASE+0} ]
+then
+    echo "PG_DATABASE not specified"
+    echo "Specify a postgres database"
+    exit 1
+fi
+
+if [ -z ${TABLE+0} ]
+then
+    echo "TABLE not specified"
+    echo "Specify a postgres table"
+    exit 1
+fi
+
+# pgslice reads its connection string from the PGSLICE_URL environment variable
+export PGSLICE_URL="postgresql://${PG_USER}:${PG_PASSWORD}@${PG_HOSTNAME}:${PG_PORT}/${PG_DATABASE}"
+# copy rows into the partitioned intermediate table, refresh statistics, then swap it in for the original
+pgslice fill $TABLE
+pgslice analyze $TABLE
+pgslice swap $TABLE
\ No newline at end of file
diff --git a/openshift/pgslice/openshift/README.md b/openshift/pgslice/openshift/README.md
new file mode 100644
index 000000000..df0ded4b6
--- /dev/null
+++ b/openshift/pgslice/openshift/README.md
@@ -0,0 +1,18 @@
+# pgslice partitioning image
+
+Uses https://hub.docker.com/r/ankane/pgslice to run the pgslice commands.
+Runs `fill`, `analyze` and `swap` for a newly partitioned table whose original table still holds data, so that the new partitions are filled with the existing rows.
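+
+The bundled `fill_partition_data.sh` script can also be run directly against a database. A minimal sketch of a local run, assuming the table has already been prepped and partitioned by the Alembic migrations (the connection values are examples only):
+
+```bash
+PG_PASSWORD=wps PG_HOSTNAME=localhost PG_PORT=5432 PG_USER=wps \
+PG_DATABASE=wps TABLE=weather_station_model_predictions ./fill_partition_data.sh
+```
+
+The script assembles these values into the `PGSLICE_URL` connection string that pgslice reads, then runs `pgslice fill`, `pgslice analyze` and `pgslice swap` for the given table.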
+
+## Building
+
+### Create a new build
+
+```bash
+oc -n e1e498-tools process -f build.yaml -p VERSION=05-11-2024 | oc -n e1e498-tools apply -f -
+```
+
+### Kick off a new build
+
+```bash
+oc -n e1e498-tools start-build pgslice --follow
+```
diff --git a/openshift/pgslice/openshift/build.yaml b/openshift/pgslice/openshift/build.yaml
new file mode 100644
index 000000000..0880a48fb
--- /dev/null
+++ b/openshift/pgslice/openshift/build.yaml
@@ -0,0 +1,64 @@
+apiVersion: template.openshift.io/v1
+kind: Template
+metadata:
+  creationTimestamp: null
+  name: pgslice
+labels:
+  app: pgslice
+  phase: build
+  app.kubernetes.io/name: pgslice
+  app.kubernetes.io/managed-by: template
+parameters:
+  - name: NAME
+    value: pgslice
+  - name: VERSION
+    description: Output version
+    required: true
+    value: "latest"
+  - name: GIT_URL
+    value: https://github.com/bcgov/wps.git
+  - name: GIT_BRANCH
+    value: task/partition-weather-station-model-predictions
+objects:
+  - apiVersion: v1
+    kind: ImageStream
+    metadata:
+      annotations:
+        openshift.io/generated-by: OpenShiftNewBuild
+      labels:
+        app: pgslice
+        common: "true"
+      name: pgslice
+    spec:
+      lookupPolicy:
+        local: false
+  - apiVersion: v1
+    kind: BuildConfig
+    metadata:
+      annotations:
+        openshift.io/generated-by: OpenShiftNewBuild
+      labels:
+        app: pgslice
+      name: pgslice
+    spec:
+      resources:
+        limits:
+          cpu: "2000m"
+          memory: "512Mi"
+        requests:
+          # we might as well start high!
+          cpu: "500m"
+          memory: "256Mi"
+      completionDeadlineSeconds: 600 # 10 minutes.
+      output:
+        to:
+          kind: ImageStreamTag
+          name: pgslice:${VERSION}
+      source:
+        type: Git
+        git:
+          uri: ${GIT_URL}
+          ref: origin/${GIT_BRANCH}
+        contextDir: openshift/pgslice/docker
+      strategy:
+        type: Docker
diff --git a/openshift/scripts/oc_provision_fill_partition_job.sh b/openshift/scripts/oc_provision_fill_partition_job.sh
new file mode 100755
index 000000000..bc0bf96a2
--- /dev/null
+++ b/openshift/scripts/oc_provision_fill_partition_job.sh
@@ -0,0 +1,34 @@
+#!/bin/sh -l
+#
+source "$(dirname ${0})/common/common"
+
+#%
+#% OpenShift Deploy Helper
+#%
+#% Intended for use with a pull request-based pipeline.
+#% Suffixes incl.: pr-###.
+#%
+#% Usage:
+#%
+#% [PROJ_TARGET] [PG_DATABASE] [TABLE] ${THIS_FILE} [SUFFIX]
+#%
+#% Examples:
+#%
+#% PROJ_TARGET=e1e498-dev PG_DATABASE=wps TABLE=table ${THIS_FILE} pr-0
+
+# must match the Job name created by partition_filler_job.yaml (NAME defaults to "wps")
+JOB="job/partition-filler-wps-${SUFFIX}"
+
+# create the job
+oc -n ${PROJ_TARGET} process -f ${TEMPLATE_PATH}/partition_filler_job.yaml \
+  -p SUFFIX=${SUFFIX} \
+  -p PG_DATABASE=${PG_DATABASE} \
+  -p TABLE=${TABLE} \
+  -p CRUNCHYDB_USER=wps-crunchydb-${SUFFIX}-pguser-wps-crunchydb-${SUFFIX} \
+  -p PROJ_TOOLS=${PROJ_TOOLS} | jq '.items[0]' | oc -n ${PROJ_TARGET} create -f -
+# wait for the job to finish
+oc wait --for=condition=complete ${JOB} --timeout=3600s
+# output the log for debugging
+oc logs -f ${JOB}
+# we're done, so get rid of the job
+oc delete ${JOB}
+
diff --git a/openshift/templates/partition_filler_job.yaml b/openshift/templates/partition_filler_job.yaml
new file mode 100644
index 000000000..ecf1d0a74
--- /dev/null
+++ b/openshift/templates/partition_filler_job.yaml
@@ -0,0 +1,88 @@
+kind: "Template"
+apiVersion: "template.openshift.io/v1"
+metadata:
+  name: partition-filler-job-template
+  annotations:
+    description: "Job to fill partitioned tables with data from original table."
+    tags: "job,sfms"
+labels:
+  app.kubernetes.io/part-of: "${NAME}"
+  app: ${NAME}-${SUFFIX}
+parameters:
+  - name: NAME
+    description: Module name
+    value: wps
+  - name: SUFFIX
+    description: Deployment suffix, e.g. pr-###
+    required: true
+  - name: GLOBAL_NAME
+    description: Name of global Module
+    value: wps-global
+  - name: PROJ_TOOLS
+    value: e1e498-tools
+  - name: "TAG_NAME"
+    displayName: "Environment TAG name"
+    description: "The TAG name for the docker image"
+    required: true
+    value: "prod"
+  - name: PG_DATABASE
+    required: true
+  - name: CRUNCHYDB_USER
+    required: true
+  - name: "TABLE"
+    description: "The table that is partitioned"
+    required: true
+objects:
+  - kind: Job
+    apiVersion: batch/v1
+    metadata:
+      name: partition-filler-${NAME}-${SUFFIX}
+    spec:
+      parallelism: 1
+      completions: 1
+      activeDeadlineSeconds: 3600
+      backoffLimit: 6
+      template:
+        metadata:
+          name: partition-filler-${NAME}-${SUFFIX}
+        spec:
+          containers:
+            - name: partition-filler-${NAME}-${SUFFIX}
+              image: "image-registry.openshift-image-registry.svc:5000/${PROJ_TOOLS}/pgslice:${TAG_NAME}"
+              imagePullPolicy: "Always"
+              command: ["bash", "fill_partition_data.sh", "${PG_DATABASE}"]
+              env:
+                - name: PG_USER
+                  valueFrom:
+                    secretKeyRef:
+                      name: ${CRUNCHYDB_USER}
+                      key: user
+                - name: PG_PASSWORD
+                  valueFrom:
+                    secretKeyRef:
+                      name: ${CRUNCHYDB_USER}
+                      key: password
+                - name: PG_HOSTNAME
+                  valueFrom:
+                    secretKeyRef:
+                      name: ${CRUNCHYDB_USER}
+                      key: pgbouncer-host
+                - name: PG_PORT
+                  valueFrom:
+                    secretKeyRef:
+                      name: ${CRUNCHYDB_USER}
+                      key: pgbouncer-port
+                - name: PG_DATABASE
+                  value: ${PG_DATABASE}
+                # fill_partition_data.sh requires the target table name via the TABLE environment variable
+                - name: TABLE
+                  value: ${TABLE}
+                - name: SUFFIX
+                  value: ${SUFFIX}
+              resources:
+                limits:
+                  cpu: "1"
+                  memory: 256Mi
+                requests:
+                  cpu: "500m"
+                  memory: 128Mi
+          restartPolicy: OnFailure