Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-enable health check that includes crunchydb cluster check #3445

Merged
merged 10 commits into from
Mar 5, 2024
12 changes: 6 additions & 6 deletions api/app/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
logger = logging.getLogger(__name__)


def patroni_cluster_health_check():
def crunchydb_cluster_health_check():
""" Makes call to Patroni cluster namespace in Openshift to retrieve the statuses of all
individual Patroni pods, then re-formats response """
parts = [
config.get('OPENSHIFT_BASE_URI'),
config.get('OPENSHIFT_NAMESPACE_API'),
config.get('PROJECT_NAMESPACE'),
'statefulsets/',
'postgresclusters/',
config.get('PATRONI_CLUSTER_NAME')
]
# form URL by concatenating all substrings in parts[], making sure there's exactly 1 / between each part
Expand All @@ -24,12 +24,12 @@ def patroni_cluster_health_check():
}
resp = requests.get(url, headers=header, timeout=10)
resp_json = resp.json()
# NOTE: In Openshift parlance "replica" refers to how many of one pod we have; in Patroni, a "Replica"
# refers to a read-only copy of the Leader.
# NOTE: In Openshift parlance "replica" refers to how many of one pod we have; in CrunchyDB's managed
# Patroni, a "Replica" refers to a read-only copy of the Leader.
# Get the number of pods that are ready:
ready_count = resp_json.get('status').get('readyReplicas')
ready_count = resp_json.get('status').get('instances')[0].get('readyReplicas')
# Get the number of pods we expect:
replica_count = resp_json.get('status').get('replicas')
replica_count = resp_json.get('status').get('instances')[0].get('replicas')
if ready_count > 1:
# It's actually a bit more complicated than this.
# There are a number of scenarios that are ok:
Expand Down
11 changes: 5 additions & 6 deletions api/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from app.percentile import get_precalculated_percentiles
from app.auth import authentication_required, audit
from app import config
# from app import health
from app import health
from app import hourlies
from app.rocketchat_notifications import send_rocketchat_notification
from app.routers import (fba, forecasts, weather_models, c_haines, stations, hfi_calc,
Expand Down Expand Up @@ -123,11 +123,10 @@ async def get_health():
""" A simple endpoint for Openshift Healthchecks.
It's assumed that if patroni is ok, then all is well. """
try:
# TODO reenable
# health_check = health.patroni_cluster_health_check()
health_check = health.crunchydb_cluster_health_check()

# logger.debug('/health - healthy: %s. %s',
# health_check.get('healthy'), health_check.get('message'))
logger.debug('/health - healthy: %s. %s',
health_check.get('healthy'), health_check.get('message'))

# Instantiate the CFFDRS singleton. Binding to R can take quite some time...
cffdrs_start = perf_counter()
Expand All @@ -138,7 +137,7 @@ async def get_health():
if delta > 0.1:
logger.info('%f seconds added by CFFDRS startup', delta)

return {"message": "API healthy", "healthy": True}
return health_check
except Exception as exception:
logger.error(exception, exc_info=True)
raise
Expand Down
Loading
Loading