Skip to content

Commit

Permalink
Add combined health check endpoint which can check multiple components (
Browse files Browse the repository at this point in the history
#8191)

Co-authored-by: Indy Prentice <[email protected]>
Co-authored-by: david-leifker <[email protected]>
  • Loading branch information
3 people authored Jun 9, 2023
1 parent 29b22f4 commit 2b94a10
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,9 @@ public class ConfigurationProvider {
* Configuration for caching
*/
private CacheConfiguration cache;

/**
* Configuration for the health check server
*/
private HealthCheckConfiguration healthCheck;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.linkedin.gms.factory.config;

import lombok.Data;


/**
 * Configuration for the health check server.
 *
 * <p>Exposed through the {@code healthCheck} field of {@code ConfigurationProvider}; Lombok's
 * {@code @Data} supplies the accessors (e.g. {@code getCacheDurationSeconds()}).
 */
@Data
public class HealthCheckConfiguration {
/**
 * How long, in seconds, a computed health check result is reused before it is recomputed.
 * {@code HealthCheckController} passes this value as the expiration of its memoized ElasticSearch
 * health supplier. {@code application.yml} populates it from
 * {@code HEALTH_CHECK_CACHE_DURATION_SECONDS}, defaulting to 5.
 */
private int cacheDurationSeconds;
}
3 changes: 3 additions & 0 deletions metadata-service/factories/src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ systemUpdate:
backOffFactor: ${BOOTSTRAP_SYSTEM_UPDATE_BACK_OFF_FACTOR:2} # Multiplicative factor for back off, default values will result in waiting 5min 15s
waitForSystemUpdate: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:true}

healthCheck:
cacheDurationSeconds: ${HEALTH_CHECK_CACHE_DURATION_SECONDS:5}

featureFlags:
showSimplifiedHomepageByDefault: ${SHOW_SIMPLIFIED_HOMEPAGE_BY_DEFAULT:false} # shows a simplified homepage with just datasets, charts and dashboards by default to users. this can be configured in user settings
lineageSearchCacheEnabled: ${LINEAGE_SEARCH_CACHE_ENABLED:true} # Enables in-memory cache for searchAcrossLineage query
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
package com.datahub.health.controller;

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import io.swagger.v3.oas.annotations.tags.Tag;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.client.RequestOptions;
Expand All @@ -26,11 +32,46 @@ public class HealthCheckController {
@Autowired
@Qualifier("elasticSearchRestHighLevelClient")
private RestHighLevelClient elasticClient;
private Supplier<ResponseEntity<String>> memoizedSupplier;
private final Supplier<ResponseEntity<String>> memoizedSupplier;

public HealthCheckController(ConfigurationProvider config) {

public HealthCheckController() {
this.memoizedSupplier = Suppliers.memoizeWithExpiration(
this::getElasticHealth, 5, TimeUnit.SECONDS);
this::getElasticHealth, config.getHealthCheck().getCacheDurationSeconds(), TimeUnit.SECONDS);
}

/**
 * Combined health check endpoint for checking GMS clients.
 * For now, the only registered component is the ElasticSearch client.
 *
 * @param checks names of the components to check; when {@code null} or empty, every registered
 *     component is checked
 * @return A ResponseEntity whose body maps each requested component name to the ResponseEntity
 *     produced by that component's health check. The status code is 200 (OK) when every entry
 *     reports 200, and 503 (Service Unavailable) when one or more components are unhealthy or
 *     the requested name is not a recognized component.
 */
@GetMapping(path = "/ready", produces = MediaType.APPLICATION_JSON_VALUE)
public ResponseEntity<Map<String, ResponseEntity<String>>> getCombinedHealthCheck(String... checks) {

  // Registry of known components and the probe that reports each one's health.
  Map<String, Supplier<ResponseEntity<String>>> healthChecks = new HashMap<>();
  healthChecks.put("elasticsearch", this::getElasticHealthWithCache);
  // Add new components here

  // No explicit selection means "check everything that is registered".
  final List<String> componentsToCheck;
  if (checks == null || checks.length == 0) {
    componentsToCheck = new ArrayList<>(healthChecks.keySet());
  } else {
    componentsToCheck = Arrays.asList(checks);
  }

  Map<String, ResponseEntity<String>> componentHealth = new HashMap<>();
  for (String component : componentsToCheck) {
    Supplier<ResponseEntity<String>> probe = healthChecks.get(component);
    // Unknown names are reported as unavailable rather than silently ignored.
    ResponseEntity<String> result = probe != null
        ? probe.get()
        : ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body("Unrecognized component " + component);
    componentHealth.put(component, result);
  }

  // The overall status is OK only if every individual component reported OK.
  boolean isHealthy = componentHealth.values().stream()
      .allMatch(resp -> resp.getStatusCode() == HttpStatus.OK);
  HttpStatus overallStatus = isHealthy ? HttpStatus.OK : HttpStatus.SERVICE_UNAVAILABLE;
  return ResponseEntity.status(overallStatus).body(componentHealth);
}

/**
Expand All @@ -43,8 +84,8 @@ public ResponseEntity<String> getElasticHealthWithCache() {
}

/**
*
* @return
* Query ElasticSearch health endpoint
* @return A response including the result from ElasticSearch
*/
private ResponseEntity<String> getElasticHealth() {
String responseString = null;
Expand All @@ -63,6 +104,6 @@ private ResponseEntity<String> getElasticHealth() {
responseString = e.getMessage();
}
}
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(responseString);
return ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body(responseString);
}
}

0 comments on commit 2b94a10

Please sign in to comment.