diff --git a/thrall/app/controllers/ThrallController.scala b/thrall/app/controllers/ThrallController.scala index dab6dedbb0..b39c279155 100644 --- a/thrall/app/controllers/ThrallController.scala +++ b/thrall/app/controllers/ThrallController.scala @@ -58,7 +58,21 @@ class ThrallController( } } - def migrationFailures(maybePage: Option[Int]): Action[AnyContent] = withLoginRedirectAsync { + def migrationFailuresOverview(): Action[AnyContent] = withLoginRedirectAsync { + es.migrationStatus match { + case running: Running => + es.getMigrationFailuresOverview(es.imagesCurrentAlias, running.migrationIndexName).map(failuresOverview => + Ok(views.html.migrationFailuresOverview( + failuresOverview, + apiBaseUrl = services.apiBaseUri, + uiBaseUrl = services.kahunaBaseUri, + )) + ) + case _ => Future.successful(Ok("No current migration")) + } + } + + def migrationFailures(filter: String, maybePage: Option[Int]): Action[AnyContent] = withLoginRedirectAsync { val pageSize = 250 // pages are indexed from 1 val page = maybePage.getOrElse(1) @@ -68,12 +82,13 @@ class ThrallController( } else { es.migrationStatus match { case running: Running => - es.getMigrationFailures(es.imagesCurrentAlias, running.migrationIndexName, from, pageSize).map(failures => + es.getMigrationFailures(es.imagesCurrentAlias, running.migrationIndexName, from, pageSize, filter).map(failures => Ok(views.html.migrationFailures( + failures, apiBaseUrl = services.apiBaseUri, uiBaseUrl = services.kahunaBaseUri, - page = page, - failures = failures + filter, + page )) ) case _ => Future.successful(Ok("No current migration")) diff --git a/thrall/app/lib/FailedMigrationDetails.scala b/thrall/app/lib/FailedMigrationDetails.scala index 516e87fe89..b452467f92 100644 --- a/thrall/app/lib/FailedMigrationDetails.scala +++ b/thrall/app/lib/FailedMigrationDetails.scala @@ -1,5 +1,9 @@ package lib -final case class FailedMigrationDetails(imageId: String, cause: String) +final case class FailedMigrationDetails(imageId: String, lastModified: String, crops: String, usages: String) -final case class FailedMigrationSummary(totalFailed: Long, totalFailedRelation: String, returned: Long, details: Seq[FailedMigrationDetails]) +final case class FailedMigrationSummary(totalFailed: Long, details: Seq[FailedMigrationDetails]) + +final case class FailedMigrationsGrouping(message: String, count: Long, exampleIDs: Seq[String]) + +final case class FailedMigrationsOverview(totalFailed: Long, grouped: Seq[FailedMigrationsGrouping]) diff --git a/thrall/app/lib/elasticsearch/ThrallMigrationClient.scala b/thrall/app/lib/elasticsearch/ThrallMigrationClient.scala index 6966a0a5a2..bd424c0132 100644 --- a/thrall/app/lib/elasticsearch/ThrallMigrationClient.scala +++ b/thrall/app/lib/elasticsearch/ThrallMigrationClient.scala @@ -2,11 +2,14 @@ package lib.elasticsearch import com.gu.mediaservice.lib.elasticsearch.{ElasticSearchClient, InProgress, MigrationAlreadyRunningError, MigrationStatusProvider, NotRunning, Paused} import com.gu.mediaservice.lib.logging.{LogMarker, MarkerMap} +import com.gu.mediaservice.model.Image import com.sksamuel.elastic4s.ElasticApi.{existsQuery, matchQuery, not} import com.sksamuel.elastic4s.ElasticDsl import com.sksamuel.elastic4s.ElasticDsl._ import com.sksamuel.elastic4s.requests.searches.SearchHit -import lib.{FailedMigrationDetails, FailedMigrationSummary} +import com.sksamuel.elastic4s.requests.searches.aggs.responses.bucket.Terms +import com.sksamuel.elastic4s.requests.searches.aggs.responses.metrics.TopHits +import lib.{FailedMigrationDetails, FailedMigrationSummary, FailedMigrationsGrouping, FailedMigrationsOverview} import play.api.libs.json.{JsError, JsSuccess, Json, Reads, __} import scala.concurrent.{ExecutionContext, Future} @@ -81,34 +84,64 @@ trait ThrallMigrationClient extends MigrationStatusProvider { } yield () } + def getMigrationFailuresOverview( + currentIndexName: String, migrationIndexName: String + )(implicit ec: ExecutionContext, logMarker: LogMarker = MarkerMap()): Future[FailedMigrationsOverview] = { + + val examplesSubAggregation = topHitsAgg("examples") + .fetchSource(false) + .size(3) + + val aggregateOnFailureMessage = + termsAgg("failures", s"esInfo.migration.failures.$migrationIndexName.keyword") + .size(1000) + .subAggregations(examplesSubAggregation) + + val aggSearch = ElasticDsl.search(currentIndexName).trackTotalHits(true) query must( + existsQuery(s"esInfo.migration.failures.$migrationIndexName"), + not(matchQuery("esInfo.migration.migratedTo", migrationIndexName)) + ) aggregations aggregateOnFailureMessage + + executeAndLog(aggSearch, s"retrieving grouped overview of migration failures").map { response => + FailedMigrationsOverview( + totalFailed = response.result.hits.total.value, + grouped = response.result.aggregations.result[Terms](aggregateOnFailureMessage.name).buckets.map { bucket => + FailedMigrationsGrouping( + message = bucket.key, + count = bucket.docCount, + exampleIDs = bucket.result[TopHits](examplesSubAggregation.name).hits.map(_.id) + ) + } + ) + } + } + def getMigrationFailures( - currentIndexName: String, migrationIndexName: String, from: Int, pageSize: Int + currentIndexName: String, migrationIndexName: String, from: Int, pageSize: Int, filter: String )(implicit ec: ExecutionContext, logMarker: LogMarker = MarkerMap()): Future[FailedMigrationSummary] = { - val search = ElasticDsl.search(currentIndexName).from(from).size(pageSize) query must( + val search = ElasticDsl.search(currentIndexName).trackTotalHits(true).from(from).size(pageSize) query must( existsQuery(s"esInfo.migration.failures.$migrationIndexName"), + termQuery(s"esInfo.migration.failures.$migrationIndexName.keyword", filter), not(matchQuery("esInfo.migration.migratedTo", migrationIndexName)) - ) + ) sortByFieldDesc "lastModified" executeAndLog(search, s"retrieving list of migration failures") .map { resp => - logger.info(logMarker, s"failed migrations - got ${resp.result.hits.size} hits") val failedMigrationDetails: Seq[FailedMigrationDetails] = resp.result.hits.hits.map { hit => - logger.info(logMarker, s"failed migrations - got hit $hit.id") - val source = hit.sourceAsString - val cause = Json.parse(source).validate(Json.reads[EsInfoContainer]) match { - case JsSuccess(EsInfoContainer(EsInfo(Some(MigrationInfo(Some(failures), _)))), _) => - failures.getOrElse(migrationIndexName, "UNKNOWN - NO FAILURE MATCHING MIGRATION INDEX NAME") - case JsError(errors) => - logger.error(logMarker, s"Could not parse EsInfo for ${hit.id} - $errors") - "Could not extract migration info from ES due to parsing failure" - case _ => "UNKNOWN - NO FAILURE MATCHING MIGRATION INDEX NAME" - } - FailedMigrationDetails(imageId = hit.id, cause = cause) + + Json.parse(hit.sourceAsString).asOpt[Image].fold( + FailedMigrationDetails(hit.id, "ES DOC CORRUPT", "ES DOC CORRUPT", "ES DOC CORRUPT") + )( image => + FailedMigrationDetails( + imageId = hit.id, + lastModified = image.lastModified.fold("")(_.toString), + crops = image.exports.size.toString, + usages = image.usages.size.toString + ) + ) } FailedMigrationSummary( totalFailed = resp.result.hits.total.value, - totalFailedRelation = resp.result.hits.total.relation, - returned = resp.result.hits.hits.length, details = failedMigrationDetails ) } diff --git a/thrall/app/views/index.scala.html b/thrall/app/views/index.scala.html index b42dd74e7d..abbfc9616e 100644 --- a/thrall/app/views/index.scala.html +++ b/thrall/app/views/index.scala.html @@ -80,7 +80,7 @@