From 7dd4f90eb4a5312b5c1272984ab15b293c106b65 Mon Sep 17 00:00:00 2001 From: utas-raymondng Date: Tue, 3 Sep 2024 11:13:13 +1000 Subject: [PATCH 1/2] Add beginWithUuid --- .../controller/IndexerController.java | 29 ++++++++++++++----- .../esindexer/service/GeoNetworkService.java | 2 +- .../service/GeoNetworkServiceImpl.java | 9 ++---- .../esindexer/service/IndexerService.java | 2 +- .../esindexer/service/IndexerServiceImpl.java | 16 ++++++---- .../au/org/aodn/esindexer/BaseTestClass.java | 2 -- .../service/GeoNetworkServiceTests.java | 11 +++---- .../service/IndexerServiceTests.java | 16 +++++----- 8 files changed, 50 insertions(+), 37 deletions(-) diff --git a/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java b/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java index 34b5ffbb..760f2180 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java @@ -48,11 +48,22 @@ public ResponseEntity getDocumentByUUID(@PathVariable("uuid") String ObjectNode response = indexerService.getDocumentByUUID(uuid).source(); return ResponseEntity.status(HttpStatus.OK).body(response); } - + /** + * A synchronized load operation, useful for local run but likely fail in cloud due to gateway time out. No response + * come back unlike everything done. Please use async load with postman if you want feedback constantly. + * + * @param confirm - Must set to true to begin load + * @param beginWithUuid - You want to start load with particular uuid, it is useful for resume previous incomplete reload + * @return A string contains all ingested record status + * @throws IOException - Any failure during reload, it is the called to handle the error + */ @PostMapping(path="/all", consumes = "application/json", produces = "application/json") @Operation(security = { @SecurityRequirement(name = "X-API-Key") }, description = "Index all metadata records from GeoNetwork") - public ResponseEntity indexAllMetadataRecords(@RequestParam(value = "confirm", defaultValue = "false") Boolean confirm) throws IOException { - List responses = indexerService.indexAllMetadataRecordsFromGeoNetwork(confirm, null); + public ResponseEntity indexAllMetadataRecords( + @RequestParam(value = "confirm", defaultValue = "false") Boolean confirm, + @RequestParam(value = "beginWithUuid", required=false) String beginWithUuid) throws IOException { + + List responses = indexerService.indexAllMetadataRecordsFromGeoNetwork(beginWithUuid, confirm, null); return ResponseEntity.ok(responses.toString()); } /** @@ -62,12 +73,16 @@ public ResponseEntity indexAllMetadataRecords(@RequestParam(value = "con * Noted: There is a bug in postman desktop, so either you run postman using web-browser with agent directly * or you need to have version 10.2 or above in order to get the emitted result * - * @param confirm - * @return + * @param confirm - Must set to true to begin load + * @param beginWithUuid - You want to start load with particular uuid, it is useful for resume previous incomplete reload + * @return The SSeEmitter for status update, you can use it to tell which record is being ingested and ingest status. */ @PostMapping(path="/async/all") @Operation(security = { @SecurityRequirement(name = "X-API-Key") }, description = "Index all metadata records from GeoNetwork") - public SseEmitter indexAllMetadataRecordsAsync(@RequestParam(value = "confirm", defaultValue = "false") Boolean confirm) { + public SseEmitter indexAllMetadataRecordsAsync( + @RequestParam(value = "confirm", defaultValue = "false") Boolean confirm, + @RequestParam(value = "beginWith", required=false) String beginWithUuid) { + final SseEmitter emitter = new SseEmitter(0L); // 0L means no timeout; IndexerService.Callback callback = new IndexerService.Callback() { @@ -107,7 +122,7 @@ public void onComplete(Object result) { new Thread(() -> { try { - indexerService.indexAllMetadataRecordsFromGeoNetwork(confirm, callback); + indexerService.indexAllMetadataRecordsFromGeoNetwork(beginWithUuid, confirm, callback); } catch(IOException e) { emitter.completeWithError(e); diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkService.java b/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkService.java index 4bff23e8..f529552d 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkService.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkService.java @@ -22,7 +22,7 @@ public interface GeoNetworkService { * Return Iterable of records, noted that the item inside can be null, so please check null on each item * @return */ - Iterable getAllMetadataRecords(); + Iterable getAllMetadataRecords(String beginWithUuid); /** * This function can avoid elastic outsync and achieve what we need here as the only use case is * check if there is only 1 document in elastic. diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkServiceImpl.java index 570d091c..b125843d 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkServiceImpl.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/GeoNetworkServiceImpl.java @@ -58,11 +58,6 @@ public class GeoNetworkServiceImpl implements GeoNetworkService { @Value("${elasticsearch.query.pageSize:100}") protected int ES_PAGE_SIZE; - // Use for debug only if run the indexer locally and hit an issue, you do - // not want to start from the start, by setting this env value, it will start from the UUID - // that follows. - @Value("${elasticsearch.query.startingUUID:#{null}}") - protected String startingUUID; protected FIFOCache> cache; protected RestTemplate indexerRestTemplate; @@ -420,8 +415,8 @@ public boolean isMetadataRecordsCountLessThan(int c) { backoff = @Backoff(delay = 1500L) ) @Override - public Iterable getAllMetadataRecords() { - SearchRequest req = createSearchAllUUID(startingUUID); + public Iterable getAllMetadataRecords(String beginWithUUid) { + SearchRequest req = createSearchAllUUID(beginWithUUid); try { final AtomicReference lastUUID = new AtomicReference<>(null); final AtomicReference> response = diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerService.java b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerService.java index 3f152733..4fb77267 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerService.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerService.java @@ -21,7 +21,7 @@ interface Callback { } CompletableFuture> indexMetadata(String metadataValues) throws IOException, FactoryException, TransformException, JAXBException; ResponseEntity deleteDocumentByUUID(String uuid) throws IOException; - List indexAllMetadataRecordsFromGeoNetwork(boolean confirm, Callback callback) throws IOException; + List indexAllMetadataRecordsFromGeoNetwork(String beginWithUuid, boolean confirm, Callback callback) throws IOException; Hit getDocumentByUUID(String uuid) throws IOException; boolean isMetadataPublished(String uuid); boolean isGeoNetworkInstanceReinstalled(long portalIndexDocumentsCount); diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java index 446c39e2..2b46b961 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java @@ -251,21 +251,25 @@ public ResponseEntity deleteDocumentByUUID(String uuid) throws IOExcepti } } - public List indexAllMetadataRecordsFromGeoNetwork(boolean confirm, Callback callback) throws IOException { + public List indexAllMetadataRecordsFromGeoNetwork(String beginWithUuid, boolean confirm, Callback callback) throws IOException { if (!confirm) { throw new IndexAllRequestNotConfirmedException("Please confirm that you want to index all metadata records from GeoNetwork"); } - // recreate index from mapping JSON file - elasticSearchIndexService.createIndexFromMappingJSONFile(AppConstants.PORTAL_RECORDS_MAPPING_JSON_FILE, indexName); - - log.info("Indexing all metadata records from GeoNetwork"); + if(beginWithUuid == null) { + log.info("Indexing all metadata records from GeoNetwork"); + // recreate index from mapping JSON file + elasticSearchIndexService.createIndexFromMappingJSONFile(AppConstants.PORTAL_RECORDS_MAPPING_JSON_FILE, indexName); + } + else { + log.info("Resume indexing records from GeoNetwork at {}", beginWithUuid); + } BulkRequest.Builder bulkRequest = new BulkRequest.Builder(); List results = new ArrayList<>(); long dataSize = 0; - for (String metadataRecord : geoNetworkResourceService.getAllMetadataRecords()) { + for (String metadataRecord : geoNetworkResourceService.getAllMetadataRecords(beginWithUuid)) { if(metadataRecord != null) { try { // get mapped metadata values from GeoNetwork to STAC collection schema diff --git a/indexer/src/test/java/au/org/aodn/esindexer/BaseTestClass.java b/indexer/src/test/java/au/org/aodn/esindexer/BaseTestClass.java index 896eb5f9..281a78ba 100644 --- a/indexer/src/test/java/au/org/aodn/esindexer/BaseTestClass.java +++ b/indexer/src/test/java/au/org/aodn/esindexer/BaseTestClass.java @@ -186,8 +186,6 @@ public void login() { } public void deleteRecord(String... uuids) { - CountDownLatch latch = new CountDownLatch(1); - HttpEntity requestEntity = getRequestEntity(null); // retry the request if the server is not ready yet (sometimes will return 403 and can be resolved by retrying ) diff --git a/indexer/src/test/java/au/org/aodn/esindexer/service/GeoNetworkServiceTests.java b/indexer/src/test/java/au/org/aodn/esindexer/service/GeoNetworkServiceTests.java index 24341ef7..3cd6c3ef 100644 --- a/indexer/src/test/java/au/org/aodn/esindexer/service/GeoNetworkServiceTests.java +++ b/indexer/src/test/java/au/org/aodn/esindexer/service/GeoNetworkServiceTests.java @@ -62,6 +62,7 @@ public void setup() throws IOException { dockerComposeContainer.getServicePort(GeoNetworkSearchTestConfig.GN_NAME, GeoNetworkSearchTestConfig.GN_PORT)) ); clearElasticIndex(INDEX_NAME); + triggerIndexer(getRequestEntity(null), true); } @AfterEach @@ -71,7 +72,7 @@ public void clear() throws IOException { /** * We need to make sure this works before you can do any meaningful transformation * - * @throws IOException + * @throws IOException - Not expect to throw */ @Test @Order(1) @@ -84,7 +85,7 @@ public void verifyInsertMetadataWorks() throws IOException { Assertions.assertFalse(geoNetworkService.isMetadataRecordsCountLessThan(1), "Compare false"); Assertions.assertTrue(geoNetworkService.isMetadataRecordsCountLessThan(2), "Compare true"); - Iterable i = geoNetworkService.getAllMetadataRecords(); + Iterable i = geoNetworkService.getAllMetadataRecords(null); for (String x : i) { if (x != null) { @@ -193,7 +194,7 @@ public void verifyAllMetadataRecords() throws IOException { insertMetadataRecords("9e5c3031-a026-48b3-a153-a70c2e2b78b9", "classpath:canned/sample1.xml"); insertMetadataRecords("830f9a83-ae6b-4260-a82a-24c4851f7119", "classpath:canned/sample2.xml"); - Iterable i = geoNetworkService.getAllMetadataRecords(); + Iterable i = geoNetworkService.getAllMetadataRecords(null); // The content verified above, just make sure it returned the correct number int count = 0; @@ -236,7 +237,7 @@ public void verifyAllMetadataRecordWithPage() throws IOException, JAXBException insertMetadataRecords(UUID6, "classpath:canned/sample6.xml"); insertMetadataRecords(UUID7, "classpath:canned/sample7.xml"); - Iterable i = geoNetworkService.getAllMetadataRecords(); + Iterable i = geoNetworkService.getAllMetadataRecords(null); final List xml = new ArrayList<>(); for(String x : i) { @@ -348,7 +349,7 @@ public void verifyRetryOnGeonetworkBusyWorks() throws IOException, JAXBException geoNetworkService.setGn4ElasticClient(spyClient); // Should not flow exception and retry correctly given some call throw IOException - Iterable i = geoNetworkService.getAllMetadataRecords(); + Iterable i = geoNetworkService.getAllMetadataRecords(null); // It should handle retry internally and without throwing exception for(String x : i) { diff --git a/indexer/src/test/java/au/org/aodn/esindexer/service/IndexerServiceTests.java b/indexer/src/test/java/au/org/aodn/esindexer/service/IndexerServiceTests.java index f0478248..bb05c6b0 100644 --- a/indexer/src/test/java/au/org/aodn/esindexer/service/IndexerServiceTests.java +++ b/indexer/src/test/java/au/org/aodn/esindexer/service/IndexerServiceTests.java @@ -102,7 +102,7 @@ public void verifyGetDocumentCount() throws IOException { insertMetadataRecords(uuid1, "classpath:canned/sample2.xml"); insertMetadataRecords(uuid2, "classpath:canned/sample1.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null, true, null); // The sample1 geometry have error [1:9695] failed to parse field [summaries.proj:geometry] of type [geo_shape] // ErrorCause: {"type":"illegal_argument_exception","reason":"Polygon self-intersection at lat=57.0 lon=-66.0"} @@ -123,7 +123,7 @@ public void verifyDeleteDocumentByUUID() throws IOException { insertMetadataRecords(uuid1, "classpath:canned/sample2.xml"); insertMetadataRecords(uuid2, "classpath:canned/sample3.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null, true, null); Assertions.assertEquals(2L, elasticSearchIndexService.getDocumentsCount(INDEX_NAME), "Doc count correct"); // Only 2 doc in elastic, if we delete it then should be zero @@ -144,7 +144,7 @@ public void verifyGetDocumentByUUID() throws IOException { insertMetadataRecords(uuid, "classpath:canned/sample4.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null, true, null); Hit objectNodeHit = indexerService.getDocumentByUUID(uuid); String test = String.valueOf(Objects.requireNonNull(objectNodeHit.source())); @@ -175,7 +175,7 @@ public void verifyAssociatedRecordIndexer() throws IOException{ insertMetadataRecords(siblingId, "classpath:canned/associated/sibling.xml"); insertMetadataRecords(childId, "classpath:canned/associated/child.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null, true, null); var targetResult = indexerService.getDocumentByUUID(targetRecordId); String resultJson = String.valueOf(Objects.requireNonNull(targetResult.source())); @@ -203,7 +203,7 @@ public void verifyLogoLinkAddedOnIndex() throws IOException { insertMetadataRecords(uuid, "classpath:canned/sample5.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null,true, null); Hit objectNodeHit = indexerService.getDocumentByUUID(uuid); String test = String.valueOf(Objects.requireNonNull(objectNodeHit.source())); @@ -229,7 +229,7 @@ public void verifyThumbnailLinkAddedOnIndex() throws IOException { insertMetadataRecords(uuid, "classpath:canned/sample6.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null, true, null); Hit objectNodeHit = indexerService.getDocumentByUUID(uuid); String test = String.valueOf(Objects.requireNonNull(objectNodeHit.source())); @@ -256,7 +256,7 @@ public void verifyThumbnailLinkNullAddedOnIndex() throws IOException { insertMetadataRecords(uuid, "classpath:canned/sample7.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null,true, null); Hit objectNodeHit = indexerService.getDocumentByUUID(uuid); String test = String.valueOf(Objects.requireNonNull(objectNodeHit.source())); @@ -283,7 +283,7 @@ public void verifyAbstractPhrases() throws IOException { insertMetadataRecords(uuid, "classpath:canned/sample7.xml"); - indexerService.indexAllMetadataRecordsFromGeoNetwork(true, null); + indexerService.indexAllMetadataRecordsFromGeoNetwork(null,true, null); Hit objectNodeHit = indexerService.getDocumentByUUID(uuid); String test = Objects.requireNonNull(objectNodeHit.source()).toPrettyString(); From 90a214a267ceb63f04d1e6af7d2c013735892041 Mon Sep 17 00:00:00 2001 From: utas-raymondng Date: Tue, 3 Sep 2024 11:29:07 +1000 Subject: [PATCH 2/2] Add more info in messages --- .../org/aodn/esindexer/controller/IndexerController.java | 6 ++++-- .../au/org/aodn/esindexer/service/IndexerServiceImpl.java | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java b/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java index 760f2180..e4fdeb68 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/controller/IndexerController.java @@ -81,7 +81,7 @@ public ResponseEntity indexAllMetadataRecords( @Operation(security = { @SecurityRequirement(name = "X-API-Key") }, description = "Index all metadata records from GeoNetwork") public SseEmitter indexAllMetadataRecordsAsync( @RequestParam(value = "confirm", defaultValue = "false") Boolean confirm, - @RequestParam(value = "beginWith", required=false) String beginWithUuid) { + @RequestParam(value = "beginWithUuid", required=false) String beginWithUuid) { final SseEmitter emitter = new SseEmitter(0L); // 0L means no timeout; @@ -89,7 +89,7 @@ public SseEmitter indexAllMetadataRecordsAsync( @Override public void onProgress(Object update) { try { - log.info("Send update to client"); + log.info("Send update with content - {}", update.toString()); SseEmitter.SseEventBuilder event = SseEmitter.event() .data(update.toString()) .id(String.valueOf(update.hashCode())) @@ -98,6 +98,8 @@ public void onProgress(Object update) { emitter.send(event); } catch (IOException e) { + // In case of fail, try close the stream, if it cannot be closed. (likely stream terminated + // already, the load error out and we need to result from a particular uuid. emitter.completeWithError(e); } } diff --git a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java index 2b46b961..f297f6c2 100644 --- a/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java +++ b/indexer/src/main/java/au/org/aodn/esindexer/service/IndexerServiceImpl.java @@ -269,6 +269,7 @@ public List indexAllMetadataRecordsFromGeoNetwork(String beginWith List results = new ArrayList<>(); long dataSize = 0; + long total = 0; for (String metadataRecord : geoNetworkResourceService.getAllMetadataRecords(beginWithUuid)) { if(metadataRecord != null) { try { @@ -302,13 +303,15 @@ public List indexAllMetadataRecordsFromGeoNetwork(String beginWith ) ); dataSize += size; + total++; if(callback != null) { callback.onProgress( String.format( - "Add uuid %s to batch, current batch size is %s", + "Add uuid %s to batch, batch size is %s, total is %s", mappedMetadataValues.getUuid(), - dataSize) + dataSize, + total) ); }