From c27e910da84f1dc829cca480e8ca11408c7ba688 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Thu, 5 Dec 2024 15:38:42 -0500 Subject: [PATCH 1/8] remove duplicate DatasetFieldTypes --- .../iq/dataverse/util/json/JsonPrinter.java | 25 +++++++--- .../dataverse/util/json/JsonPrinterTest.java | 49 +++++++++++++++++++ 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 91af13c79a3..bb0c8e10c82 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -642,17 +642,19 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO .add("displayName", metadataBlock.getDisplayName()) .add("displayOnCreate", metadataBlock.isDisplayOnCreate()); - Set datasetFieldTypes; + List datasetFieldTypesList; if (ownerDataverse != null) { - datasetFieldTypes = new TreeSet<>(datasetFieldService.findAllInMetadataBlockAndDataverse( - metadataBlock, ownerDataverse, printOnlyDisplayedOnCreateDatasetFieldTypes)); + datasetFieldTypesList = datasetFieldService.findAllInMetadataBlockAndDataverse( + metadataBlock, ownerDataverse, printOnlyDisplayedOnCreateDatasetFieldTypes); } else { - datasetFieldTypes = printOnlyDisplayedOnCreateDatasetFieldTypes - ? new TreeSet<>(datasetFieldService.findAllDisplayedOnCreateInMetadataBlock(metadataBlock)) - : new TreeSet<>(metadataBlock.getDatasetFieldTypes()); + datasetFieldTypesList = printOnlyDisplayedOnCreateDatasetFieldTypes + ? datasetFieldService.findAllDisplayedOnCreateInMetadataBlock(metadataBlock) + : metadataBlock.getDatasetFieldTypes(); } + Set datasetFieldTypes = filterOutDuplicateDatasetFieldTypes(datasetFieldTypesList); + JsonObjectBuilder fieldsBuilder = Json.createObjectBuilder(); for (DatasetFieldType datasetFieldType : datasetFieldTypes) { fieldsBuilder.add(datasetFieldType.getName(), json(datasetFieldType, ownerDataverse)); @@ -662,6 +664,17 @@ public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printO return jsonObjectBuilder; } + // This will remove datasetFieldTypes that are in the list but also a child of another datasetFieldType in the list + // Prevents duplicate datasetFieldType information from being returned twice + // See: https://github.com/IQSS/dataverse/issues/10472 + private static Set filterOutDuplicateDatasetFieldTypes(List datasetFieldTypesList) { + // making a copy of the list as to not damage the original when we remove items + List datasetFieldTypes = new ArrayList<>(datasetFieldTypesList); + // exclude/remove datasetFieldTypes if datasetFieldType exists as a child of another datasetFieldType + datasetFieldTypesList.forEach(dsft -> dsft.getChildDatasetFieldTypes().forEach(c -> datasetFieldTypes.remove(c))); + return new TreeSet<>(datasetFieldTypes); + } + public static JsonArrayBuilder jsonDatasetFieldTypes(List fields) { JsonArrayBuilder fieldsJson = Json.createArrayBuilder(); for (DatasetFieldType field : fields) { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java index 7ec8e0b25f3..1987307637c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java @@ -25,6 +25,7 @@ import jakarta.json.JsonString; import edu.harvard.iq.dataverse.util.BundleUtil; +import org.assertj.core.util.Lists; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeEach; @@ -268,6 +269,54 @@ public void testDatasetContactWithPrivacy() { } + @Test + public void testDatasetFieldTypesWithChildren() { + MetadataBlock block = new MetadataBlock(); + block.setId(0L); + block.setName("citation"); + long id = 0L; + // create datasetFieldTypes + List datasetFieldTypes = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + DatasetFieldType dft = new DatasetFieldType(); + dft.setId(id++); + dft.setDisplayOrder(i); + dft.setMetadataBlock(block); + dft.setFieldType(FieldType.TEXT); + dft.setName("subType" + dft.getId()); + dft.setTitle(dft.getName()); + dft.setChildDatasetFieldTypes(Lists.emptyList()); + datasetFieldTypes.add(dft); + } + // add DatasetFieldType as children to another DatasetFieldType to test the suppression of duplicate data + // adding 3 and 4 as children of 2 + datasetFieldTypes.get(3).setParentDatasetFieldType(datasetFieldTypes.get(2)); + datasetFieldTypes.get(4).setParentDatasetFieldType(datasetFieldTypes.get(2)); + datasetFieldTypes.get(2).setChildDatasetFieldTypes(List.of(datasetFieldTypes.get(3), datasetFieldTypes.get(4))); + // adding 6 as child of 9 + datasetFieldTypes.get(6).setParentDatasetFieldType(datasetFieldTypes.get(9)); + datasetFieldTypes.get(9).setChildDatasetFieldTypes(List.of(datasetFieldTypes.get(6))); + + block.setDatasetFieldTypes(datasetFieldTypes); + + DatasetFieldServiceBean nullDFServiceBean = null; + JsonPrinter.injectSettingsService(new MockSettingsSvc(), nullDFServiceBean); + + JsonObject jsonObject = JsonPrinter.json(block).build(); + assertNotNull(jsonObject); + + System.out.println("json: " + JsonUtil.prettyPrint(jsonObject.toString())); + assertEquals("subType2 subType3", jsonObject.getJsonObject("fields").getJsonObject("subType2") + .getJsonObject("childFields").getJsonObject("subType3").getString("displayName")); + assertEquals("subType2 subType4", jsonObject.getJsonObject("fields").getJsonObject("subType2") + .getJsonObject("childFields").getJsonObject("subType4").getString("displayName")); + assertEquals("subType9 subType6", jsonObject.getJsonObject("fields").getJsonObject("subType9") + .getJsonObject("childFields").getJsonObject("subType6").getString("displayName")); + assertNull(jsonObject.getJsonObject("fields").getJsonObject("subType3")); + assertNull(jsonObject.getJsonObject("fields").getJsonObject("subType4")); + assertNull(jsonObject.getJsonObject("fields").getJsonObject("subType6")); + } + @Test public void testDataversePrinter() { Dataverse dataverse = new Dataverse(); From b2e449779cc141160d15070b18eb2a3a037c848b Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:20:50 -0500 Subject: [PATCH 2/8] fix tests afetr validating new results --- .../java/edu/harvard/iq/dataverse/api/DataversesIT.java | 6 +++--- .../java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index 76bb515beb2..d58c84a06d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -927,7 +927,7 @@ public void testListMetadataBlocks() { .body("data.size()", equalTo(1)) .body("data[0].name", is("citation")) .body("data[0].fields.title.displayOnCreate", equalTo(true)) - .body("data[0].fields.size()", is(28)); + .body("data[0].fields.size()", is(10)); // 28 - 18 child duplicates Response setMetadataBlocksResponse = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken); setMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -1007,11 +1007,11 @@ public void testListMetadataBlocks() { // Since the included property of notesText is set to false, we should retrieve the total number of fields minus one int citationMetadataBlockIndex = geospatialMetadataBlockIndex == 0 ? 1 : 0; listMetadataBlocksResponse.then().assertThat() - .body(String.format("data[%d].fields.size()", citationMetadataBlockIndex), equalTo(79)); + .body(String.format("data[%d].fields.size()", citationMetadataBlockIndex), equalTo(34)); // 79 minus 45 child duplicates // Since the included property of geographicCoverage is set to false, we should retrieve the total number of fields minus one listMetadataBlocksResponse.then().assertThat() - .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(10)); + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(6)); // 10 - 4 child duplicates String actualGeospatialMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.geographicCoverage.name", geospatialMetadataBlockIndex)); String actualGeospatialMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.country.name", geospatialMetadataBlockIndex)); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index 6e7061961f0..fe608a97ac8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -42,7 +42,7 @@ void testListMetadataBlocks() { // returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(false, true); - int expectedNumberOfMetadataFields = 80; + int expectedNumberOfMetadataFields = 35; // 80 - 45 child duplicates; listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) @@ -51,7 +51,8 @@ void testListMetadataBlocks() { // onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(true, true); - expectedNumberOfMetadataFields = 28; + listMetadataBlocksResponse.prettyPrint(); + expectedNumberOfMetadataFields = 10; // 28 - 18 child duplicates listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) From 9fb3dbe956fe69f0a7be17dd67cbd1dce7b42bcc Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Fri, 6 Dec 2024 13:45:16 -0500 Subject: [PATCH 3/8] add release note --- ...0472-review-modify-jsonprinter-for-datasetfieldtype.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md diff --git a/doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md b/doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md new file mode 100644 index 00000000000..f0b9c30c9cd --- /dev/null +++ b/doc/release-notes/10472-review-modify-jsonprinter-for-datasetfieldtype.md @@ -0,0 +1,8 @@ +### Json Printer Bug fix + +DatasetFieldTypes in MetadataBlock response that are also a child of another DatasetFieldType were being returned twice. The child DatasetFieldType was included in the "fields" object as well as in the "childFields" of it's parent DatasetFieldType. This fix suppresses the standalone object so only one instance of the DatasetFieldType is returned (in the "childFields" of its parent). +This fix changes the Json output of the API `/api/dataverses/{dataverseAlias}/metadatablocks` + +## Backward Incompatible Changes + +The Json response of API call `/api/dataverses/{dataverseAlias}/metadatablocks` will no longer include the DatasetFieldTypes in "fields" if they are children of another DatasetFieldType. The child DatasetFieldType will only be included in the "childFields" of it's parent DatasetFieldType. From 939d8e9e1cf349e138242d6b481f1a78f072fcce Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 18 Dec 2024 15:14:08 -0500 Subject: [PATCH 4/8] switch container demo tutorial from FAKE to perma #11107 --- doc/release-notes/11107-fake-to-perma-demo.md | 3 +++ doc/sphinx-guides/source/container/running/demo.rst | 5 +++++ docker/compose/demo/compose.yml | 12 ++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 doc/release-notes/11107-fake-to-perma-demo.md diff --git a/doc/release-notes/11107-fake-to-perma-demo.md b/doc/release-notes/11107-fake-to-perma-demo.md new file mode 100644 index 00000000000..afb6b8a7917 --- /dev/null +++ b/doc/release-notes/11107-fake-to-perma-demo.md @@ -0,0 +1,3 @@ +### Demo/Eval Container Tutorial + +The demo/eval container tutorial has been updated to use the Permalink PID provider instead of the FAKE DOI Provider. See also #11107. diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index b1945070714..2483d3217a5 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -160,6 +160,11 @@ Next, set up the UI toggle between English and French, again using the unblock k Stop and start the Dataverse container in order for the language toggle to work. +PID Providers ++++++++++++++ + +Dataverse supports multiple Persistent ID (PID) providers. The ``compose.yml`` file uses the Permalink PID provider. Follow :ref:`pids-configuration` to reconfigure as needed. + Next Steps ---------- diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 60ed130612e..bc0fe9825ba 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -20,12 +20,12 @@ services: -Ddataverse.files.file1.type=file -Ddataverse.files.file1.label=Filesystem -Ddataverse.files.file1.directory=${STORAGE_DIR}/store - -Ddataverse.pid.providers=fake - -Ddataverse.pid.default-provider=fake - -Ddataverse.pid.fake.type=FAKE - -Ddataverse.pid.fake.label=FakeDOIProvider - -Ddataverse.pid.fake.authority=10.5072 - -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.pid.providers=perma1 + -Ddataverse.pid.default-provider=perma1 + -Ddataverse.pid.perma1.type=perma + -Ddataverse.pid.perma1.label=Perma1 + -Ddataverse.pid.perma1.authority=DV + -Ddataverse.pid.perma1.permalink.separator=/ #-Ddataverse.lang.directory=/dv/lang ports: - "8080:8080" # HTTP (Dataverse Application) From 339ce7aa67a6f8f95936f477b610a7affbd438c4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Dec 2024 13:34:54 -0500 Subject: [PATCH 5/8] don't create unneeded per docs --- src/main/java/edu/harvard/iq/dataverse/DataFile.java | 8 ++++++++ .../harvard/iq/dataverse/search/SolrIndexServiceBean.java | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 1a610d9ea6e..01c1a48e117 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1142,4 +1142,12 @@ public boolean isDeaccessioned() { } return inDeaccessionedVersions; // since any published version would have already returned } + public boolean isInDatasetVersion(DatasetVersion version) { + for (FileMetadata fmd : getFileMetadatas()) { + if (fmd.getDatasetVersion().equals(version)) { + return true; + } + } + return false; + } } // end of class diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index e4d885276d0..aa27a051e98 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -155,7 +155,7 @@ private List constructDatafileSolrDocs(DataFile dataFile, Map desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner()); for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataFile.getOwner())) { boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); - if (cardShouldExist) { + if (cardShouldExist && dataFile.isInDatasetVersion(datasetVersionFileIsAttachedTo)) { String solrIdStart = IndexServiceBean.solrDocIdentifierFile + dataFile.getId(); String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); String solrId = solrIdStart + solrIdEnd; From 4818383538fb5ca071954b0be15045d1f1886faa Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Dec 2024 14:18:48 -0500 Subject: [PATCH 6/8] add note about alternate fixes --- .../harvard/iq/dataverse/search/SolrIndexServiceBean.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index aa27a051e98..8b04b6c5b6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -155,6 +155,14 @@ private List constructDatafileSolrDocs(DataFile dataFile, Map desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner()); for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataFile.getOwner())) { boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); + /* + * Since datasetVersionFileIsAttachedTo should be a draft or the most recent + * released one, it could be more efficient to stop the search through + * FileMetadatas after those two (versus continuing through all prior versions + * as in isInDatasetVersion). Alternately, perhaps filesToReIndexPermissionsFor + * should not combine the list of files for the different datsetversions into a + * single list to start with. + */ if (cardShouldExist && dataFile.isInDatasetVersion(datasetVersionFileIsAttachedTo)) { String solrIdStart = IndexServiceBean.solrDocIdentifierFile + dataFile.getId(); String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); From e16ff355fc56ace769f5c76669c9a42bc6db2d6d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 19 Dec 2024 14:35:20 -0500 Subject: [PATCH 7/8] release note --- doc/release-notes/11113-avoid-orphan-perm-docs.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 doc/release-notes/11113-avoid-orphan-perm-docs.md diff --git a/doc/release-notes/11113-avoid-orphan-perm-docs.md b/doc/release-notes/11113-avoid-orphan-perm-docs.md new file mode 100644 index 00000000000..4c52d72d7db --- /dev/null +++ b/doc/release-notes/11113-avoid-orphan-perm-docs.md @@ -0,0 +1,5 @@ +This release fixes a bug that caused Dataverse to generate unnecessary solr documents for files when a file is added/deleted from a draft dataset. These documents could accumulate and potentially impact performance. + +Assuming the upgrade to solr 9.7.0 also occurs in this release, there's nothing else needed for this PR. (Starting with a new solr insures the solr db is empty and that a reindex is already required.) + + From a067793eab56bc9280ac8dd7b0628d071f48926b Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:44:32 -0500 Subject: [PATCH 8/8] fix merged test --- src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index deb3354b481..960e3d2e3ac 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -1012,8 +1012,9 @@ public void testListMetadataBlocks() { // Since the included property of geographicCoverage is set to false, we should retrieve the total number of fields minus one listMetadataBlocksResponse.then().assertThat() - .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(6)); // 10 - 4 child duplicates + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(2)); + listMetadataBlocksResponse = UtilIT.getMetadataBlock("geospatial"); String actualGeospatialMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data.fields['geographicCoverage'].name")); String actualGeospatialMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data.fields['geographicCoverage'].childFields['country'].name")); String actualGeospatialMetadataField3 = listMetadataBlocksResponse.then().extract().path(String.format("data.fields['geographicCoverage'].childFields['city'].name"));