Skip to content

Commit

Permalink
Merge pull request #137 from aodn/bugs/5842-handle-malformed-temporal
Browse files Browse the repository at this point in the history
Change the converter logic to handle malformed dates
  • Loading branch information
utas-raymondng authored Sep 13, 2024
2 parents 603ae4e + a8714ff commit 1c343ca
Show file tree
Hide file tree
Showing 18 changed files with 675 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class RankingServiceImpl implements RankingService {
protected static Logger logger = LogManager.getLogger(RankingServiceImpl.class);

public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
Integer total = 0;
int total = 0;

/*
* The implementation of this method can be adjusted
Expand All @@ -31,27 +31,27 @@ public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
* Total: 100 points
* */

if (stacCollectionModel.getTitle() != null && !stacCollectionModel.getTitle().equals("")) {
if (stacCollectionModel.getTitle() != null && !stacCollectionModel.getTitle().isBlank()) {
logger.debug("Title found");
total += 15;
}

if (stacCollectionModel.getDescription() != null && !stacCollectionModel.getDescription().equals("")) {
if (stacCollectionModel.getDescription() != null && !stacCollectionModel.getDescription().isBlank()) {
logger.debug("Description found");
total += 15;
}

if (stacCollectionModel.getExtent().getBbox() != null && stacCollectionModel.getExtent().getBbox().size() > 0) {
if (stacCollectionModel.getExtent().getBbox() != null && !stacCollectionModel.getExtent().getBbox().isEmpty()) {
logger.debug("Extent found");
total += 10;
}

if (stacCollectionModel.getExtent().getTemporal() != null && stacCollectionModel.getExtent().getTemporal().size() > 0) {
if (stacCollectionModel.getExtent().getTemporal() != null && !stacCollectionModel.getExtent().getTemporal().isEmpty()) {
logger.debug("Temporal found");
total += 10;
}

if (stacCollectionModel.getLinks() != null && stacCollectionModel.getLinks().size() > 0) {
if (stacCollectionModel.getLinks() != null && !stacCollectionModel.getLinks().isEmpty()) {
if (stacCollectionModel.getLinks().size() <= 2) {
logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
total += 10;
Expand All @@ -64,7 +64,7 @@ public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
}
}

if (stacCollectionModel.getThemes() != null && stacCollectionModel.getThemes().size() > 0) {
if (stacCollectionModel.getThemes() != null && !stacCollectionModel.getThemes().isEmpty()) {
if (stacCollectionModel.getThemes().size() <= 2) {
logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
total += 10;
Expand All @@ -77,7 +77,7 @@ public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
}
}

if (stacCollectionModel.getContacts() != null && stacCollectionModel.getContacts().size() > 0) {
if (stacCollectionModel.getContacts() != null && !stacCollectionModel.getContacts().isEmpty()) {
logger.debug("Contacts found");
total += 10;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@

import java.io.IOException;
import java.math.BigDecimal;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.*;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.function.Function;
Expand Down Expand Up @@ -130,13 +128,13 @@ List<String[]> createExtentTemporal(MDMetadataType source) {
if (pair0.isEmpty()) {
pair0 = safeGet(() -> timePeriodType.getBeginPosition().getValue().get(0));
}
pair0.ifPresent(pair -> temporalPair[0] = convertDateToZonedDateTime(this.mapUUID(source), pair));
pair0.ifPresent(pair -> temporalPair[0] = convertDateToZonedDateTime(this.mapUUID(source), pair, true));

var pair1 = safeGet(() -> timePeriodType.getEnd().getTimeInstant().getTimePosition().getValue().get(0));
if (pair1.isEmpty()) {
pair1 = safeGet(() -> timePeriodType.getEndPosition().getValue().get(0));
}
pair1.ifPresent(pair -> temporalPair[1] = convertDateToZonedDateTime(this.mapUUID(source), pair));
pair1.ifPresent(pair -> temporalPair[1] = convertDateToZonedDateTime(this.mapUUID(source), pair, false));
}

result.add(temporalPair);
Expand All @@ -145,25 +143,65 @@ List<String[]> createExtentTemporal(MDMetadataType source) {
}
return result;
}

private String convertDateToZonedDateTime(String uuid, String inputDateString) {

String inputDateTimeString = inputDateString;
if (!inputDateString.contains("T")) {
inputDateTimeString += "T00:00:00";
}

/**
* Converts a date string from the record's temporal extent into a UTC date time formatted with
* {@link DateTimeFormatter#ISO_OFFSET_DATE_TIME}.
*
* If the value is missing the month, day or time, the missing part is filled in so that the resulting range is as
* wide as possible: a start date is padded to the earliest point (first day, 00:00:00) and an end date to the
* latest point (last day, 23:59:59). For example, a year-only value becomes 1 January 00:00:00 of that year when
* it is a start date and 31 December 23:59:59 of that year when it is an end date. The resulting local date time
* is placed in the zone named by timeZoneId and then converted to UTC.
*
* @param uuid - The uuid of the record, used only in the warning log message
* @param dateStr - The date value in the XML
* @param isStartDate - true when processing the start of the range, false when processing the end
* @return - The well-formed date time string in UTC, or null when the value matches none of the handled patterns
*           or an exception is thrown while converting it
*/
private String convertDateToZonedDateTime(String uuid, String dateStr, boolean isStartDate) {
// Stays null unless one of the four cases below matches dateStr
ZonedDateTime utcZonedDateTime = null;
// Only assigned in case 1 and case 2, where the text is parsed with TIME_FORMATTER
String convertedDateTime = null;
try {
// NOTE(review): the next line uses inputDateTimeString, which this method does not declare; it looks like a
// pre-change line kept by the diff rendering - confirm against the committed file.
ZonedDateTime zonedDateTime = ZonedDateTime.parse(inputDateTimeString, TemporalUtils.TIME_FORMATTER.withZone(ZoneId.of(timeZoneId)));
// Case 1: Date and Time (e.g., "2024-09-10T10:15:30")
// String.matches must match the whole value, so a value with fractional seconds or a zone suffix matches none
// of the cases and ends in the warning below.
if (dateStr.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}")) {
// Already complete, no padding needed: parse as-is in the configured zone and convert to UTC
convertedDateTime = dateStr;
ZonedDateTime zt = ZonedDateTime.parse(convertedDateTime, TemporalUtils.TIME_FORMATTER.withZone(ZoneId.of(timeZoneId)));
utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC);
}
// Case 2: Full date (e.g., "2024-09-10"): pad the time to start of day for a start date, or 23:59:59 for an
// end date, so the whole day is covered
else if (dateStr.matches("\\d{4}-\\d{2}-\\d{2}")) {
convertedDateTime = isStartDate ? dateStr + "T00:00:00" : dateStr + "T23:59:59";
ZonedDateTime zt = ZonedDateTime.parse(convertedDateTime, TemporalUtils.TIME_FORMATTER.withZone(ZoneId.of(timeZoneId)));
utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC);
}
// Case 3: Year and month (e.g., "2024-09"): use the first day of the month at 00:00:00 for a start date, or
// the last day of the month at 23:59:59 for an end date
else if (dateStr.matches("\\d{4}-\\d{2}")) {
YearMonth yearMonth = YearMonth.parse(dateStr);
LocalDateTime ld = isStartDate ?
yearMonth.atDay(1).atTime(0, 0, 0) :
yearMonth.atEndOfMonth().atTime(23, 59, 59);

ZonedDateTime zt = ld.atZone(ZoneId.of(timeZoneId));
utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC);
}
// Case 4: Year only (e.g., "2024"): treat a start date as January and an end date as December, then pad the
// day and time the same way as case 3 so the whole year is covered
else if (dateStr.matches("\\d{4}")) {
YearMonth yearMonth = isStartDate ? YearMonth.parse(dateStr + "-01") : YearMonth.parse(dateStr + "-12");
LocalDateTime ld = isStartDate ?
yearMonth.atDay(1).atTime(0, 0, 0) :
yearMonth.atEndOfMonth().atTime(23, 59, 59);

ZonedDateTime zt = ld.atZone(ZoneId.of(timeZoneId));
utcZonedDateTime = zt.withZoneSameInstant(ZoneOffset.UTC);
}

// NOTE(review): the four statements below re-declare utcZonedDateTime and read zonedDateTime; they look like
// the pre-change conversion kept by the diff rendering - confirm against the committed file.
// Convert to UTC
ZonedDateTime utcZonedDateTime = zonedDateTime.withZoneSameInstant(ZoneOffset.UTC);
DateTimeFormatter outputFormatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME;

return utcZonedDateTime.format(outputFormatter);
// The value is already in UTC at this point; format it, or warn and return null when no case matched
if(utcZonedDateTime != null) {
return utcZonedDateTime.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
}
else {
logger.warn("Unable to convert date to ISO_OFFSET_DATE_TIME: {} for record {}", dateStr, uuid);
return null;
}
}
catch (Exception e) {
// Any failure while parsing or converting is logged and reported to the caller as null
// NOTE(review): the first warn below reads inputDateString, the old parameter name - likely the pre-change
// line from the diff rendering.
logger.warn("Unable to convert date to ISO_OFFSET_DATE_TIME: {} for record {}", inputDateString, uuid);
logger.warn("Unable to convert date to ISO_OFFSET_DATE_TIME: {} for record {}", dateStr, uuid);
return null;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ public void verifyLicenseCorrect() throws IOException, JSONException {
var a = lastRequest.get().document().toString();
Map<?,?> content3 = objectMapper.readValue(lastRequest.get().document().toString(), Map.class);
String out3 = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(content3);
JSONAssert.assertEquals(objectMapper.readTree(expected3).toPrettyString(),
JSONAssert.assertEquals(
objectMapper.readTree(expected3).toPrettyString(),
objectMapper.readTree(out3.strip()).toPrettyString(),
JSONCompareMode.STRICT);
}
Expand Down Expand Up @@ -422,4 +423,25 @@ public void verifyAbstractCitationNullWorks() throws IOException, JSONException
objectMapper.readTree(out.strip()).toPrettyString(),
JSONCompareMode.STRICT
);
}}
}
/**
 * Indexes a record whose date fields contain a year only or a year and month only, then checks that the
 * document held by lastRequest equals the canned STAC JSON under a strict comparison.
 *
 * @throws IOException - Do not expect to throw
 * @throws JSONException - Do not expect to throw
 */
@Test
public void verifyMalformDateTimeWorks() throws IOException, JSONException {
    final String sourceXml = readResourceFile("classpath:canned/sample_malform_date.xml");
    final String expectedStac = readResourceFile("classpath:canned/sample_malform_date_stac.json");

    indexerService.indexMetadata(sourceXml);

    // Read the captured document into a Map and pretty-print it so it can be logged and compared
    final String capturedDocument = lastRequest.get().document().toString();
    final Map<?, ?> parsedDocument = objectMapper.readValue(capturedDocument, Map.class);
    final String actualStac = objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(parsedDocument);
    log.info(actualStac);

    // Both sides go through readTree/toPrettyString before the strict comparison
    final String expectedPretty = objectMapper.readTree(expectedStac).toPrettyString();
    final String actualPretty = objectMapper.readTree(actualStac.strip()).toPrettyString();
    JSONAssert.assertEquals(expectedPretty, actualPretty, JSONCompareMode.STRICT);
}
}
6 changes: 3 additions & 3 deletions indexer/src/test/resources/canned/sample12_stac.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
"temporal": [
[
"2012-04-15T14:00:00Z",
"2012-04-29T14:00:00Z"
"2012-04-30T13:59:59Z"
],
[
"2012-04-15T14:00:00Z",
"2012-04-29T14:00:00Z"
"2012-04-30T13:59:59Z"
]
]
},
Expand Down Expand Up @@ -73,7 +73,7 @@
"temporal": [
{
"start": "2012-04-15T14:00:00Z",
"end": "2012-04-29T14:00:00Z"
"end": "2012-04-30T13:59:59Z"
}
]
},
Expand Down
6 changes: 3 additions & 3 deletions indexer/src/test/resources/canned/sample13_stac.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
"temporal": [
[
"2016-11-17T13:00:00Z",
"2016-11-19T13:00:00Z"
"2016-11-20T12:59:59Z"
],
[
"2016-11-17T13:00:00Z",
"2016-11-19T13:00:00Z"
"2016-11-20T12:59:59Z"
]
]
},
Expand Down Expand Up @@ -69,7 +69,7 @@
"temporal": [
{
"start": "2016-11-17T13:00:00Z",
"end": "2016-11-19T13:00:00Z"
"end": "2016-11-20T12:59:59Z"
}
]
},
Expand Down
4 changes: 2 additions & 2 deletions indexer/src/test/resources/canned/sample4_stac.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"description" : "This dataset contains the reconstructed time series of monthly mean aragonite, calcite and pH together with distribution of dissolved inorganic carbon (DIC), total alkalinity (ALK), sea surface temperature and salinity in the Australian region at a 1 degree resolution over the period 1870-2013.",
"extent" : {
"bbox" : [ [ 95.5, -44.5, 169.5, -0.5 ], [ 95.5, -44.5, 169.5, -0.5 ] ],
"temporal" : [ [ "1870-07-16T14:10:44Z", "2013-06-16T14:00:00Z" ], [ "1870-07-16T14:10:44Z", "2013-06-16T14:00:00Z" ] ]
"temporal" : [ [ "1870-07-16T14:10:44Z", "2013-06-17T13:59:59Z" ], [ "1870-07-16T14:10:44Z", "2013-06-17T13:59:59Z" ] ]
},
"summaries" : {
"score" : 95,
Expand All @@ -28,7 +28,7 @@
},
"temporal" : [ {
"start" : "1870-07-16T14:10:44Z",
"end" : "2013-06-16T14:00:00Z"
"end" : "2013-06-17T13:59:59Z"
} ],
"parameter_vocabs" : [ "ph (total scale) of the water body", "alkalinity", "carbon", "temperature", "salinity" ]
},
Expand Down
4 changes: 2 additions & 2 deletions indexer/src/test/resources/canned/sample5_stac.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"description" : "The Bonney Coast (BONC) HF ocean radar system covers an area of the Bonney Coast, South Australia, which has a recurring annual upwelling feature near to the coast that significantly changes the ecosystem from one of warm water originating in Western Australia, to one dominated by cold upwelling water from off the continental shelf. The dynamics of this area and the relationship between ocean circulation, chemistry and sediments control the larval species and the higher marine species and ecosystems in which they forage. The data from this site provide linking observations between the Southern Ocean and NSW through processes that occur on weekly to El Nino time scales. The BONC HF ocean radar system consists of two SeaSonde crossed loop direction finding stations located at Nora Creina (37.329 S 139.850 E) and Blackfellows Cave (37.940 S 140.457 E). These radars operate at a frequency of 5.211 MHz, with a bandwidth of 50 KHz, a maximum range of 200 Km and a range resolution of 3 Km. Within the HF radar coverage area surface currents are measured. This site was decommissioned in March 2017.",
"extent" : {
"bbox" : [ [ 138.2, -39.4, 140.8, -37.0 ], [ 138.2, -39.4, 140.8, -37.0 ] ],
"temporal" : [ [ "2010-01-21T01:00:00Z", "2017-03-26T13:00:00Z" ], [ "2010-01-21T01:00:00Z", "2017-03-26T13:00:00Z" ] ]
"temporal" : [ [ "2010-01-21T01:00:00Z", "2017-03-27T12:59:59Z" ], [ "2010-01-21T01:00:00Z", "2017-03-27T12:59:59Z" ] ]
},
"summaries" : {
"score" : 90,
Expand All @@ -27,7 +27,7 @@
},
"temporal" : [ {
"start" : "2010-01-21T01:00:00Z",
"end" : "2017-03-26T13:00:00Z"
"end" : "2017-03-27T12:59:59Z"
} ]
},
"contacts" : [ {
Expand Down
4 changes: 2 additions & 2 deletions indexer/src/test/resources/canned/sample6_stac.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"description" : "This record describes the End of Voyage (EOV) data archive from the Marine National Facility (MNF) RV Investigator voyage IN2024_V01, titled \"Multidisciplinary Investigations of the Southern Ocean (MISO): linking physics, biogeochemistry, plankton, aerosols, clouds, and climate.\" The voyage took place between January 02, 2024 and March 05, 2024 (AEST), departing from Hobart and returning to Fremantle.\n\n For further information please refer to the voyage documentation links.\n\n Instruments used and data collected include:\n Regular measurements:\n Lowered ADCP (LADCP), Acoustic Doppler Current Profiler (ADCP; 75, 150 KHz ), Greenhouse Gas Analysers (Picarro), Cloud Condensation Nuclei counter (CCN), Condensation Particle Counters (CPC), Disdrometer, Radon sensor, Scanning Mobility Particle Sizers (SMPS), CTD, Hydrochemistry, Triaxus, Fisheries Echosounder (EK80), Multibeam Echosounder (EM710, EM122), Sub-bottom Profiler (SBP120), GPS Positioning System, Doppler Velocity Log, Thermosalinographs (TSG), Fluorometer, Oxygen Optode, pCO2, Multiangle Absorption Photometer (MAAP), Ozone Sensor, Nephelometer, Atmospheric Temperature, Humidity, Pressure, Wind and Rain sensors, Photosynthetically Active Radiation (PAR) sensor, Precision Infrared Radiometer (PIR), Precision Spectral Pyranometer (PSP), Starboard and Portside Radiometers, Air Sampler, Ultra Short BaseLine Underwater Positioning System (USBL), Weather Radar, Expendable Bathythermographs (XBTs).\n\n Voyage-specific measurements:\n Black Carbon sensor (Aethalometer), Mobility particle size spectrometer (MPSS), Bongo Net, Chemical Ionisation Mass Spectrometer (CIMS), Cloud Radar (BASTA), Fast Repetition Rate Chlorophyll-a Fluorometer (FRRf), Mini Micro-Pulse LIDAR (miniMPL), Micro Rain Radar (MRR), Neutral Cluster Air Ion Spectrometer (NAIS), Proton-Transfer-Reaction Mass Spectrometry (PTR-MS), Radiosondes, Cloud and Aerosol Backscatter Lidar (RMAN), Stabilised Platform, Mercury Analyser 
(Tekran), Time of Flight Aerosol Chemical Speciation Monitor (ToF-ACSM), Water Vapor Radiometer (WVR), Aerosol mass spectrometer (AMS), Core Argo floats, Biogeochemical (BGC) Argo floats, Near-surface Drifters, In situ pumps (ISPs), Ice Nucleating Particles (INPs), Ozone Sensor, Trace Metal Aerosol Sampling, Trace Metal CTD Rosette and Bottles, Organic Sulfur Sequential Chemical Analysis Robot (OSSCAR), Omics data and various biological data.\n\n The archive for the IN2024_V01 EOV raw data is curated by the CSIRO National Collections and Marine Infrastructure (NCMI) Information and Data Centre (IDC) in Hobart, with a permanent archive at the CSIRO Data Access Portal (https://data.csiro.au/), providing access to voyage participants and processors of the data collected on the voyage.\n\n All voyage documentation is available electronically to MNF support via the local network. Applications to access voyage documentation by non-CSIRO participants can be made via [email protected].\n\n All processed data from this voyage are made publicly available through the MNF Data Trawler (in the related links).",
"extent" : {
"bbox" : [ [ 113.2462, -67.0026, 151.4171, -31.9323 ], [ 113.2462, -67.0026, 151.4171, -31.9323 ] ],
"temporal" : [ [ "2024-01-01T13:00:00Z", "2024-03-04T13:00:00Z" ], [ "2024-01-01T13:00:00Z", "2024-03-04T13:00:00Z" ] ]
"temporal" : [ [ "2024-01-01T13:00:00Z", "2024-03-05T12:59:59Z" ], [ "2024-01-01T13:00:00Z", "2024-03-05T12:59:59Z" ] ]
},
"summaries" : {
"score" : 100,
Expand All @@ -27,7 +27,7 @@
},
"temporal" : [ {
"start" : "2024-01-01T13:00:00Z",
"end" : "2024-03-04T13:00:00Z"
"end" : "2024-03-05T12:59:59Z"
} ],
"parameter_vocabs" : [ "ocean biota", "bathymetry", "density", "water pressure", "current", "temperature", "salinity" ],
"platform_vocabs" : [ "research vessel" ]
Expand Down
Loading

0 comments on commit 1c343ca

Please sign in to comment.