Skip to content

Commit

Permalink
Extract specific version from current
Browse files Browse the repository at this point in the history
  • Loading branch information
vietnguyengit committed Dec 9, 2024
1 parent d3a135c commit 137366b
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 13,237 deletions.
5 changes: 0 additions & 5 deletions ardcvocabs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,6 @@
<groupId>org.springframework.retry</groupId>
<artifactId>spring-retry</artifactId>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.18.1</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package au.org.aodn.ardcvocabs.model;

import lombok.Getter;

/**
 * URLs of the ARDC {@code current/concept.json} resources for each AODN vocabulary group.
 *
 * <p>Every constant carries two absolute URLs: one for the category vocabulary and one for
 * the vocabulary itself. Both are built by appending the relative path given in the
 * constant declaration to a shared base URL. Lombok's {@code @Getter} exposes them as
 * {@code getCategoryCurrent()} and {@code getVocabCurrent()}.
 */
@Getter
public enum ArdcCurrentPaths {
    PARAMETER_VOCAB(
            "/aodn-parameter-category-vocabulary/current/concept.json",
            "/aodn-discovery-parameter-vocabulary/current/concept.json"),
    PLATFORM_VOCAB(
            "/aodn-platform-category-vocabulary/current/concept.json",
            "/aodn-platform-vocabulary/current/concept.json"),
    ORGANISATION_VOCAB(
            "/aodn-organisation-category-vocabulary/current/concept.json",
            "/aodn-organisation-vocabulary/current/concept.json");

    /** Absolute URL of the category vocabulary's {@code current} concept listing. */
    private final String categoryCurrent;

    /** Absolute URL of the vocabulary's {@code current} concept listing. */
    private final String vocabCurrent;

    /**
     * @param categoryPath path of the category vocabulary resource, relative to the base URL
     * @param vocabPath path of the vocabulary resource, relative to the base URL
     */
    ArdcCurrentPaths(String categoryPath, String vocabPath) {
        // Kept as a local: the prefix is only needed while the two URLs are assembled.
        final String baseUrl = "https://vocabs.ardc.edu.au/repository/api/lda/aodn";
        this.categoryCurrent = baseUrl + categoryPath;
        this.vocabCurrent = baseUrl + vocabPath;
    }
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
package au.org.aodn.ardcvocabs.service;

import au.org.aodn.ardcvocabs.model.ArdcRootPaths;
import au.org.aodn.ardcvocabs.model.ArdcCurrentPaths;
import au.org.aodn.ardcvocabs.model.PathName;
import au.org.aodn.ardcvocabs.model.VocabApiPaths;
import au.org.aodn.ardcvocabs.model.VocabModel;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.TextNode;
import jakarta.annotation.PostConstruct;
import lombok.Getter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
Expand All @@ -37,44 +32,43 @@ public class ArdcVocabServiceImpl implements ArdcVocabService {
protected RestTemplate restTemplate;
protected RetryTemplate retryTemplate;

protected static final String VERSION_REGEX = "^version-\\d+-\\d+$";
protected static final String VERSION_REGEX = "/(version-\\d+-\\d+)(?:/|$)";

public Map<String, Map<PathName, String>> getResolvedPathCollection() {
Map<String, Map<PathName, String>> resolvedPathCollection = new HashMap<>();
for (ArdcRootPaths rootPath : ArdcRootPaths.values()) {
for (ArdcCurrentPaths currentPath : ArdcCurrentPaths.values()) {
try {
// Fetch HTML contents for category and vocab
String categoryRootHtmlContent = fetchHtmlContent(rootPath.getCategoryRoot());
String vocabRootHtmlContent = fetchHtmlContent(rootPath.getVocabRoot());
ObjectNode categoryCurrentContent = fetchCurrentContents(currentPath.getCategoryCurrent());
ObjectNode vocabCurrentContent = fetchCurrentContents(currentPath.getVocabCurrent());

if (categoryRootHtmlContent != null && vocabRootHtmlContent != null) {
if (categoryCurrentContent != null && vocabCurrentContent != null) {
// Extract versions
String categoryVersion = extractVersionFromHtmlContent(categoryRootHtmlContent);
String vocabVersion = extractVersionFromHtmlContent(vocabRootHtmlContent);
String categoryVersion = extractVersionFromCurrentContent(categoryCurrentContent);
String vocabVersion = extractVersionFromCurrentContent(vocabCurrentContent);

if (categoryVersion != null && vocabVersion != null) {
log.info("Fetched ARDC category version for {}: {}", rootPath.name(), categoryVersion);
log.info("Fetched ARDC vocab version for {}: {}", rootPath.name(), vocabVersion);
log.info("Fetched ARDC category version for {}: {}", currentPath.name(), categoryVersion);
log.info("Fetched ARDC vocab version for {}: {}", currentPath.name(), vocabVersion);

// Build and store resolved paths
Map<PathName, String> resolvedPaths = buildResolvedPaths(rootPath, categoryVersion, vocabVersion);
resolvedPathCollection.put(rootPath.name(), resolvedPaths);
Map<PathName, String> resolvedPaths = buildResolvedPaths(currentPath, categoryVersion, vocabVersion);
resolvedPathCollection.put(currentPath.name(), resolvedPaths);
} else {
log.error("Failed to extract versions for {}", rootPath.name());
log.error("Failed to extract versions for {}", currentPath.name());
}
} else {
log.error("Failed to fetch HTML content for {}", rootPath.name());
log.error("Failed to fetch HTML content for {}", currentPath.name());
}
} catch (Exception e) {
log.error("Error initialising versions for {}: {}", rootPath.name(), e.getMessage(), e);
log.error("Error initialising versions for {}: {}", currentPath.name(), e.getMessage(), e);
}
}
return resolvedPathCollection;
}

private String fetchHtmlContent(String url) {
private ObjectNode fetchCurrentContents(String url) {
try {
return restTemplate.getForObject(url, String.class);
return retryTemplate.execute(context -> restTemplate.getForObject(url, ObjectNode.class));
} catch (RestClientException e) {
log.error("Failed to fetch HTML content from URL {}: {}", url, e.getMessage());
} catch (Exception e) {
Expand All @@ -83,10 +77,10 @@ private String fetchHtmlContent(String url) {
return null;
}

protected Map<PathName, String> buildResolvedPaths(ArdcRootPaths rootPath, String categoryVersion, String vocabVersion) {
protected Map<PathName, String> buildResolvedPaths(ArdcCurrentPaths currentPaths, String categoryVersion, String vocabVersion) {
Map<PathName, String> resolvedPaths = new HashMap<>();
for (VocabApiPaths vocabApiPath : VocabApiPaths.values()) {
if (rootPath.name().equals(vocabApiPath.name())) {
if (currentPaths.name().equals(vocabApiPath.name())) {
resolvedPaths.put(PathName.categoryApi, String.format(vocabApiPath.getCategoryApiTemplate(), categoryVersion));
resolvedPaths.put(PathName.categoryDetailsApi, String.format(vocabApiPath.getCategoryDetailsTemplate(), categoryVersion, "%s"));
resolvedPaths.put(PathName.vocabApi, String.format(vocabApiPath.getVocabApiTemplate(), vocabVersion));
Expand All @@ -96,31 +90,24 @@ protected Map<PathName, String> buildResolvedPaths(ArdcRootPaths rootPath, Strin
return resolvedPaths;
}

protected static String extractVersionFromHtmlContent(String htmlContent) {
if (htmlContent != null && !htmlContent.isEmpty()) {
// Parse HTML content with Jsoup
Document doc = Jsoup.parse(htmlContent);

// Extract the first h4 element
// has(.box-tag.box-tag-green) query will ensure to select only the div element that has "Current" indicator
Element firstH4 = doc.selectFirst("div.col-md-4.panel-body:has(.box-tag.box-tag-green) h4:first-of-type");

if (firstH4 != null) {
String version = firstH4.text()
.toLowerCase()
.replaceAll("[ .]", "-");
// Validate the version format
if (version.matches(VERSION_REGEX)) {
protected String extractVersionFromCurrentContent(ObjectNode currentContent) {
if (currentContent != null && !currentContent.isEmpty()) {
JsonNode node = currentContent.get("result");
if (!about.apply(node).isEmpty()) {
Pattern pattern = Pattern.compile(VERSION_REGEX);
Matcher matcher = pattern.matcher(about.apply(node));

if (matcher.find()) {
String version = matcher.group(1);
log.info("Valid Version Found: {}", version);
return version;
} else {
log.warn("Version does not match the required format: {}", version);
log.warn("Version does not match the required format: {}", about.apply(node));
}
} else {
log.warn("No matching h4 element found in the document.");
}

} else {
log.warn("HTML content is empty or null.");
log.warn("Current content is empty or null.");
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,36 +226,6 @@ public void init() {
Mockito.reset(mockRestTemplate);
}

/**
 * Runs {@code ArdcVocabServiceImpl.extractVersionFromHtmlContent} against each canned
 * HTML page under {@code /databag/ardc} and checks the version string it returns.
 */
@Test
void testFetchVersionWithCannedHtml() throws Exception {
    // One row per fixture: { resource path, expected version, assertion message }.
    final String[][] fixtures = {
        {"/databag/ardc/viewById22.html", "version-1-6", "Expected parsed version for viewById22.html is 'version-1-6'"},
        {"/databag/ardc/viewById24.html", "version-2-1", "Expected parsed version for viewById24.html is 'version-2-1'"},
        {"/databag/ardc/viewById25.html", "version-6-1", "Expected parsed version for viewById25.html is 'version-6-1'"},
        {"/databag/ardc/viewById26.html", "version-1-2", "Expected parsed version for viewById26.html is 'version-1-2'"},
        {"/databag/ardc/viewById28.html", "version-2-5", "Expected parsed version for viewById28.html is 'version-2-5'"},
        {"/databag/ardc/viewById29.html", "version-2-5", "Expected parsed version for viewById29.html is 'version-2-5'"},
    };

    // Fixtures are checked in declaration order; the first mismatch fails the test.
    for (String[] fixture : fixtures) {
        String html = readResourceFile(fixture[0]);
        String parsedVersion = ArdcVocabServiceImpl.extractVersionFromHtmlContent(html);
        assertEquals(fixture[1], parsedVersion, fixture[2]);
    }
}

@Test
public void verifyParameterVocab() throws IOException, JSONException {

Expand Down
Loading

0 comments on commit 137366b

Please sign in to comment.