Skip to content

Commit

Permalink
Merge pull request #139 from aodn/features/5233-change-ranking-algo
Browse files Browse the repository at this point in the history
Features/5233 change ranking algo
  • Loading branch information
HavierD authored Sep 16, 2024
2 parents 1c343ca + a0a0584 commit 934c5ab
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 155 deletions.
24 changes: 22 additions & 2 deletions indexer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,6 @@
</annotationProcessorPaths>
</configuration>
</plugin>

<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
Expand All @@ -274,7 +273,28 @@
</execution>
</executions>
</plugin>
<plugin>
<!--
Generates a git.properties file during the build so the packaged application can report which commit
it was built from. application-edge.yaml points spring.info.git.location at classpath:git.properties.
-->
<groupId>io.github.git-commit-id</groupId>
<artifactId>git-commit-id-maven-plugin</artifactId>
<executions>
<execution>
<id>get-the-git-infos</id>
<goals>
<goal>revision</goal>
</goals>
<!-- Bound to the initialize phase so the git properties are produced early in the build. -->
<phase>initialize</phase>
</execution>
</executions>
<configuration>
<generateGitPropertiesFile>true</generateGitPropertiesFile>
<!-- Written into the build output directory so the file is packaged with the compiled classes. -->
<generateGitPropertiesFilename>${project.build.outputDirectory}/git.properties</generateGitPropertiesFilename>
<!--
Only properties matching these patterns are written: build time/version and the abbreviated/full commit id.
The dots are unescaped, so they match any character; that is harmless for these property names.
-->
<includeOnlyProperties>
<includeOnlyProperty>^git.build.(time|version)$</includeOnlyProperty>
<includeOnlyProperty>^git.commit.id.(abbrev|full)$</includeOnlyProperty>
</includeOnlyProperties>
<!--
NOTE(review): "full" is expected to publish the commit id as git.commit.id.full, which the filter above
relies on. Confirm against the plugin documentation.
-->
<commitIdGenerationMode>full</commitIdGenerationMode>
</configuration>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -1,85 +1,109 @@
package au.org.aodn.esindexer.service;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import au.org.aodn.stac.model.StacCollectionModel;

@Slf4j
@Service
public class RankingServiceImpl implements RankingService {

protected static Logger logger = LogManager.getLogger(RankingServiceImpl.class);
@Value("${app.ranking.citation.weight:15}")
protected int citationWeigth;

public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
int total = 0;
@Value("${app.ranking.license.weight:10}")
protected int licenseWeigth;

/*
* The implementation of this method can be adjusted
* Current scoring system is (well, I made it up! feel free to change it)
* 1. 15 points for title
* 2. 15 points for description
* 3. 10 points for extent geometry
* 4. 10 points for extent temporal
* 5a. 10 points for links with just 1-2 link
* * 5b. 15 points for links with 3-5 links
* * 5c. 20 points for links more than 5 links
* 6a. 10 points for themes with just 1-2 themes
* * 6b. 15 points for themes with 3-5 themes
* * 6c. 20 points for themes more than 5 themes
* 7. 10 points for contacts
* Total: 100 points
* */

if (stacCollectionModel.getTitle() != null && !stacCollectionModel.getTitle().isBlank()) {
logger.debug("Title found");
total += 15;
}
@Value("${app.ranking.description.weight:0.01F}")
protected float descriptionWeigth;

if (stacCollectionModel.getDescription() != null && !stacCollectionModel.getDescription().isBlank()) {
logger.debug("Description found");
total += 15;
}
@Value("${app.ranking.lineage.weight:10}")
protected int lineageWeigth;

if (stacCollectionModel.getExtent().getBbox() != null && !stacCollectionModel.getExtent().getBbox().isEmpty()) {
logger.debug("Extent found");
total += 10;
}
@Value("${app.ranking.theme.minWeight:10}")
protected int themeMinWeigth;

if (stacCollectionModel.getExtent().getTemporal() != null && !stacCollectionModel.getExtent().getTemporal().isEmpty()) {
logger.debug("Temporal found");
total += 10;
}
@Value("${app.ranking.theme.midWeight:15}")
protected int themeMidWeigth;

if (stacCollectionModel.getLinks() != null && !stacCollectionModel.getLinks().isEmpty()) {
if (stacCollectionModel.getLinks().size() <= 2) {
logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
total += 10;
} else if (stacCollectionModel.getLinks().size() <= 5) {
logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
total += 15;
} else {
logger.debug("Links found with size: " + stacCollectionModel.getLinks().size());
total += 20;
}
}
@Value("${app.ranking.theme.maxWeight:20}")
protected int themeMaxWeigth;

@Value("${app.ranking.link.minWeight:10}")
protected int linkMinWeigth;

@Value("${app.ranking.link.midWeight:15}")
protected int linkMidWeigth;

@Value("${app.ranking.link.maxWeight:20}")
protected int linkMaxWeigth;

public Integer evaluateCompleteness(StacCollectionModel stacCollectionModel) {
int total = 0;

/*
* The implementation of this method can be adjusted.
* See https://github.com/aodn/backlog/issues/5233
*
* The richer the abstract, the better.
*
* Most records (around 90%) have a geometry, whereas documents do not, so geometry is not a very useful
* indicator. The number of spatial extents is not a good indicator either.
*
* Geometry and spatial extents can be used to tell a document apart from a metadata record about data, so
* they are not used much here. There are other ways to identify documents, which someone may want to
* implement at some stage. One such method is to use the field
* Type of resource -> Hierarchy level -> Resource scope -> Document.
*
* Theme is not a good indicator.
* Links are more important, even a single one.
* Use of keywords: the more, the better.
* A record that uses Resource lineage is a good record.
* Data with resource constraints, a license, etc. is a good record.
* Every record needs a contact, but a record that makes the effort to put it under citation is a better record.
*/
// Keywords store in theme
if (stacCollectionModel.getThemes() != null && !stacCollectionModel.getThemes().isEmpty()) {
log.debug("Keywords found with size: {}", stacCollectionModel.getThemes().size());
if (stacCollectionModel.getThemes().size() <= 2) {
logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
total += 10;
total += themeMinWeigth;
} else if (stacCollectionModel.getThemes().size() <= 5) {
logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
total += 15;
total += themeMidWeigth;
} else {
logger.debug("Themes found with size: " + stacCollectionModel.getThemes().size());
total += 20;
total += themeMaxWeigth;
}
}

if (stacCollectionModel.getContacts() != null && !stacCollectionModel.getContacts().isEmpty()) {
logger.debug("Contacts found");
total += 10;
// Lineage
if (stacCollectionModel.getSummaries() != null && stacCollectionModel.getSummaries().getStatement() != null) {
log.debug("Lineage found");
total += lineageWeigth;
}
// License
if (stacCollectionModel.getLicense() != null) {
log.debug("License found");
total += licenseWeigth;
}
// Constraint (citation)
if (stacCollectionModel.getCitation() != null) {
log.debug("Citation found");
total += citationWeigth;
}
// Abstract
if (stacCollectionModel.getDescription() != null && !stacCollectionModel.getDescription().isBlank()) {
log.debug("Description found");
total += (int) (stacCollectionModel.getDescription().length() * descriptionWeigth);
}
// Links
if (stacCollectionModel.getLinks() != null && !stacCollectionModel.getLinks().isEmpty()) {
log.debug("Links found with size: {}", stacCollectionModel.getLinks().size());
if (stacCollectionModel.getLinks().size() <= 2) {
total += linkMinWeigth;
}
else if (stacCollectionModel.getLinks().size() <= 5) {
total += linkMidWeigth;
}
else {
total += linkMaxWeigth;
}
}

return total;
Expand Down
6 changes: 6 additions & 0 deletions indexer/src/main/resources/application-edge.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Expose the git build info only on edge, because edge builds do not have a tagged version
spring:
info:
git:
location: "classpath:git.properties"

management:
endpoints:
web:
Expand Down
5 changes: 5 additions & 0 deletions indexer/src/main/resources/application-production.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Actuator endpoints exposed over the web for the production profile.
# NOTE(review): "env", "beans" and "logfile" can reveal configuration values and application internals.
# Confirm that exposing them in production is intended and that access to these endpoints is restricted.
management:
endpoints:
web:
exposure:
include: "health,info,env,beans,logfile"
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package au.org.aodn.esindexer.service;

import au.org.aodn.esindexer.BaseTestClass;
import au.org.aodn.stac.model.ContactsModel;
import au.org.aodn.stac.model.ExtentModel;
import au.org.aodn.stac.model.LinkModel;
import au.org.aodn.stac.model.StacCollectionModel;
import au.org.aodn.stac.model.ThemesModel;
import au.org.aodn.esindexer.utils.SummariesUtils;
import au.org.aodn.stac.model.*;
import org.junit.jupiter.api.*;
import org.mockito.InjectMocks;
import org.mockito.Mockito;
import org.mockito.Spy;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ActiveProfiles;
Expand All @@ -35,9 +34,8 @@ public void clear() throws IOException {
super.clearElasticIndex(INDEX_NAME);
}

@Spy
@InjectMocks
RankingServiceImpl mockRankingService;
@Autowired
RankingServiceImpl rankingService;

private StacCollectionModel stacCollectionModel;
private ExtentModel extentModel;
Expand All @@ -51,94 +49,25 @@ public void setUp() {

@Test
public void testNotFound() {
// act
mockRankingService.evaluateCompleteness(stacCollectionModel);
RankingServiceImpl mockRankingService = Mockito.spy(rankingService);
// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(0, mockRankingService.evaluateCompleteness(stacCollectionModel));
}

@Test
public void testTitleFound() {
// arrange
stacCollectionModel.setTitle("Test");
// act
mockRankingService.evaluateCompleteness(stacCollectionModel);
// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(15, mockRankingService.evaluateCompleteness(stacCollectionModel));
}

@Test
public void testDescriptionFound() {
RankingServiceImpl mockRankingService = Mockito.spy(rankingService);
// arrange
stacCollectionModel.setDescription("Test");
// act
mockRankingService.evaluateCompleteness(stacCollectionModel);
// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(15, mockRankingService.evaluateCompleteness(stacCollectionModel));
}

@Test
public void testExtentFound() {
// arrange
List<List<BigDecimal>> bbox = new ArrayList<>();
List<BigDecimal> bigDecimalList1 = new ArrayList<>();
bigDecimalList1.add(new BigDecimal(1));
bigDecimalList1.add(new BigDecimal(2));
bigDecimalList1.add(new BigDecimal(3));
bbox.add(bigDecimalList1);
extentModel.setBbox(bbox);
stacCollectionModel.setExtent(extentModel);

// act
mockRankingService.evaluateCompleteness(stacCollectionModel);

stacCollectionModel.setDescription("The Cape Grim Baseline Air Pollution Station facility, located at the North/West tip of Tasmania (40� 41'S, 144� 41'E), is funded and managed by the Australian Bureau of Meteorology, with the scientific program being jointly supervised with CSIRO Marine and Atmospheric Research. This archive contains 1000 litre air samples contained in stainless steel flasks collected at approximately 3 monthly intervals since 1978. The archive is housed at the Aspendale laboratory of CSIRO Marine and Atmospheric Research. The Cape Grim air archive is invaluable in determining the past atmospheric composition of a wide range of gases. For some of these gases, accurate and precise analytical methods have only recently evolved (for example HFCs and PFCs). The measurements are state-of-the-art in precision and accuracy. They are used to identify trace gas trends in the Southern Hemisphere, which in turn can be used to drive climate change models and identify processes that influence changes to the atmosphere.");
// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(10, mockRankingService.evaluateCompleteness(stacCollectionModel));
}

@Test
public void testTemporalFound() {
// arrange
List<String[]> temporal = new ArrayList<>();
String[] temporal1 = new String[2];
temporal1[0] = "2020-01-01";
temporal1[1] = "2020-01-02";
temporal.add(temporal1);

extentModel.setTemporal(temporal);
stacCollectionModel.setExtent(extentModel);

// act
mockRankingService.evaluateCompleteness(stacCollectionModel);

// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(10, mockRankingService.evaluateCompleteness(stacCollectionModel));
}

@Test
public void testContactsFound() {
// arrange
ContactsModel contact1 = ContactsModel.builder().build();
contact1.setName("Test");
List<ContactsModel> contacts = new ArrayList<>();
contacts.add(contact1);
stacCollectionModel.setContacts(contacts);

// act
mockRankingService.evaluateCompleteness(stacCollectionModel);

// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(10, mockRankingService.evaluateCompleteness(stacCollectionModel));
}

@Test
public void testLinksFound() {
RankingServiceImpl mockRankingService = Mockito.spy(rankingService);
// arrange
List<LinkModel> links = new ArrayList<>();
LinkModel link1 = LinkModel.builder().build();
Expand All @@ -157,16 +86,14 @@ public void testLinksFound() {

stacCollectionModel.setLinks(links);

// act
mockRankingService.evaluateCompleteness(stacCollectionModel);

// assert
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
assertEquals(15, mockRankingService.evaluateCompleteness(stacCollectionModel));
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
}

@Test
public void testThemesFound() {
RankingServiceImpl mockRankingService = Mockito.spy(rankingService);
// arrange
List<ThemesModel> themes = new ArrayList<>();
ThemesModel theme1 = ThemesModel.builder().build();
Expand All @@ -179,11 +106,23 @@ public void testThemesFound() {

stacCollectionModel.setThemes(themes);

// act
mockRankingService.evaluateCompleteness(stacCollectionModel);
// assert
assertEquals(mockRankingService.linkMinWeigth, mockRankingService.evaluateCompleteness(stacCollectionModel));
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
}

/**
 * Checks that a collection whose summaries carry a lineage statement is scored with the lineage weight.
 * Presumably the model is otherwise empty after setUp, so no other weight contributes (TODO confirm).
 */
@Test
public void testLinageFound() {
// Wrap the autowired service in a spy so the number of invocations can be verified below.
RankingServiceImpl mockRankingService = Mockito.spy(rankingService);
// arrange: populate only the lineage statement, which the service reads from summaries
stacCollectionModel.setSummaries(SummariesModel
.builder()
.statement("Statement")
.build()
);

// assert: the score equals the configured lineage weight
assertEquals(mockRankingService.lineageWeigth, mockRankingService.evaluateCompleteness(stacCollectionModel));
// Order matters: times(1) counts every call made so far, so this must follow exactly one invocation.
verify(mockRankingService, times(1)).evaluateCompleteness(stacCollectionModel);
// NOTE(review): this repeats the check above with a hard-coded 10 and invokes the service a second time.
// It looks like a leftover from the previous version of the test; confirm whether it should be removed.
assertEquals(10, mockRankingService.evaluateCompleteness(stacCollectionModel));
}
}
Loading

0 comments on commit 934c5ab

Please sign in to comment.