Skip to content

Commit

Permalink
Use the hashstore method to get system metadata and object.
Browse files Browse the repository at this point in the history
  • Loading branch information
taojing2002 committed Jun 25, 2024
1 parent 186e739 commit ad2dd13
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 55 deletions.
78 changes: 26 additions & 52 deletions src/main/java/org/dataone/cn/indexer/SolrIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -169,10 +170,9 @@ public void setDeleteSubprocessors(

/**
* Generate the index for the given information
* @param id
* @param systemMetadata
* @param dataStream
* @return
* @param id the id which will be indexed
* @param isSysmetaChangeOnly true if this is a change on the system metadata only
* @return a map of solr doc with ids
* @throws IOException
* @throws SAXException
* @throws ParserConfigurationException
Expand All @@ -184,19 +184,15 @@ public void setDeleteSubprocessors(
* @throws NotFound
* @throws NotImplemented
*/
private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata,
String objectPath, boolean isSysmetaChangeOnly)
private Map<String, SolrDoc> process(String id, boolean isSysmetaChangeOnly)
throws IOException, SAXException, ParserConfigurationException,
XPathExpressionException, MarshallingException, EncoderException,
SolrServerException, NotImplemented, NotFound, UnsupportedType{
log.debug("SolrIndex.process - trying to generate the solr doc object for the pid "+id);
long start = System.currentTimeMillis();
Map<String, SolrDoc> docs = new HashMap<String, SolrDoc>();
// Load the System Metadata document
ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream();
TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream);
ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray());
try {
try (InputStream systemMetadataStream = ObjectManager.getInstance().getSystemMetadataStream(id)){
docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream);
} catch (Exception e) {
log.error(e.getMessage(), e);
Expand All @@ -219,12 +215,11 @@ private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata,
+ ". Even though this is a systemmetadata-change-only event, we can NOT "
+ "just reindex the systemmeta only.");
}

}
log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor +
" and the object path is " + objectPath + " for the id " + id);
log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor
+ " for the id " + id);
//only run the other (sub)processors when skipOtherProcessor is false
if (!skipOtherProcessor && objectPath != null) {
if (!skipOtherProcessor) {
log.debug("SolrIndex.process - Start to use subprocessor list to process " + id);
// Determine if subprocessors are available for this ID
if (subprocessors != null) {
Expand All @@ -234,31 +229,21 @@ private Map<String, SolrDoc> process(String id, SystemMetadata systemMetadata,
if (subprocessor.canProcess(formatId)) {
// if so, then extract the additional information from the
// document.
try {
try (InputStream dataStream = ObjectManager.getInstance().getObject(id)) {
// docObject = the resource map document or science
// metadata document.
// note that resource map processing touches all objects
// referenced by the resource map.
FileInputStream dataStream = new FileInputStream(objectPath);
if (!dataStream.getFD().valid()) {
log.error("SolrIndex.process - subprocessor "
+ subprocessor.getClass().getName()
+ " couldn't process since it could not load OBJECT file for ID,Path="
+ id + ", " + objectPath);
//throw new Exception("Could not load OBJECT for ID " + id );
} else {
start = System.currentTimeMillis();
docs = subprocessor.processDocument(id, docs, dataStream);
end = System.currentTimeMillis();
log.info("SolrIndex.process - the time for calling processDocument "
+ "for the subprocessor " + subprocessor.getClass().getName()
+" for the pid " + id + " is " + (end-start) + "milliseconds.");
log.debug("SolrIndex.process - subprocessor "
+ subprocessor.getClass().getName()
+" generated solr doc for id "+id);
}
start = System.currentTimeMillis();
docs = subprocessor.processDocument(id, docs, dataStream);
end = System.currentTimeMillis();
log.info("SolrIndex.process - the time for calling processDocument "
+ "for the subprocessor " + subprocessor.getClass().getName()
+" for the pid " + id + " is " + (end-start) + "milliseconds.");
log.debug("SolrIndex.process - subprocessor "
+ subprocessor.getClass().getName()
+" generated solr doc for id "+id);
} catch (Exception e) {
e.printStackTrace();
log.error(e.getMessage(), e);
throw new SolrServerException(e.getMessage());
}
Expand Down Expand Up @@ -365,19 +350,13 @@ private boolean isSystemMetadataField(String fieldName) {

/**
* Check the parameters of the insert or update methods.
* @param pid
* @param systemMetadata
* @param data
* @param pid the pid which will be indexed
* @throws InvalidRequest if the pid or its value is null, or the value is blank
*/
private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws InvalidRequest {
private void checkParams(Identifier pid) throws InvalidRequest {
if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) {
throw new InvalidRequest("0000", "The identifier of the indexed document should not be null or blank.");
}
if(systemMetadata == null) {
throw new InvalidRequest("0000", "The system metadata of the indexed document "
+ pid.getValue() + " should not be null.");
}
}

/**
Expand All @@ -393,15 +372,14 @@ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String o
* @throws NotImplemented
* @throws InvalidRequest
*/
private void insert(Identifier pid, SystemMetadata systemMetadata,
String objectPath, boolean isSysmetaChangeOnly)
private void insert(Identifier pid, boolean isSysmetaChangeOnly)
throws IOException, SAXException, ParserConfigurationException, InvalidRequest,
XPathExpressionException, SolrServerException, MarshallingException,
EncoderException, NotImplemented, NotFound, UnsupportedType {
checkParams(pid, systemMetadata, objectPath);
checkParams(pid);
log.debug("SolrIndex.insert - trying to insert the solrDoc for object "+pid.getValue());
long start = System.currentTimeMillis();
Map<String, SolrDoc> docs = process(pid.getValue(), systemMetadata, objectPath, isSysmetaChangeOnly);
Map<String, SolrDoc> docs = process(pid.getValue(), isSysmetaChangeOnly);
long end = System.currentTimeMillis();
log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is "
+ (end-start) + " milliseconds.");
Expand Down Expand Up @@ -471,11 +449,8 @@ public void update(Identifier pid, String relativePath, boolean isSysmetaChangeO
InstantiationException, IllegalAccessException {
log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object "
+ pid.getValue());
String objectPath = null;
SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativePath);
objectPath = ObjectManager.getInstance().getFilePath(relativePath, systemMetadata.getFormatId().getValue());
try {
insert(pid, systemMetadata, objectPath, isSysmetaChangeOnly);
insert(pid, isSysmetaChangeOnly);
} catch (SolrServerException e) {
if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) {
log.info("SolrIndex.update - Indexer grabbed an older verion (version conflict) of "
Expand All @@ -484,8 +459,7 @@ public void update(Identifier pid, String relativePath, boolean isSysmetaChangeO
for (int i=0; i<VERSION_CONFLICT_MAX_ATTEMPTS; i++) {
try {
Thread.sleep(VERSION_CONFICT_WAITING);
systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativePath);
insert(pid, systemMetadata, objectPath, isSysmetaChangeOnly);
insert(pid, isSysmetaChangeOnly);
break;
} catch (SolrException ee) {
if (ee.getMessage().contains(VERSION_CONFLICT)) {
Expand Down
22 changes: 20 additions & 2 deletions src/main/java/org/dataone/cn/indexer/object/ObjectManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.io.InputStream;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.security.NoSuchAlgorithmException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.output.ByteArrayOutputStream;
Expand Down Expand Up @@ -129,10 +130,12 @@ public static ObjectManager getInstance() throws ServiceFailure,
* @throws IOException
* @throws IllegalAccessException
* @throws InstantiationException
* @throws NoSuchAlgorithmException
*/
public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized,
NotImplemented, ServiceFailure, NotFound, InstantiationException,
IllegalAccessException, IOException, MarshallingException {
NoSuchAlgorithmException, IllegalAccessException, IOException,
MarshallingException {
long start = System.currentTimeMillis();
//try to get the system metadata from the storage system first
InputStream sysmetaInputStream = null;
Expand Down Expand Up @@ -200,9 +203,10 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu
* @throws IllegalAccessException
* @throws IOException
* @throws MarshallingException
* @throws NoSuchAlgorithmException
*/
public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id)
throws InvalidToken, NotAuthorized,
throws InvalidToken, NotAuthorized, NoSuchAlgorithmException,
NotImplemented, ServiceFailure, NotFound,
InstantiationException, IllegalAccessException,
IOException, MarshallingException {
Expand All @@ -226,6 +230,20 @@ public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id)
return sysmeta;
}

/**
 * Open a stream over the content of the object identified by the given pid.
 * This method is a thin delegate: it forwards the pid unchanged to
 * {@code storage.retrieveObject} and hands back whatever stream that call returns.
 * Callers are expected to close the returned stream (e.g. with try-with-resources).
 * @param pid the identifier of the object whose content is requested
 * @return the input stream of the object's content, as supplied by the storage
 * @throws IllegalArgumentException propagated from the storage call
 * @throws FileNotFoundException propagated from the storage call
 * @throws NoSuchAlgorithmException propagated from the storage call
 * @throws IOException propagated from the storage call
 */
public InputStream getObject(String pid) throws IllegalArgumentException, FileNotFoundException,
        NoSuchAlgorithmException, IOException {
    final InputStream contentStream = storage.retrieveObject(pid);
    return contentStream;
}

/**
* Set the d1 node for this object manager.
* We only use it for testing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -231,7 +232,7 @@ private boolean isHeadVersion(Identifier pid, Identifier sid) {

private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument)
throws InvalidToken, NotAuthorized, NotImplemented,
ServiceFailure, NotFound, InstantiationException,
NoSuchAlgorithmException, ServiceFailure, NotFound, InstantiationException,
IllegalAccessException, IOException, MarshallingException {

Identifier identifier = new Identifier();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.dataone.cn.indexer.resourcemap;

import java.io.IOException;
import java.security.NoSuchAlgorithmException;

import org.apache.log4j.Logger;
import org.dataone.cn.indexer.object.ObjectManager;
Expand Down Expand Up @@ -54,6 +55,8 @@ public boolean isDocumentVisible(Identifier pid) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
} catch (MarshallingException e) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
} catch (NoSuchAlgorithmException e) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
}
return visible;
}
Expand Down Expand Up @@ -88,6 +91,8 @@ public boolean documentExists(Identifier pid) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
} catch (MarshallingException e) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
} catch (NoSuchAlgorithmException e) {
logger.warn("Could not get visible value for pid: " + pid.getValue() + " since " +e.getMessage());
}
return exists;
}
Expand Down

0 comments on commit ad2dd13

Please sign in to comment.