Skip to content

Commit

Permalink
Merge branch 'release-2018.01' into clarin
Browse files Browse the repository at this point in the history
  • Loading branch information
kosarko committed Feb 16, 2018
2 parents 933f587 + 9dd0bbd commit 54c710f
Show file tree
Hide file tree
Showing 18 changed files with 645 additions and 228 deletions.
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
/* Created for LINDAT/CLARIN */
package cz.cuni.mff.ufal.curation;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.SQLException;
import java.util.Enumeration;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;

import cz.cuni.mff.ufal.DSpaceApi;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.*;
Expand All @@ -28,7 +25,6 @@
import org.dspace.core.Context;
import org.dspace.content.Bitstream;
import org.dspace.core.Constants;
import org.dspace.storage.bitstore.BitstreamStorageManager;


public class ProcessBitstreams extends AbstractCurationTask implements Consumer {
Expand Down Expand Up @@ -76,7 +72,8 @@ public int perform(DSpaceObject dso) throws IOException {

boolean processItem(Item item) throws SQLException, AuthorizeException {
int processed = 0;
for ( Bundle bundle : item.getBundles("ORIGINAL") ) {
// we filter for ORIGINAL later on
for ( Bundle bundle : item.getBundles() ) {
for ( Bitstream b : bundle.getBitstreams() ) {
if (OK == processBitstream(b)) {
processed += 1;
Expand Down Expand Up @@ -108,7 +105,6 @@ public void consume(Context ctx, Event event) throws Exception {
}

DSpaceObject subject = event.getSubject(ctx);
DSpaceObject object = event.getObject(ctx);
int et = event.getEventType();
Bitstream b = (Bitstream)subject;

Expand All @@ -132,26 +128,52 @@ static int processBitstream(Bitstream b) throws SQLException, AuthorizeException
return ret;
}

static InputStream getIS(String mime, InputStream is) {
if ( mime.equals("application/zip") ) {
return new ZipArchiveInputStream(is);
}
else if ( mime.equals("application/x-gzip") ) {
}
else if ( mime.equals("application/gzip") ) {
}
else if ( mime.equals("application/x-tar") ) {
return new TarArchiveInputStream(is);
}
else if ( mime.equals("application/x-xz") ) {
static InputStream getIS(String mime, InputStream is) throws CompressorException, ArchiveException {

is = new BufferedInputStream(is);
InputStream ret = null;

switch (mime){
case "application/x-gzip":
case "application/gzip":
case "application/x-xz":
try{
CompressorInputStream cis = new CompressorStreamFactory().createCompressorInputStream(is);
ret = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(cis));
}catch (CompressorException e){
log.error("Failed to extract known mime-type " + mime);
log.error(e);
throw e;
}catch (ArchiveException e){
log.debug("Not a compressed archive (eg. .tgz)");
}
break;
case "application/zip":
case "application/x-tar":
try{
ret = new ArchiveStreamFactory().createArchiveInputStream(is);
}catch (ArchiveException e){
log.error("Failed to extract known archive mime-type=" + mime);
log.error(e);
throw e;
}
break;
default: break;
}
else if ( mime.startsWith("text/plain") ) {
return is;

if (ret == null && mime.startsWith("text/plain") ) {
ret = is;
}
return null;
return ret;
}

static int addBitstreamContent(Bitstream b) throws SQLException, AuthorizeException {

// Clear on all bitstream no matter if PUB or what bundle they are in
// In particular clean LICENSE preview and preview on RES items
b.clearMetadata(schema, element, qualifier, Item.ANY);
b.update();

Context context = new Context(Context.READ_ONLY);
context.setCurrentUser(null);
try {
Expand All @@ -164,7 +186,19 @@ static int addBitstreamContent(Bitstream b) throws SQLException, AuthorizeExcept
context.complete();
}

b.clearMetadata(schema, element, qualifier, Item.ANY);
// Skip the non ORIGINAL bitstreams after we've cleared the bitstream metadata
// earlier versions generated previews for LICENSE and other bundles
// this ensures those are cleared when the item is curated again.
boolean original = false;
for(Bundle bundle : b.getBundles()){
if("ORIGINAL".equals(bundle.getName())){
original = true;
}
}
if(!original){
b.update();
return SKIPPED;
}

//
try {
Expand All @@ -176,11 +210,17 @@ static int addBitstreamContent(Bitstream b) throws SQLException, AuthorizeExcept
if(is instanceof ArchiveInputStream) {
ArchiveInputStream ais = (ArchiveInputStream)is;
ArchiveEntry entry;
int i = 0;
while ((entry = ais.getNextEntry()) != null) {
String content = String.format(
"%s|%d", entry.getName(), entry.getSize()
);
b.addMetadata( schema, element, qualifier, Item.ANY, content );
//don't add more than 1000 files
if(++i >= 1000){
b.addMetadata(schema, element, qualifier, Item.ANY, String.format("%s|%d", "...", 0));
break;
}
}
} else {
InputStreamReader r = new InputStreamReader(is);
Expand All @@ -189,6 +229,7 @@ static int addBitstreamContent(Bitstream b) throws SQLException, AuthorizeExcept
b.addMetadata( schema, element, qualifier, Item.ANY, new String(cbuf) );
}
} catch (Exception e) {
log.error("Error on bitstream " + b.getID());
log.error(e);
return ERROR;
}
Expand Down
31 changes: 30 additions & 1 deletion dspace-api/src/main/java/cz/cuni/mff/ufal/dspace/IOUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@

import org.apache.log4j.Logger;

import javax.servlet.http.HttpServletRequest;
import java.text.SimpleDateFormat;

import static org.apache.commons.lang3.StringUtils.isNotBlank;

public class IOUtils {

private static final Pattern date_pattern = Pattern.compile("(\\d\\d\\d\\d-\\d\\d-\\d\\d)");
Expand Down Expand Up @@ -197,6 +200,32 @@ public static String run( File where, String[] cmd) {
}
return message;
}


/**
 * Decide whether the response should be streamed from the beginning of the entity.
 *
 * Returns true when the request has no Range header, when any requested range
 * starts at byte 0, or when the header cannot be parsed (best effort — on any
 * parsing problem we fall back to streaming from the start). Returns false only
 * when a well-formed Range header is present and none of its ranges begins at 0.
 *
 * @param request the incoming HTTP request; its "range" header is inspected
 * @return true if streaming should start at byte 0, false otherwise
 */
public static boolean requestRangeContainsStart(HttpServletRequest request){
    String rangeHeader = request.getHeader("range");
    try {
        if (isNotBlank(rangeHeader)) {
            // Header shape: "<unit>=<start>-<end>[,<start>-<end>...]", e.g. "bytes=0-499,1000-".
            // The unit before '=' is not checked; we only look at the range starts.
            String[] parts = rangeHeader.split("=", 2);
            String ranges = parts[1].replaceAll(" ", "");
            for (String range : ranges.split(",")) {
                String start = range.split("-")[0];
                if ("0".equals(start.trim())) {
                    log.debug("Found zero in range header.");
                    return true;
                }
            }
        } else {
            // No range header -> always stream from start
            return true;
        }
    } catch (Exception e){
        // issue with parsing the header, assume we stream from beginning
        return true;
    }
    log.debug("No zero in range header.");
    return false;
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public String run( ReportInfo ri ) {
}

if ( output_all ) {
ret = StringUtils.join(results, "\n");
ret += StringUtils.join(results, "\n") + "\n";
}
c.complete();
} catch (Exception e) {
Expand All @@ -95,7 +95,7 @@ private static String output_checklinks(ArrayList<String> results) {
if (str.trim().endsWith("- OK")) {
continue;
} else if (str.trim().startsWith("Item:")) {
last_item = str;
last_item = str.substring(0, str.indexOf(']')+1);
} else {
if (last_item != null) {
ret += last_item + "\n";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/
package cz.cuni.mff.ufal.health;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
Expand Down Expand Up @@ -45,7 +46,8 @@ public String run( ReportInfo ri ) {
return "PLEASE configure lr.harvester.info.url";
}
String dspace_name = ConfigurationManager.getProperty("dspace.name");
dspace_name = dspace_name.replaceAll("[ ,/-]+","_");
dspace_name = dspace_name.replaceAll("[() ,/.\"-]+","_");
dspace_name = StringUtils.stripAccents(dspace_name);
harvesterInfoUrl = harvesterInfoUrl.endsWith("/") ? harvesterInfoUrl + dspace_name :
harvesterInfoUrl + "/" + dspace_name;
harvesterInfoUrl = harvesterInfoUrl + ".html";
Expand Down
60 changes: 55 additions & 5 deletions dspace-api/src/main/java/org/dspace/content/Item.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@
import java.io.InputStream;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.*;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.dspace.app.util.AuthorizeUtil;
import org.dspace.authorize.AuthorizeConfiguration;
Expand Down Expand Up @@ -2246,5 +2242,59 @@ public void setReplacedBy(String pid) {
this.addMetadatum(md);
}
}

/**
 * Follow the provided relation and record the visited handles
 *
 * Performs a transitive walk: starting from this item's handle it repeatedly
 * follows the given dc.relation.&lt;qualifier&gt; metadata values until no new
 * handles are found. Values are matched as full "http://hdl.handle.net/..."
 * URLs (see the concat in the second query), so the returned collection
 * contains handle URLs, not bare handles.
 *
 * @param relation - name of relation e.g. isreplacedby or replaces
 * @return A list of handles that are in relation to this item
 * @throws SQLException
 */
public java.util.Collection<String> getRelationChain(String relation) throws SQLException {
String handle = this.getHandle();
Set<String> relatedHandles = new HashSet<>();

// Are there relations for this item?
// Cheap existence check first: only run the full walk when this item has at
// least one value of the requested relation qualifier.
TableRow row = DatabaseManager.querySingle(ourContext, "select count(*) as relation_count from metadatavalue mv" +
" natural join metadatafieldregistry join handle h on" +
" h.resource_id = mv.resource_id and h.resource_type_id = mv.resource_type_id" +
" where handle = ? and element = 'relation' and qualifier = ?;",
handle, relation);
if(row == null || row.getIntColumn("relation_count") < 1){
return relatedHandles;
}

//There are relations for this handle so fetch all relations and walk through them
// NOTE(review): this query is intentionally NOT filtered by handle — it loads the
// relation edges of ALL items in the repository into memory so the chain can be
// walked without further queries. Fine for small repositories; presumably
// acceptable here, but worth confirming at scale.
TableRowIterator rows = DatabaseManager.query(ourContext,
"select concat('http://hdl.handle.net/', handle) as handle, text_value " +
"as relation from metadatavalue mv natural join metadatafieldregistry join handle h on " +
"h.resource_id = mv.resource_id and h.resource_type_id = mv.resource_type_id" +
" where element = 'relation' and qualifier=?;", relation);
// Build an adjacency map: source handle URL -> list of related handle URLs.
Map<String,List<String>> handle2relations = new HashMap<>();
while(rows.hasNext()){
row = rows.next(ourContext);
String row_handle = row.getStringColumn("handle");
List<String> relations = handle2relations.get(row_handle);
if(relations == null){
relations = new LinkedList<>();
handle2relations.put(row_handle, relations);
}
relations.add(row.getStringColumn("relation"));
}
// NOTE(review): rows (TableRowIterator) is not visibly closed here — confirm
// whether DSpace's DatabaseManager requires an explicit close() to release the
// underlying statement.
// Iterative depth-first walk over the adjacency map; relatedHandles doubles as
// the visited set, so cycles (a replaces b, b isreplacedby a) terminate.
LinkedList<String> handlesToProcess = new LinkedList<>();
handlesToProcess.add("http://hdl.handle.net/" + handle);
while(!handlesToProcess.isEmpty()){
List<String> relations = handle2relations.get(handlesToProcess.pop());
if(relations != null) {
for (String rel : relations) {
if(!relatedHandles.contains(rel)) {
relatedHandles.add(rel);
handlesToProcess.push(rel);
}
}
}
}
// Note: the starting item's own handle is only included if some item relates back to it.
return relatedHandles;
}
}

Loading

0 comments on commit 54c710f

Please sign in to comment.