Handle Zip files on upload
Extract zip files into their individual files before handling them like normal files. Refactors all controllers that handle file uploads to use the new utilities.
trevorgerhardt committed Oct 27, 2023
1 parent c7317e1 commit d2e7fa2
Showing 7 changed files with 146 additions and 56 deletions.
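
After this change, every upload controller converges on the same two-step pattern: parse the multipart request, then store the parts on scratch disk, expanding any zips along the way. A minimal sketch of that pattern as it appears after the refactor (the wrapper method and the "sourceFiles" field name are illustrative, not from this commit):

import com.conveyal.analysis.util.HttpUtils;
import org.apache.commons.fileupload.FileItem;
import java.io.File;
import java.util.List;
import java.util.Map;

// Illustrative wrapper: any .zip among the uploaded parts is expanded first,
// so downstream format detection and validation see only plain files.
static List<File> storeUploadedFiles (spark.Request req) {
    Map<String, List<FileItem>> formFields = HttpUtils.getRequestFiles(req.raw());
    return HttpUtils.storeFileItemsAndUnzip(formFields.get("sourceFiles"));
}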
@@ -13,20 +13,18 @@
import com.conveyal.file.FileUtils;
import com.conveyal.gtfs.GTFSCache;
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.error.GTFSError;
import com.conveyal.gtfs.error.GeneralError;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.validator.PostLoadValidator;
import com.conveyal.osmlib.Node;
import com.conveyal.osmlib.OSM;
import com.conveyal.r5.analyst.progress.ProgressInputStream;
import com.conveyal.r5.analyst.cluster.TransportNetworkConfig;
import com.conveyal.r5.analyst.progress.ProgressInputStream;
import com.conveyal.r5.analyst.progress.Task;
import com.conveyal.r5.streets.OSMCache;
import com.conveyal.r5.util.ExceptionUtils;
import com.mongodb.QueryBuilder;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.bson.types.ObjectId;
import org.locationtech.jts.geom.Envelope;
import org.slf4j.Logger;
@@ -107,19 +105,25 @@ private Bundle create (Request req, Response res) {
// Do some initial synchronous work setting up the bundle to fail fast if the request is bad.
final Map<String, List<FileItem>> files = HttpUtils.getRequestFiles(req.raw());
final Bundle bundle = new Bundle();
final File osmPbfFile;
final List<File> gtfsZipFiles;
try {
bundle.name = files.get("bundleName").get(0).getString("UTF-8");
bundle.regionId = files.get("regionId").get(0).getString("UTF-8");

if (files.get("osmId") != null) {
osmPbfFile = null;
bundle.osmId = files.get("osmId").get(0).getString("UTF-8");
Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next();
if (bundleWithOsm == null) {
throw AnalysisServerException.badRequest("Selected OSM does not exist.");
}
} else {
osmPbfFile = HttpUtils.storeFileItem(files.get("osm").get(0));
}

if (files.get("feedGroupId") != null) {
gtfsZipFiles = null;
bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8");
Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next();
if (bundleWithFeed == null) {
@@ -134,6 +138,8 @@ private Bundle create (Request req, Response res) {
bundle.feeds = bundleWithFeed.feeds;
bundle.feedsComplete = bundleWithFeed.feedsComplete;
bundle.totalFeeds = bundleWithFeed.totalFeeds;
} else {
gtfsZipFiles = HttpUtils.storeFileItems(files.get("feedGroup"));
}
UserPermissions userPermissions = UserPermissions.from(req);
bundle.accessGroup = userPermissions.accessGroup;
@@ -155,16 +161,15 @@ private Bundle create (Request req, Response res) {
.withWorkProduct(BUNDLE, bundle._id, bundle.regionId)
.withAction(progressListener -> {
try {
if (bundle.osmId == null) {
if (osmPbfFile != null) {
// Process uploaded OSM.
bundle.osmId = new ObjectId().toString();
DiskFileItem fi = (DiskFileItem) files.get("osm").get(0);
// Here we perform minimal validation by loading the OSM, but don't retain the resulting MapDB.
OSM osm = new OSM(null);
osm.intersectionDetection = true;
// Number of entities in an OSM file is unknown, so derive progress from the number of bytes read.
// Wrapping in buffered input stream should reduce number of progress updates.
osm.readPbf(ProgressInputStream.forFileItem(fi, progressListener));
osm.readPbf(ProgressInputStream.forFile(osmPbfFile, progressListener));
// osm.readPbf(new BufferedInputStream(fi.getInputStream()));
Envelope osmBounds = new Envelope();
for (Node n : osm.nodes.values()) {
@@ -173,10 +178,10 @@ private Bundle create (Request req, Response res) {
osm.close();
checkWgsEnvelopeSize(osmBounds, "OSM data");
// Store the source OSM file. Note that we're not storing the derived MapDB file here.
fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), fi.getStoreLocation());
fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), osmPbfFile);
}

if (bundle.feedGroupId == null) {
if (gtfsZipFiles != null) {
// Process uploaded GTFS files
bundle.feedGroupId = new ObjectId().toString();

@@ -186,8 +191,7 @@ private Bundle create (Request req, Response res) {
bundle.feeds = new ArrayList<>();
bundle.totalFeeds = files.get("feedGroup").size();

for (FileItem fileItem : files.get("feedGroup")) {
File feedFile = ((DiskFileItem) fileItem).getStoreLocation();
for (File feedFile : gtfsZipFiles) {
ZipFile zipFile = new ZipFile(feedFile);
File tempDbFile = FileUtils.createScratchFile("db");
File tempDbpFile = new File(tempDbFile.getAbsolutePath() + ".p");
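
The OSM branch in this file now reads from a stored File via a new `ProgressInputStream.forFile` factory, defined in one of the changed files not loaded on this page. A plausible sketch of such a factory — assuming the class keeps a pre-existing `ProgressInputStream(ProgressListener, InputStream)` constructor and a `beginTask(String, int)` listener method — where progress is counted in bytes read, since the number of entities in a PBF file is unknown up front:

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;

// Hypothetical sketch, not the committed implementation.
public static ProgressInputStream forFile (File file, ProgressListener progressListener) {
    try {
        // Size the task in bytes; the int cast assumes files under 2 GB.
        progressListener.beginTask("Reading " + file.getName(), (int) file.length());
        // Buffering batches the underlying reads, which also reduces the number of progress updates.
        return new ProgressInputStream(progressListener, new BufferedInputStream(new FileInputStream(file)));
    } catch (FileNotFoundException e) {
        throw new RuntimeException(e);
    }
}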
@@ -3,6 +3,7 @@
import com.conveyal.analysis.AnalysisServerException;
import com.conveyal.analysis.UserPermissions;
import com.conveyal.analysis.components.TaskScheduler;
import com.conveyal.analysis.datasource.DataSourceUtil;
import com.conveyal.analysis.grids.SeamlessCensusGridExtractor;
import com.conveyal.analysis.models.DataGroup;
import com.conveyal.analysis.models.OpportunityDataset;
@@ -29,7 +30,6 @@
import com.google.common.io.Files;
import com.mongodb.QueryBuilder;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.io.FilenameUtils;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
@@ -55,7 +55,6 @@
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate;
import static com.conveyal.analysis.util.JsonUtil.toJson;
import static com.conveyal.file.FileCategory.GRIDS;
import static com.conveyal.r5.analyst.WebMercatorGridPointSet.parseZoom;
@@ -327,31 +326,25 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
// are recorded in a persistent purpose-built way rather than falling back on the UI's catch-all error window.
// TODO more standardized mechanism for tracking asynchronous tasks and catching exceptions on them
OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName);
addStatusAndRemoveOldStatuses(status);

// TODO should we delete this temporary directory at the end?
final File tmpDirectory = FileUtils.createScratchDirectory();
final List<File> files = new ArrayList<>();
final List<FileItem> fileItems;
final List<File> files;
final FileStorageFormat uploadFormat;
final Map<String, String> parameters;
try {
// Validate inputs and parameters, which will throw an exception if there's anything wrong with them.
// Call remove() rather than get() so that subsequent code will see only string parameters, not the files.
fileItems = formFields.remove("files");
for (var fi : fileItems) {
var tmpFile = new File(tmpDirectory, fi.getName());
Files.move(((DiskFileItem) fi).getStoreLocation(), tmpFile);
files.add(tmpFile);
}
uploadFormat = detectUploadFormatAndValidate(files);
files = HttpUtils.storeFileItemsAndUnzip(formFields.remove("files"));
uploadFormat = DataSourceUtil.detectUploadFormatAndValidate(files);
parameters = extractStringParameters(formFields);
} catch (Exception e) {
status.completeWithError(e);
res.status(400);
return status;
}

// Add the status to the region wide tracker before we begin the heavy tasks.
addStatusAndRemoveOldStatuses(status);

// We are going to call several potentially slow blocking methods to create and persist new pointsets.
// This whole series of actions will be run sequentially but within an asynchronous Executor task.
// After enqueueing, the status is returned so the UI can track progress.
@@ -631,6 +624,7 @@ public static class OpportunityDatasetUploadStatus implements ProgressListener {
public Status status = Status.PROCESSING;
public String name;
public String message;
public String stackTrace;
public Date createdAt;
public Date completedAt;

@@ -647,7 +641,8 @@ private void completed (Status status) {
}

public void completeWithError (Exception e) {
message = "Unable to create opportunity dataset. " + ExceptionUtils.stackTraceString(e);
stackTrace = ExceptionUtils.stackTraceString(e);
message = "Unable to create opportunity dataset. " + e.getMessage();
completed(Status.ERROR);
}

@@ -3,25 +3,23 @@
import com.conveyal.analysis.UserPermissions;
import com.conveyal.analysis.models.DataSource;
import com.conveyal.analysis.persistence.AnalysisCollection;
import com.conveyal.analysis.util.HttpUtils;
import com.conveyal.file.FileStorage;
import com.conveyal.file.FileStorageFormat;
import com.conveyal.file.FileStorageKey;
import com.conveyal.r5.analyst.progress.ProgressListener;
import com.conveyal.r5.analyst.progress.TaskAction;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate;
import static com.conveyal.analysis.util.HttpUtils.getFormField;
import static com.conveyal.file.FileCategory.DATASOURCES;
import static com.conveyal.file.FileStorageFormat.SHP;
@@ -127,15 +125,9 @@ public static DataSourceUploadAction forFormFields (
// Extract required parameters. Throws AnalysisServerException on failure, e.g. if a field is missing.
final String sourceName = getFormField(formFields, "sourceName", true);
final String regionId = getFormField(formFields, "regionId", true);
final List<FileItem> fileItems = formFields.get("sourceFiles");
final List<File> files = new ArrayList<>();
final List<File> files = HttpUtils.storeFileItemsAndUnzip(formFields.get("sourceFiles"));

for (var fi : fileItems) {
var dfi = (DiskFileItem) fi;
files.add(dfi.getStoreLocation());
}

FileStorageFormat format = detectUploadFormatAndValidate(files);
FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(files);
DataSourceIngester ingester = DataSourceIngester.forFormat(format);

String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", "));
@@ -27,15 +27,15 @@ public abstract class DataSourceUtil {
* @throws DataSourceException if the type of the upload can't be detected or preconditions are violated.
* @return the expected type of the uploaded file or files, never null.
*/
public static FileStorageFormat detectUploadFormatAndValidate (List<File> fileItems) {
if (isNullOrEmpty(fileItems)) {
public static FileStorageFormat detectUploadFormatAndValidate (List<File> files) {
if (isNullOrEmpty(files)) {
throw new DataSourceException("You must select some files to upload.");
}
Set<String> fileExtensions = extractFileExtensions(fileItems);
Set<String> fileExtensions = extractFileExtensions(files);
if (fileExtensions.isEmpty()) {
throw new DataSourceException("No file extensions seen, cannot detect upload type.");
}
checkFileCharacteristics(fileItems);
checkFileCharacteristics(files);
if (fileExtensions.contains("zip")) {
throw new DataSourceException("Upload of spatial .zip files not yet supported");
// TODO unzip and process unzipped files - will need to peek inside to detect GTFS uploads first.
@@ -45,7 +45,7 @@ public static FileStorageFormat detectUploadFormatAndValidate (List<File> fileIt
final Set<String> shapefileExtensions = Sets.newHashSet("shp", "dbf", "prj");
if ( ! Sets.intersection(fileExtensions, shapefileExtensions).isEmpty()) {
if (fileExtensions.containsAll(shapefileExtensions)) {
verifyBaseNamesSame(fileItems);
verifyBaseNamesSame(files);
// TODO check that any additional file is .shx, and that there are no more than 4 files.
} else {
throw new DataSourceException("You must multi-select at least SHP, DBF, and PRJ files for shapefile upload.");
@@ -113,5 +113,4 @@ private static void verifyBaseNamesSame (List<File> files) {
}
}
}

}
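
For illustration, here is how the validator would be exercised for a shapefile upload. The file names are hypothetical and the files must already exist on disk; note that after this commit, any uploaded .zip is expanded by the new upload utilities before this method ever sees it:

import com.conveyal.analysis.datasource.DataSourceUtil;
import com.conveyal.file.FileStorageFormat;
import java.io.File;
import java.util.List;

// Hypothetical multi-select: the three mandatory shapefile parts sharing one base name.
List<File> upload = List.of(new File("roads.shp"), new File("roads.dbf"), new File("roads.prj"));
FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(upload);
// Expected: SHP. Omitting roads.prj, or including a .zip, would throw DataSourceException instead.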
55 changes: 51 additions & 4 deletions src/main/java/com/conveyal/analysis/util/HttpUtils.java
@@ -1,14 +1,17 @@
package com.conveyal.analysis.util;

import com.conveyal.analysis.AnalysisServerException;
import com.conveyal.analysis.datasource.DataSourceException;
import com.conveyal.file.FileUtils;
import com.conveyal.r5.util.ExceptionUtils;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.FileItemFactory;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;

import javax.servlet.http.HttpServletRequest;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

@@ -36,8 +39,7 @@ public static Map<String, List<FileItem>> getRequestFiles (HttpServletRequest re
// If we always saved the FileItems via write() or read them with getInputStream() they would not all need to
// be on disk.
try {
FileItemFactory fileItemFactory = new DiskFileItemFactory(0, null);
ServletFileUpload sfu = new ServletFileUpload(fileItemFactory);
ServletFileUpload sfu = new ServletFileUpload();
return sfu.parseParameterMap(req);
} catch (Exception e) {
throw AnalysisServerException.fileUpload(ExceptionUtils.stackTraceString(e));
@@ -66,4 +68,49 @@ public static String getFormField(Map<String, List<FileItem>> formFields, String
fieldName));
}
}

public static List<File> storeFileItemsAndUnzip(List<FileItem> fileItems) {
return storeFileItemsAndUnzip(fileItems, FileUtils.createScratchDirectory());
}

/**
* Convert `FileItem`s into `File`s and move them into the given directory. Automatically unzip files. Return the
* list of new `File` handles.
*/
public static List<File> storeFileItemsAndUnzip(List<FileItem> fileItems, File directory) {
List<File> files = new ArrayList<>();
for (FileItem fi : fileItems) {
File file = storeFileItemInDirectory(fi, directory);
String name = file.getName();
if (name.toLowerCase().endsWith(".zip")) {
files.addAll(FileUtils.unZipFileIntoDirectory(file, directory));
} else {
files.add(file);
}
}
return files;
}

public static List<File> storeFileItems(List<FileItem> fileItems) {
File directory = FileUtils.createScratchDirectory();
List<File> files = new ArrayList<>();
for (FileItem fileItem : fileItems) {
files.add(storeFileItemInDirectory(fileItem, directory));
}
return files;
}

public static File storeFileItem(FileItem fileItem) {
return storeFileItemInDirectory(fileItem, FileUtils.createScratchDirectory());
}

public static File storeFileItemInDirectory(FileItem fileItem, File directory) {
try {
File file = new File(directory, fileItem.getName());
fileItem.getInputStream().transferTo(FileUtils.getOutputStream(file));
return file;
} catch (IOException e) {
throw new DataSourceException(e.getMessage());
}
}
}
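
`storeFileItemsAndUnzip` above delegates to `FileUtils.unZipFileIntoDirectory`, whose implementation is in one of the changed files that did not load on this page. A hypothetical sketch of such a helper, matching the call signature used above but not the committed code — extract each regular zip entry into the target directory and return the extracted files:

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

// Hypothetical sketch, not the committed implementation.
public static List<File> unZipFileIntoDirectory (File zipFile, File directory) throws IOException {
    List<File> files = new ArrayList<>();
    try (ZipFile zip = new ZipFile(zipFile)) {
        Enumeration<? extends ZipEntry> entries = zip.entries();
        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            // Skip directories and the macOS resource-fork debris often found in user-created zips.
            if (entry.isDirectory() || entry.getName().startsWith("__MACOSX")) continue;
            File out = new File(directory, new File(entry.getName()).getName());
            try (InputStream in = zip.getInputStream(entry); OutputStream os = Files.newOutputStream(out.toPath())) {
                in.transferTo(os);
            }
            files.add(out);
        }
    }
    return files;
}

In this sketch, flattening each entry name with `new File(entry.getName()).getName()` keeps every extracted file directly inside the target directory, which also guards against zip-slip path traversal.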
(2 more changed files not shown)