From 1f7a882d7d11e6c1391aefc8340ba04c95d4facf Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 13:53:14 +0800 Subject: [PATCH 01/10] Convert FileItems to Files before using We use Apache FileUpload to assist with handling incoming files. We also use a DiskFileItemFactory that persists each incoming file to disk immediately. This means that we could represent each incoming file as a java.io.File instead of a FileItem. java.io.File is a much more common way of handling a file and easier to work with. --- .../OpportunityDatasetController.java | 62 ++++++++++--------- .../datasource/DataSourceUploadAction.java | 32 ++++++---- .../analysis/datasource/DataSourceUtil.java | 22 +++---- .../conveyal/r5/analyst/FreeFormPointSet.java | 9 +-- .../java/com/conveyal/r5/analyst/Grid.java | 8 +-- 5 files changed, 69 insertions(+), 64 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index b1afcfc05..5941cfeec 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -11,7 +11,6 @@ import com.conveyal.analysis.persistence.AnalysisCollection; import com.conveyal.analysis.persistence.AnalysisDB; import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.analysis.util.FileItemInputStreamProvider; import com.conveyal.analysis.util.HttpUtils; import com.conveyal.analysis.util.JsonUtil; import com.conveyal.file.FileStorage; @@ -25,12 +24,12 @@ import com.conveyal.r5.analyst.progress.Task; import com.conveyal.r5.analyst.progress.WorkProduct; import com.conveyal.r5.util.ExceptionUtils; -import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.io.Files; import 
com.mongodb.QueryBuilder; import org.apache.commons.fileupload.FileItem; +import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import org.bson.types.ObjectId; import org.slf4j.Logger; @@ -275,7 +274,7 @@ private static FileStorageFormat getFormatCode (PointSet pointSet) { * This method executes in a blocking (synchronous) manner, but it can take a while so should be called within an * non-blocking asynchronous task. */ - private List createFreeFormPointSetsFromCsv(FileItem csvFileItem, Map params) { + private List createFreeFormPointSetsFromCsv(File csvFile, Map params) { String latField = params.get("latField"); String lonField = params.get("lonField"); @@ -296,12 +295,11 @@ private List createFreeFormPointSetsFromCsv(FileItem csvFileIt try { List pointSets = new ArrayList<>(); - InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem); - pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField, lonField, idField, countField)); + pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField, lonField, idField, countField)); // The second pair of lat and lon fields allow creating two matched pointsets from the same CSV. // This is used for one-to-one travel times between specific origins/destinations. 
if (latField2 != null && lonField2 != null) { - pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField2, lonField2, idField, countField)); + pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField2, lonField2, idField, countField)); } return pointSets; } catch (Exception e) { @@ -331,6 +329,7 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName); addStatusAndRemoveOldStatuses(status); + final List files = new ArrayList<>(); final List fileItems; final FileStorageFormat uploadFormat; final Map parameters; @@ -338,7 +337,11 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res // Validate inputs and parameters, which will throw an exception if there's anything wrong with them. // Call remove() rather than get() so that subsequent code will see only string parameters, not the files. fileItems = formFields.remove("files"); - uploadFormat = detectUploadFormatAndValidate(fileItems); + for (var fi : fileItems) { + var dfi = (DiskFileItem) fi; + files.add(dfi.getStoreLocation()); + } + uploadFormat = detectUploadFormatAndValidate(files); parameters = extractStringParameters(formFields); } catch (Exception e) { status.completeWithError(e); @@ -354,35 +357,35 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res List pointsets = new ArrayList<>(); if (uploadFormat == FileStorageFormat.GRID) { LOG.info("Detected opportunity dataset stored in Conveyal binary format."); - pointsets.addAll(createGridsFromBinaryGridFiles(fileItems, status)); + pointsets.addAll(createGridsFromBinaryGridFiles(files, status)); } else if (uploadFormat == FileStorageFormat.SHP) { LOG.info("Detected opportunity dataset stored as ESRI shapefile."); - pointsets.addAll(createGridsFromShapefile(fileItems, zoom, status)); + pointsets.addAll(createGridsFromShapefile(files, zoom, status)); } else if (uploadFormat 
== FileStorageFormat.CSV) { LOG.info("Detected opportunity dataset stored as CSV"); // Create a grid even when user has requested a freeform pointset so we have something to visualize. - FileItem csvFileItem = fileItems.get(0); + File csvFile = files.get(0); // FIXME why were we uploading to S3 using the file path not the UUID? // writeFileToS3(csvFile); // TODO report progress / status as with grids. That involves pre-scanning the CSV which would be // facilitated by retaining the CSV server side and later converting to pointset. boolean requestedFreeForm = Boolean.parseBoolean(parameters.get("freeform")); // Hack to enable freeform pointset building without exposing a UI element, via file name. - if (csvFileItem.getName().contains("FREEFORM_PS.")) { + if (csvFile.getName().contains("FREEFORM_PS.")) { requestedFreeForm = true; } if (requestedFreeForm) { LOG.info("Processing CSV as freeform (rather than gridded) pointset as requested."); // This newer process creates a FreeFormPointSet only for the specified count fields, // as well as a Grid to assist in visualization of the uploaded data. - for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFileItem, parameters)) { + for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFile, parameters)) { Grid gridFromFreeForm = Grid.fromFreeForm(freeForm, zoom); pointsets.add(freeForm); pointsets.add(gridFromFreeForm); } } else { // This is the common default process: create a grid for every non-ignored field in the CSV. - pointsets.addAll(createGridsFromCsv(csvFileItem, formFields, zoom, status)); + pointsets.addAll(createGridsFromCsv(csvFile, formFields, zoom, status)); } } if (pointsets.isEmpty()) { @@ -473,7 +476,7 @@ private OpportunityDataset deleteDataset(String id, UserPermissions userPermissi * TODO explain latField2 usage * @return one or two Grids for each numeric column in the CSV input. 
*/ - private List createGridsFromCsv(FileItem csvFileItem, + private List createGridsFromCsv(File csvFile, Map> query, int zoom, OpportunityDatasetUploadStatus status) throws Exception { @@ -488,12 +491,11 @@ private List createGridsFromCsv(FileItem csvFileItem, String lonField2 = HttpUtils.getFormField(query, "lonField2", false); List ignoreFields = Arrays.asList(idField, latField2, lonField2); - InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem); - List grids = Grid.fromCsv(csvStreamProvider, latField, lonField, ignoreFields, zoom, status); + List grids = Grid.fromCsv(csvFile, latField, lonField, ignoreFields, zoom, status); // TODO verify correctness of this second pass if (latField2 != null && lonField2 != null) { ignoreFields = Arrays.asList(idField, latField, lonField); - grids.addAll(Grid.fromCsv(csvStreamProvider, latField2, lonField2, ignoreFields, zoom, status)); + grids.addAll(Grid.fromCsv(csvFile, latField2, lonField2, ignoreFields, zoom, status)); } return grids; @@ -503,14 +505,14 @@ private List createGridsFromCsv(FileItem csvFileItem, * Create a grid from an input stream containing a binary grid file. * For those in the know, we can upload manually created binary grid files. 
*/ - private List createGridsFromBinaryGridFiles(List uploadedFiles, + private List createGridsFromBinaryGridFiles(List uploadedFiles, OpportunityDatasetUploadStatus status) throws Exception { List grids = new ArrayList<>(); status.totalFeatures = uploadedFiles.size(); - for (FileItem fileItem : uploadedFiles) { - Grid grid = Grid.read(fileItem.getInputStream()); - String name = fileItem.getName(); + for (File file : uploadedFiles) { + Grid grid = Grid.read(FileUtils.getInputStream(file)); + String name = file.getName(); // Remove ".grid" from the name if (name.contains(".grid")) name = name.split(".grid")[0]; grid.name = name; @@ -522,37 +524,37 @@ private List createGridsFromBinaryGridFiles(List uploadedFiles, } /** - * Preconditions: fileItems must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the + * Preconditions: files must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the * same base name, and should not contain any other files but these three or four. */ - private List createGridsFromShapefile(List fileItems, + private List createGridsFromShapefile(List files, int zoom, OpportunityDatasetUploadStatus status) throws Exception { // In the caller, we should have already verified that all files have the same base name and have an extension. // Extract the relevant files: .shp, .prj, .dbf, and .shx. // We need the SHX even though we're looping over every feature as they might be sparse. - Map filesByExtension = new HashMap<>(); - for (FileItem fileItem : fileItems) { - filesByExtension.put(FilenameUtils.getExtension(fileItem.getName()).toUpperCase(), fileItem); + Map filesByExtension = new HashMap<>(); + for (File file : files) { + filesByExtension.put(FilenameUtils.getExtension(file.getName()).toUpperCase(), file); } // Copy the shapefile component files into a temporary directory with a fixed base name. 
File tempDir = Files.createTempDir(); File shpFile = new File(tempDir, "grid.shp"); - filesByExtension.get("SHP").write(shpFile); + Files.copy(filesByExtension.get("SHP"), shpFile); File prjFile = new File(tempDir, "grid.prj"); - filesByExtension.get("PRJ").write(prjFile); + Files.copy(filesByExtension.get("PRJ"), prjFile); File dbfFile = new File(tempDir, "grid.dbf"); - filesByExtension.get("DBF").write(dbfFile); + Files.copy(filesByExtension.get("DBF"), dbfFile); // The .shx file is an index. It is optional, and not needed for dense shapefiles. if (filesByExtension.containsKey("SHX")) { File shxFile = new File(tempDir, "grid.shx"); - filesByExtension.get("SHX").write(shxFile); + Files.copy(filesByExtension.get("SHX"), shxFile); } List grids = Grid.fromShapefile(shpFile, zoom, status); diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java index 39f6740ab..2adc53197 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -15,13 +15,14 @@ import org.slf4j.LoggerFactory; import java.io.File; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.stream.Collectors; -import static com.conveyal.analysis.util.HttpUtils.getFormField; import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; +import static com.conveyal.analysis.util.HttpUtils.getFormField; import static com.conveyal.file.FileCategory.DATASOURCES; import static com.conveyal.file.FileStorageFormat.SHP; import static com.google.common.base.Preconditions.checkNotNull; @@ -42,7 +43,7 @@ public class DataSourceUploadAction implements TaskAction { private final AnalysisCollection dataSourceCollection; /** The files provided in the HTTP post form. These will be moved into storage. 
*/ - private final List fileItems; + private final List files; /** * This DataSourceIngester provides encapsulated loading and validation logic for a single format, by composition @@ -64,12 +65,12 @@ public String getDataSourceName () { public DataSourceUploadAction ( FileStorage fileStorage, AnalysisCollection dataSourceCollection, - List fileItems, + List files, DataSourceIngester ingester ) { this.fileStorage = fileStorage; this.dataSourceCollection = dataSourceCollection; - this.fileItems = fileItems; + this.files = files; this.ingester = ingester; } @@ -94,18 +95,17 @@ private final void moveFilesIntoStorage (ProgressListener progressListener) { // (with filenames that correspond to the source id) progressListener.beginTask("Moving files into storage...", 1); final String dataSourceId = ingester.dataSource()._id.toString(); - for (FileItem fileItem : fileItems) { - DiskFileItem dfi = (DiskFileItem) fileItem; + for (File file : files) { // Use canonical extension from file type - files may be uploaded with e.g. tif instead of tiff or geotiff. String extension = ingester.dataSource().fileFormat.extension; - if (fileItems.size() > 1) { + if (files.size() > 1) { // If we have multiple files, as with Shapefile, just keep the original extension for each file. // This could lead to orphaned files after a deletion, we might want to implement wildcard deletion. 
- extension = FilenameUtils.getExtension(fileItem.getName()).toLowerCase(Locale.ROOT); + extension = FilenameUtils.getExtension(file.getName()).toLowerCase(Locale.ROOT); } FileStorageKey key = new FileStorageKey(DATASOURCES, dataSourceId, extension); - fileStorage.moveIntoStorage(key, dfi.getStoreLocation()); - if (fileItems.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) { + fileStorage.moveIntoStorage(key, file); + if (files.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) { file = fileStorage.getFile(key); } } @@ -128,14 +128,20 @@ public static DataSourceUploadAction forFormFields ( final String sourceName = getFormField(formFields, "sourceName", true); final String regionId = getFormField(formFields, "regionId", true); final List fileItems = formFields.get("sourceFiles"); + final List files = new ArrayList<>(); + + for (var fi : fileItems) { + var dfi = (DiskFileItem) fi; + files.add(dfi.getStoreLocation()); + } - FileStorageFormat format = detectUploadFormatAndValidate(fileItems); + FileStorageFormat format = detectUploadFormatAndValidate(files); DataSourceIngester ingester = DataSourceIngester.forFormat(format); - String originalFileNames = fileItems.stream().map(FileItem::getName).collect(Collectors.joining(", ")); + String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", ")); ingester.initializeDataSource(sourceName, originalFileNames, regionId, userPermissions); DataSourceUploadAction dataSourceUploadAction = - new DataSourceUploadAction(fileStorage, dataSourceCollection, fileItems, ingester); + new DataSourceUploadAction(fileStorage, dataSourceCollection, files, ingester); return dataSourceUploadAction; } diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java index d85b883e9..13b158fe1 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java +++ 
b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java @@ -2,8 +2,6 @@ import com.conveyal.file.FileStorageFormat; import com.google.common.collect.Sets; -import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import java.io.File; @@ -29,7 +27,7 @@ public abstract class DataSourceUtil { * @throws DataSourceException if the type of the upload can't be detected or preconditions are violated. * @return the expected type of the uploaded file or files, never null. */ - public static FileStorageFormat detectUploadFormatAndValidate (List fileItems) { + public static FileStorageFormat detectUploadFormatAndValidate (List fileItems) { if (isNullOrEmpty(fileItems)) { throw new DataSourceException("You must select some files to upload."); } @@ -78,10 +76,8 @@ public static FileStorageFormat detectUploadFormatAndValidate (List fi * Check that all FileItems supplied are stored in disk files (not memory), that they are all readable and all * have nonzero size. */ - private static void checkFileCharacteristics (List fileItems) { - for (FileItem fileItem : fileItems) { - checkState(fileItem instanceof DiskFileItem, "Uploaded file was not stored to disk."); - File diskFile = ((DiskFileItem)fileItem).getStoreLocation(); + private static void checkFileCharacteristics (List files) { + for (File diskFile : files) { checkState(diskFile.exists(), "Uploaded file does not exist on filesystem as expected."); checkState(diskFile.canRead(), "Read permissions were not granted on uploaded file."); checkState(diskFile.length() > 0, "Uploaded file was empty (contained no data)."); @@ -92,10 +88,10 @@ private static void checkFileCharacteristics (List fileItems) { * Given a list of FileItems, return a set of all unique file extensions present, normalized to lower case. * Always returns a set instance which may be empty, but never null. 
*/ - private static Set extractFileExtensions (List fileItems) { + private static Set extractFileExtensions (List files) { Set fileExtensions = new HashSet<>(); - for (FileItem fileItem : fileItems) { - String fileName = fileItem.getName(); + for (File file : files) { + String fileName = file.getName(); String extension = FilenameUtils.getExtension(fileName); if (extension.isEmpty()) { throw new DataSourceException("Filename has no extension: " + fileName); @@ -106,10 +102,10 @@ private static Set extractFileExtensions (List fileItems) { } /** In uploads containing more than one file, all files are expected to have the same name before the extension. */ - private static void verifyBaseNamesSame (List fileItems) { + private static void verifyBaseNamesSame (List files) { String firstBaseName = null; - for (FileItem fileItem : fileItems) { - String baseName = FilenameUtils.getBaseName(fileItem.getName()); + for (File file : files) { + String baseName = FilenameUtils.getBaseName(file.getName()); if (firstBaseName == null) { firstBaseName = baseName; } else if (!firstBaseName.equals(baseName)) { diff --git a/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java b/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java index d0deaed13..a6d3a6011 100644 --- a/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java +++ b/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java @@ -1,7 +1,7 @@ package com.conveyal.r5.analyst; import com.beust.jcommander.ParameterException; -import com.conveyal.r5.util.InputStreamProvider; +import com.conveyal.file.FileUtils; import com.csvreader.CsvReader; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; @@ -12,6 +12,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -49,7 +50,7 @@ public class FreeFormPointSet extends PointSet { * for the points; if not, row numbers will 
be used as the ids. */ public static FreeFormPointSet fromCsv ( - InputStreamProvider csvInputStreamProvider, + File csvFile, String latField, String lonField, String idField, @@ -61,7 +62,7 @@ public static FreeFormPointSet fromCsv ( int lonCol = -1; int idCol = -1; int countCol = -1; - try (InputStream csvInputStream = csvInputStreamProvider.getInputStream()) { + try (InputStream csvInputStream = FileUtils.getInputStream(csvFile)) { CsvReader reader = new CsvReader(csvInputStream, ',', StandardCharsets.UTF_8); reader.readHeaders(); int nCols = reader.getHeaderCount(); @@ -105,7 +106,7 @@ public static FreeFormPointSet fromCsv ( /* If we reached here, the file is entirely readable. Re-read it from the beginning and record values. */ // Note that we're doing two passes just so we know the array size. We could just use TIntLists. int rec = -1; - try (InputStream csvInputStream = csvInputStreamProvider.getInputStream()) { + try (InputStream csvInputStream = FileUtils.getInputStream(csvFile)) { CsvReader reader = new CsvReader(csvInputStream, ',', StandardCharsets.UTF_8); FreeFormPointSet ret = new FreeFormPointSet(nRecs); ret.name = countField != null ? 
countField : "[COUNT]"; diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java index ad9faf1ed..0ec13b52f 100644 --- a/src/main/java/com/conveyal/r5/analyst/Grid.java +++ b/src/main/java/com/conveyal/r5/analyst/Grid.java @@ -1,8 +1,8 @@ package com.conveyal.r5.analyst; import com.conveyal.analysis.datasource.DataSourceException; +import com.conveyal.file.FileUtils; import com.conveyal.r5.common.GeometryUtils; -import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; import com.conveyal.r5.util.ShapefileReader; import com.csvreader.CsvReader; @@ -554,7 +554,7 @@ public static Polygon getPixelGeometry (int localX, int localY, WebMercatorExten * @param ignoreFields if this is non-null, the fields with these names will not be considered when looking for * numeric opportunity count fields. Null strings in the collection are ignored. */ - public static List fromCsv(InputStreamProvider csvInputStreamProvider, + public static List fromCsv(File csvFile, String latField, String lonField, Collection ignoreFields, @@ -563,7 +563,7 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // Read through the CSV file once to establish its structure (which fields are numeric). // TODO factor out this logic for all CSV loading, reuse for freeform and grids, set progress properly. - CsvReader reader = new CsvReader(csvInputStreamProvider.getInputStream(), StandardCharsets.UTF_8); + CsvReader reader = new CsvReader(FileUtils.getInputStream(csvFile), StandardCharsets.UTF_8); reader.readHeaders(); List headers = Arrays.asList(reader.getHeaders()); if (!headers.contains(latField)) { @@ -646,7 +646,7 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // The first read through the CSV just established its structure (e.g. which fields were numeric). // Now, re-read the CSV from the beginning to load the values and populate the grids. 
- reader = new CsvReader(csvInputStreamProvider.getInputStream(), StandardCharsets.UTF_8); + reader = new CsvReader(FileUtils.getInputStream(csvFile), StandardCharsets.UTF_8); reader.readHeaders(); int i = 0; From 3d99cea278d06f36d5b8a7ef16e5bcb483cf6992 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 16:31:14 +0800 Subject: [PATCH 02/10] Set the status before returning the error --- .../analysis/controllers/OpportunityDatasetController.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index 5941cfeec..a8b6f1118 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -345,6 +345,7 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res parameters = extractStringParameters(formFields); } catch (Exception e) { status.completeWithError(e); + res.status(400); return status; } From aac3cea7732e0645f56098904c112665a8543225 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 17:20:33 +0800 Subject: [PATCH 03/10] Move files into a scratch directory before processing --- .../controllers/OpportunityDatasetController.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index a8b6f1118..66509bcec 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -329,6 +329,8 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res OpportunityDatasetUploadStatus status = new 
OpportunityDatasetUploadStatus(regionId, sourceName); addStatusAndRemoveOldStatuses(status); + // TODO should we delete this temporary directory at the end? + final File tmpDirectory = FileUtils.createScratchDirectory(); final List files = new ArrayList<>(); final List fileItems; final FileStorageFormat uploadFormat; @@ -338,8 +340,9 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res // Call remove() rather than get() so that subsequent code will see only string parameters, not the files. fileItems = formFields.remove("files"); for (var fi : fileItems) { - var dfi = (DiskFileItem) fi; - files.add(dfi.getStoreLocation()); + var tmpFile = new File(tmpDirectory, fi.getName()); + Files.move(((DiskFileItem) fi).getStoreLocation(), tmpFile); + files.add(tmpFile); } uploadFormat = detectUploadFormatAndValidate(files); parameters = extractStringParameters(formFields); @@ -541,7 +544,7 @@ private List createGridsFromShapefile(List files, } // Copy the shapefile component files into a temporary directory with a fixed base name. 
- File tempDir = Files.createTempDir(); + File tempDir = FileUtils.createScratchDirectory(); File shpFile = new File(tempDir, "grid.shp"); Files.copy(filesByExtension.get("SHP"), shpFile); From 7cd21db53428ef82ee9bfcd49ada196c794362b9 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Sat, 28 Oct 2023 13:44:29 +0800 Subject: [PATCH 04/10] Fix NPE from incorrectly re-naming local variable --- .../datasource/DataSourceUploadAction.java | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java index 2adc53197..5f621635a 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -25,8 +25,6 @@ import static com.conveyal.analysis.util.HttpUtils.getFormField; import static com.conveyal.file.FileCategory.DATASOURCES; import static com.conveyal.file.FileStorageFormat.SHP; -import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.common.base.Preconditions.checkState; /** * Given a batch of uploaded files, put them into FileStorage, categorize and validate them, and record metadata as @@ -51,12 +49,6 @@ public class DataSourceUploadAction implements TaskAction { */ private DataSourceIngester ingester; - /** - * The file to be ingested, after it has been moved into storage. For Shapefiles and other such "sidecar" formats, - * this is the main file (.shp), with the same base name and in the same directory as all its sidecar files. - */ - private File file; - // This method is a stopgaps - it seems like this should be done differently. 
public String getDataSourceName () { return ingester.dataSource().name; @@ -77,8 +69,8 @@ public DataSourceUploadAction ( @Override public final void action (ProgressListener progressListener) throws Exception { progressListener.setWorkProduct(ingester.dataSource().toWorkProduct()); - moveFilesIntoStorage(progressListener); - ingester.ingest(file, progressListener); + File dataSourceFile = moveFilesIntoStorage(progressListener); + ingester.ingest(dataSourceFile, progressListener); dataSourceCollection.insert(ingester.dataSource()); } @@ -90,11 +82,12 @@ public final void action (ProgressListener progressListener) throws Exception { * We should also consider whether preprocessing like conversion of GTFS to MapDBs should happen at this upload * stage. If so, then this logic needs to change a bit. */ - private final void moveFilesIntoStorage (ProgressListener progressListener) { + private File moveFilesIntoStorage (ProgressListener progressListener) { // Loop through uploaded files, registering the extensions and writing to storage // (with filenames that correspond to the source id) progressListener.beginTask("Moving files into storage...", 1); final String dataSourceId = ingester.dataSource()._id.toString(); + File dataSourceFile = null; for (File file : files) { // Use canonical extension from file type - files may be uploaded with e.g. tif instead of tiff or geotiff. 
String extension = ingester.dataSource().fileFormat.extension; @@ -106,11 +99,14 @@ private final void moveFilesIntoStorage (ProgressListener progressListener) { FileStorageKey key = new FileStorageKey(DATASOURCES, dataSourceId, extension); fileStorage.moveIntoStorage(key, file); if (files.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) { - file = fileStorage.getFile(key); + dataSourceFile = fileStorage.getFile(key); } } - checkNotNull(file); - checkState(file.exists()); + + if (dataSourceFile == null || !dataSourceFile.exists()) { + throw new DataSourceException("Uploaded file cannot be found."); + } + return dataSourceFile; } /** From e273f5be53199697a262fb8777ce14af3d2e5e96 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 19:23:20 +0800 Subject: [PATCH 05/10] Handle Zip files on upload Extract them into their individual files before handling like normal files. Refactors all controllers that handle file uploads to use the new utilities. --- .../controllers/BundleController.java | 24 ++++--- .../OpportunityDatasetController.java | 25 +++----- .../datasource/DataSourceUploadAction.java | 14 +---- .../analysis/datasource/DataSourceUtil.java | 11 ++-- .../com/conveyal/analysis/util/HttpUtils.java | 55 ++++++++++++++-- .../java/com/conveyal/file/FileUtils.java | 63 ++++++++++++++++--- .../analyst/progress/ProgressInputStream.java | 10 +++ 7 files changed, 146 insertions(+), 56 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index b7fc71cc5..e38464c83 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -13,20 +13,18 @@ import com.conveyal.file.FileUtils; import com.conveyal.gtfs.GTFSCache; import com.conveyal.gtfs.GTFSFeed; -import com.conveyal.gtfs.error.GTFSError; import com.conveyal.gtfs.error.GeneralError; 
import com.conveyal.gtfs.model.Stop; import com.conveyal.gtfs.validator.PostLoadValidator; import com.conveyal.osmlib.Node; import com.conveyal.osmlib.OSM; -import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.cluster.TransportNetworkConfig; +import com.conveyal.r5.analyst.progress.ProgressInputStream; import com.conveyal.r5.analyst.progress.Task; import com.conveyal.r5.streets.OSMCache; import com.conveyal.r5.util.ExceptionUtils; import com.mongodb.QueryBuilder; import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.bson.types.ObjectId; import org.locationtech.jts.geom.Envelope; import org.slf4j.Logger; @@ -107,19 +105,25 @@ private Bundle create (Request req, Response res) { // Do some initial synchronous work setting up the bundle to fail fast if the request is bad. final Map> files = HttpUtils.getRequestFiles(req.raw()); final Bundle bundle = new Bundle(); + final File osmPbfFile; + final List gtfsZipFiles; try { bundle.name = files.get("bundleName").get(0).getString("UTF-8"); bundle.regionId = files.get("regionId").get(0).getString("UTF-8"); if (files.get("osmId") != null) { + osmPbfFile = null; bundle.osmId = files.get("osmId").get(0).getString("UTF-8"); Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next(); if (bundleWithOsm == null) { throw AnalysisServerException.badRequest("Selected OSM does not exist."); } + } else { + osmPbfFile = HttpUtils.storeFileItem(files.get("osm").get(0)); } if (files.get("feedGroupId") != null) { + gtfsZipFiles = null; bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8"); Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next(); if (bundleWithFeed == null) { @@ -134,6 +138,8 @@ private Bundle create (Request req, Response res) { bundle.feeds = bundleWithFeed.feeds; bundle.feedsComplete = 
bundleWithFeed.feedsComplete; bundle.totalFeeds = bundleWithFeed.totalFeeds; + } else { + gtfsZipFiles = HttpUtils.storeFileItems(files.get("feedGroup")); } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; @@ -155,16 +161,15 @@ private Bundle create (Request req, Response res) { .withWorkProduct(BUNDLE, bundle._id, bundle.regionId) .withAction(progressListener -> { try { - if (bundle.osmId == null) { + if (osmPbfFile != null) { // Process uploaded OSM. bundle.osmId = new ObjectId().toString(); - DiskFileItem fi = (DiskFileItem) files.get("osm").get(0); // Here we perform minimal validation by loading the OSM, but don't retain the resulting MapDB. OSM osm = new OSM(null); osm.intersectionDetection = true; // Number of entities in an OSM file is unknown, so derive progress from the number of bytes read. // Wrapping in buffered input stream should reduce number of progress updates. - osm.readPbf(ProgressInputStream.forFileItem(fi, progressListener)); + osm.readPbf(ProgressInputStream.forFile(osmPbfFile, progressListener)); // osm.readPbf(new BufferedInputStream(fi.getInputStream())); Envelope osmBounds = new Envelope(); for (Node n : osm.nodes.values()) { @@ -173,10 +178,10 @@ private Bundle create (Request req, Response res) { osm.close(); checkWgsEnvelopeSize(osmBounds, "OSM data"); // Store the source OSM file. Note that we're not storing the derived MapDB file here. 
- fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), fi.getStoreLocation()); + fileStorage.moveIntoStorage(OSMCache.getKey(bundle.osmId), osmPbfFile); } - if (bundle.feedGroupId == null) { + if (gtfsZipFiles != null) { // Process uploaded GTFS files bundle.feedGroupId = new ObjectId().toString(); @@ -186,8 +191,7 @@ private Bundle create (Request req, Response res) { bundle.feeds = new ArrayList<>(); bundle.totalFeeds = files.get("feedGroup").size(); - for (FileItem fileItem : files.get("feedGroup")) { - File feedFile = ((DiskFileItem) fileItem).getStoreLocation(); + for (File feedFile : gtfsZipFiles) { ZipFile zipFile = new ZipFile(feedFile); File tempDbFile = FileUtils.createScratchFile("db"); File tempDbpFile = new File(tempDbFile.getAbsolutePath() + ".p"); diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index 66509bcec..e731cf336 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -3,6 +3,7 @@ import com.conveyal.analysis.AnalysisServerException; import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.components.TaskScheduler; +import com.conveyal.analysis.datasource.DataSourceUtil; import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; import com.conveyal.analysis.models.DataGroup; import com.conveyal.analysis.models.OpportunityDataset; @@ -29,7 +30,6 @@ import com.google.common.io.Files; import com.mongodb.QueryBuilder; import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import org.bson.types.ObjectId; import org.slf4j.Logger; @@ -55,7 +55,6 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; -import static 
com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; import static com.conveyal.analysis.util.JsonUtil.toJson; import static com.conveyal.file.FileCategory.GRIDS; import static com.conveyal.r5.analyst.WebMercatorExtents.parseZoom; @@ -327,24 +326,15 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res // are recorded in a persistent purpose-built way rather than falling back on the UI's catch-all error window. // TODO more standardized mechanism for tracking asynchronous tasks and catching exceptions on them OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName); - addStatusAndRemoveOldStatuses(status); - // TODO should we delete this temporary directory at the end? - final File tmpDirectory = FileUtils.createScratchDirectory(); - final List files = new ArrayList<>(); - final List fileItems; + final List files; final FileStorageFormat uploadFormat; final Map parameters; try { // Validate inputs and parameters, which will throw an exception if there's anything wrong with them. // Call remove() rather than get() so that subsequent code will see only string parameters, not the files. - fileItems = formFields.remove("files"); - for (var fi : fileItems) { - var tmpFile = new File(tmpDirectory, fi.getName()); - Files.move(((DiskFileItem) fi).getStoreLocation(), tmpFile); - files.add(tmpFile); - } - uploadFormat = detectUploadFormatAndValidate(files); + files = HttpUtils.storeFileItemsAndUnzip(formFields.remove("files")); + uploadFormat = DataSourceUtil.detectUploadFormatAndValidate(files); parameters = extractStringParameters(formFields); } catch (Exception e) { status.completeWithError(e); @@ -352,6 +342,9 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res return status; } + // Add the status to the region wide tracker before we begin the heavy tasks. 
+ addStatusAndRemoveOldStatuses(status); + // We are going to call several potentially slow blocking methods to create and persist new pointsets. // This whole series of actions will be run sequentially but within an asynchronous Executor task. // After enqueueing, the status is returned so the UI can track progress. @@ -631,6 +624,7 @@ public static class OpportunityDatasetUploadStatus implements ProgressListener { public Status status = Status.PROCESSING; public String name; public String message; + public String stackTrace; public Date createdAt; public Date completedAt; @@ -647,7 +641,8 @@ private void completed (Status status) { } public void completeWithError (Exception e) { - message = "Unable to create opportunity dataset. " + ExceptionUtils.stackTraceString(e); + stackTrace = ExceptionUtils.stackTraceString(e); + message = "Unable to create opportunity dataset. " + e.getMessage(); completed(Status.ERROR); } diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java index 5f621635a..4e3dd3b31 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -3,25 +3,23 @@ import com.conveyal.analysis.UserPermissions; import com.conveyal.analysis.models.DataSource; import com.conveyal.analysis.persistence.AnalysisCollection; +import com.conveyal.analysis.util.HttpUtils; import com.conveyal.file.FileStorage; import com.conveyal.file.FileStorageFormat; import com.conveyal.file.FileStorageKey; import com.conveyal.r5.analyst.progress.ProgressListener; import com.conveyal.r5.analyst.progress.TaskAction; import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.util.ArrayList; 
import java.util.List; import java.util.Locale; import java.util.Map; import java.util.stream.Collectors; -import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; import static com.conveyal.analysis.util.HttpUtils.getFormField; import static com.conveyal.file.FileCategory.DATASOURCES; import static com.conveyal.file.FileStorageFormat.SHP; @@ -123,15 +121,9 @@ public static DataSourceUploadAction forFormFields ( // Extract required parameters. Throws AnalysisServerException on failure, e.g. if a field is missing. final String sourceName = getFormField(formFields, "sourceName", true); final String regionId = getFormField(formFields, "regionId", true); - final List fileItems = formFields.get("sourceFiles"); - final List files = new ArrayList<>(); + final List files = HttpUtils.storeFileItemsAndUnzip(formFields.get("sourceFiles")); - for (var fi : fileItems) { - var dfi = (DiskFileItem) fi; - files.add(dfi.getStoreLocation()); - } - - FileStorageFormat format = detectUploadFormatAndValidate(files); + FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(files); DataSourceIngester ingester = DataSourceIngester.forFormat(format); String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", ")); diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java index 13b158fe1..01222c210 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java @@ -27,15 +27,15 @@ public abstract class DataSourceUtil { * @throws DataSourceException if the type of the upload can't be detected or preconditions are violated. * @return the expected type of the uploaded file or files, never null. 
*/ - public static FileStorageFormat detectUploadFormatAndValidate (List fileItems) { - if (isNullOrEmpty(fileItems)) { + public static FileStorageFormat detectUploadFormatAndValidate (List files) { + if (isNullOrEmpty(files)) { throw new DataSourceException("You must select some files to upload."); } - Set fileExtensions = extractFileExtensions(fileItems); + Set fileExtensions = extractFileExtensions(files); if (fileExtensions.isEmpty()) { throw new DataSourceException("No file extensions seen, cannot detect upload type."); } - checkFileCharacteristics(fileItems); + checkFileCharacteristics(files); if (fileExtensions.contains("zip")) { throw new DataSourceException("Upload of spatial .zip files not yet supported"); // TODO unzip and process unzipped files - will need to peek inside to detect GTFS uploads first. @@ -45,7 +45,7 @@ public static FileStorageFormat detectUploadFormatAndValidate (List fileIt final Set shapefileExtensions = Sets.newHashSet("shp", "dbf", "prj"); if ( ! Sets.intersection(fileExtensions, shapefileExtensions).isEmpty()) { if (fileExtensions.containsAll(shapefileExtensions)) { - verifyBaseNamesSame(fileItems); + verifyBaseNamesSame(files); // TODO check that any additional file is .shx, and that there are no more than 4 files. 
} else { throw new DataSourceException("You must multi-select at least SHP, DBF, and PRJ files for shapefile upload."); @@ -113,5 +113,4 @@ private static void verifyBaseNamesSame (List files) { } } } - } diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java index 53cb2a04a..b84f77ff3 100644 --- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java +++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java @@ -1,14 +1,17 @@ package com.conveyal.analysis.util; import com.conveyal.analysis.AnalysisServerException; +import com.conveyal.analysis.datasource.DataSourceException; +import com.conveyal.file.FileUtils; import com.conveyal.r5.util.ExceptionUtils; import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.FileItemFactory; -import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.servlet.ServletFileUpload; import javax.servlet.http.HttpServletRequest; +import java.io.File; +import java.io.IOException; import java.io.UnsupportedEncodingException; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -36,8 +39,7 @@ public static Map> getRequestFiles (HttpServletRequest re // If we always saved the FileItems via write() or read them with getInputStream() they would not all need to // be on disk. 
try { - FileItemFactory fileItemFactory = new DiskFileItemFactory(0, null); - ServletFileUpload sfu = new ServletFileUpload(fileItemFactory); + ServletFileUpload sfu = new ServletFileUpload(); return sfu.parseParameterMap(req); } catch (Exception e) { throw AnalysisServerException.fileUpload(ExceptionUtils.stackTraceString(e)); @@ -66,4 +68,49 @@ public static String getFormField(Map> formFields, String fieldName)); } } + + public static List storeFileItemsAndUnzip(List fileItems) { + return storeFileItemsAndUnzip(fileItems, FileUtils.createScratchDirectory()); + } + + /** + * Convert `FileItem`s into `File`s and move them into the given directory. Automatically unzip files. Return the + * list of new `File` handles. + */ + public static List storeFileItemsAndUnzip(List fileItems, File directory) { + List files = new ArrayList<>(); + for (FileItem fi : fileItems) { + File file = storeFileItemInDirectory(fi, directory); + String name = file.getName(); + if (name.toLowerCase().endsWith(".zip")) { + files.addAll(FileUtils.unZipFileIntoDirectory(file, directory)); + } else { + files.add(file); + } + } + return files; + } + + public static List storeFileItems(List fileItems) { + File directory = FileUtils.createScratchDirectory(); + List files = new ArrayList<>(); + for (FileItem fileItem : fileItems) { + files.add(storeFileItemInDirectory(fileItem, directory)); + } + return files; + } + + public static File storeFileItem(FileItem fileItem) { + return storeFileItemInDirectory(fileItem, FileUtils.createScratchDirectory()); + } + + public static File storeFileItemInDirectory(FileItem fileItem, File directory) { + try { + File file = new File(directory, fileItem.getName()); + fileItem.getInputStream().transferTo(FileUtils.getOutputStream(file)); + return file; + } catch (IOException e) { + throw new DataSourceException(e.getMessage()); + } + } } diff --git a/src/main/java/com/conveyal/file/FileUtils.java b/src/main/java/com/conveyal/file/FileUtils.java index 
f2eb94842..704fc2e07 100644 --- a/src/main/java/com/conveyal/file/FileUtils.java +++ b/src/main/java/com/conveyal/file/FileUtils.java @@ -1,17 +1,13 @@ package com.conveyal.file; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.RandomAccessFile; +import java.io.*; import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; import java.util.zip.GZIPInputStream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; public abstract class FileUtils { /** @@ -123,4 +119,51 @@ public static boolean isGzip (File file) { throw new RuntimeException(e); } } + + /** + * Extract the files inside a zipped file into a given directory and return the `File` handles to the new files. + */ + public static List unZipFileIntoDirectory(File zipFile, File directory) { + List files = new ArrayList<>(); + ZipInputStream zis = new ZipInputStream(FileUtils.getInputStream(zipFile)); + ZipEntry zipEntry; + byte[] buffer = new byte[1024]; + try { + while ((zipEntry = zis.getNextEntry()) != null) { + String entryName = zipEntry.getName(); + File newFile = new File(directory, entryName); + if (zipEntry.isDirectory()) { + // Skip special `__MACOSX` directories. + if (entryName.toUpperCase().contains("__MACOSX")) continue; + if (!newFile.isDirectory() && !newFile.mkdirs()) { + throw new IOException("Failed to create directory " + newFile); + } + } else { + // Skip file names beginning with a "." 
+ if (Path.of(entryName).getFileName().startsWith(".")) continue; + + // fix for Windows-created archives + File parent = newFile.getParentFile(); + if (!parent.isDirectory() && !parent.mkdirs()) { + throw new IOException("Failed to create directory " + parent); + } + + // write file content + FileOutputStream fos = new FileOutputStream(newFile); + int len; + while ((len = zis.read(buffer)) > 0) { + fos.write(buffer, 0, len); + } + fos.close(); + files.add(newFile); + } + } + zis.closeEntry(); + zis.close(); + } catch (IOException e) { + throw new RuntimeException(e.getMessage()); + } + + return files; + } } diff --git a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java index bc02502d2..b8efca7f8 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java @@ -1,8 +1,10 @@ package com.conveyal.r5.analyst.progress; +import com.conveyal.file.FileUtils; import org.apache.commons.fileupload.FileItem; import org.apache.commons.io.input.ProxyInputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -54,4 +56,12 @@ public static ProgressInputStream forFileItem (FileItem fileItem, ProgressListen } } + /** + * Given an uploaded file, report progress on reading it. 
+ */ + public static ProgressInputStream forFile (File file, ProgressListener progressListener) { + checkArgument(file.length() < Integer.MAX_VALUE); + progressListener.beginTask("Reading file " + file.getName(), (int)(file.length())); + return new ProgressInputStream(progressListener, FileUtils.getInputStream(file)); + } } From 6bceedc48df11d51989b5579e00bcb0b0fb897e4 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 19:28:44 +0800 Subject: [PATCH 06/10] Remove unused method --- .../analyst/progress/ProgressInputStream.java | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java index b8efca7f8..417ae6de9 100644 --- a/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java +++ b/src/main/java/com/conveyal/r5/analyst/progress/ProgressInputStream.java @@ -1,7 +1,6 @@ package com.conveyal.r5.analyst.progress; import com.conveyal.file.FileUtils; -import org.apache.commons.fileupload.FileItem; import org.apache.commons.io.input.ProxyInputStream; import java.io.File; @@ -15,7 +14,7 @@ * This will report progress as the total number of bytes that have passed through the stream, like CountingInputStream. * This can exceed 100% of the file size if the caller uses mark and reset. The progressListener should be * pre-configured with the total number of bytes expected and a detail message using ProgressListener::beginTask. - * The static method forFileItem() demonstrates usage when reading from a file of known length. + * The static method forFile() demonstrates usage when reading from a file of known length. */ public class ProgressInputStream extends ProxyInputStream { @@ -40,22 +39,6 @@ public synchronized long skip (final long length) throws IOException { return skippedBytes; } - /** - * Given an uploaded file, report progress on reading it. 
- * Incrementing the progress seems to introduce some inefficiency when performing unbuffered small reads, such as - * calls to InputStream.read() which are used by DataInputStream to read numbers. - * TODO wrap in buffered input stream to reduce small read calls, or tune to only report once per percentage? - */ - public static ProgressInputStream forFileItem (FileItem fileItem, ProgressListener progressListener) { - try { - checkArgument(fileItem.getSize() < Integer.MAX_VALUE); - progressListener.beginTask("Reading file " + fileItem.getName(), (int)(fileItem.getSize())); - return new ProgressInputStream(progressListener, fileItem.getInputStream()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - /** * Given an uploaded file, report progress on reading it. */ From 862f91182d48942e00b858d8ea1a94d2cd3a909d Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 19:28:55 +0800 Subject: [PATCH 07/10] Update comments and consolidate methods --- .../com/conveyal/analysis/util/HttpUtils.java | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java index b84f77ff3..55f8e1047 100644 --- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java +++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java @@ -30,14 +30,6 @@ public static Map> getRequestFiles (HttpServletRequest re // The Javadoc on this factory class doesn't say anything about thread safety. Looking at the source code it // all looks threadsafe. But also very lightweight to instantiate, so in this code run by multiple threads // we play it safe and always create a new factory. - // Setting a size threshold of 0 causes all files to be written to disk, which allows processing them in a - // uniform way in other threads, after the request handler has returned. 
This does however cause some very - // small form fields to be written to disk files. Ideally we'd identify the smallest actual file we'll ever - // handle and set the threshold a little higher. The downside is that if a tiny file is actually uploaded even - // by accident, our code will not be able to get a file handle for it and fail. Some legitimate files like - // Shapefile .prj sidecars can be really small. - // If we always saved the FileItems via write() or read them with getInputStream() they would not all need to - // be on disk. try { ServletFileUpload sfu = new ServletFileUpload(); return sfu.parseParameterMap(req); @@ -69,15 +61,12 @@ public static String getFormField(Map> formFields, String } } - public static List storeFileItemsAndUnzip(List fileItems) { - return storeFileItemsAndUnzip(fileItems, FileUtils.createScratchDirectory()); - } - /** - * Convert `FileItem`s into `File`s and move them into the given directory. Automatically unzip files. Return the - * list of new `File` handles. + * Convert `FileItem`s into `File`s and move them into a temp directory. Automatically unzip files. Return the list + * of new `File` handles. */ - public static List storeFileItemsAndUnzip(List fileItems, File directory) { + public static List storeFileItemsAndUnzip(List fileItems) { + File directory = FileUtils.createScratchDirectory(); List files = new ArrayList<>(); for (FileItem fi : fileItems) { File file = storeFileItemInDirectory(fi, directory); @@ -91,6 +80,9 @@ public static List storeFileItemsAndUnzip(List fileItems, File d return files; } + /** + * Convert `FileItem`s into `File`s and move them into a temporary directory. Return the list of new `File` handles. 
+ */ public static List storeFileItems(List fileItems) { File directory = FileUtils.createScratchDirectory(); List files = new ArrayList<>(); From 9b8d4386b6d3b61b5379995a69f08c3f099d19b8 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Sat, 28 Oct 2023 11:26:46 +0800 Subject: [PATCH 08/10] Revert removal of `DiskFileItemFactory` --- .../com/conveyal/analysis/util/HttpUtils.java | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java index 55f8e1047..fc1c5aa70 100644 --- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java +++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java @@ -1,15 +1,15 @@ package com.conveyal.analysis.util; import com.conveyal.analysis.AnalysisServerException; -import com.conveyal.analysis.datasource.DataSourceException; import com.conveyal.file.FileUtils; import com.conveyal.r5.util.ExceptionUtils; import org.apache.commons.fileupload.FileItem; +import org.apache.commons.fileupload.disk.DiskFileItem; +import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.servlet.ServletFileUpload; import javax.servlet.http.HttpServletRequest; import java.io.File; -import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.List; @@ -30,8 +30,17 @@ public static Map> getRequestFiles (HttpServletRequest re // The Javadoc on this factory class doesn't say anything about thread safety. Looking at the source code it // all looks threadsafe. But also very lightweight to instantiate, so in this code run by multiple threads // we play it safe and always create a new factory. + // Setting a size threshold of 0 causes all files to be written to disk, which allows processing them in a + // uniform way in other threads, after the request handler has returned. 
This does however cause some very + // small form fields to be written to disk files. Ideally we'd identify the smallest actual file we'll ever + // handle and set the threshold a little higher. The downside is that if a tiny file is actually uploaded even + // by accident, our code will not be able to get a file handle for it and fail. Some legitimate files like + // Shapefile .prj sidecars can be really small. + // If we always saved the FileItems via write() or read them with getInputStream() they would not all need to + // be on disk. try { - ServletFileUpload sfu = new ServletFileUpload(); + DiskFileItemFactory diskFileItemFactory = new DiskFileItemFactory(0, null); + ServletFileUpload sfu = new ServletFileUpload(diskFileItemFactory); return sfu.parseParameterMap(req); } catch (Exception e) { throw AnalysisServerException.fileUpload(ExceptionUtils.stackTraceString(e)); @@ -97,12 +106,11 @@ public static File storeFileItem(FileItem fileItem) { } public static File storeFileItemInDirectory(FileItem fileItem, File directory) { - try { - File file = new File(directory, fileItem.getName()); - fileItem.getInputStream().transferTo(FileUtils.getOutputStream(file)); - return file; - } catch (IOException e) { - throw new DataSourceException(e.getMessage()); + File file = new File(directory, fileItem.getName()); + boolean renameSuccessful = ((DiskFileItem) fileItem).getStoreLocation().renameTo(file); + if (!renameSuccessful) { + throw AnalysisServerException.fileUpload("Error storing file in directory on disk. 
File.renameTo() failed."); } + return file; } } From a44799de67418f72c7154968efd3bff0996b6eb0 Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Sat, 28 Oct 2023 15:10:30 +0800 Subject: [PATCH 09/10] Rename methods for clarity --- .../controllers/BundleController.java | 4 +-- .../OpportunityDatasetController.java | 2 +- .../datasource/DataSourceUploadAction.java | 2 +- .../com/conveyal/analysis/util/HttpUtils.java | 28 +++++++++++-------- .../java/com/conveyal/file/FileUtils.java | 2 +- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java index e38464c83..ef9111285 100644 --- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java +++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java @@ -119,7 +119,7 @@ private Bundle create (Request req, Response res) { throw AnalysisServerException.badRequest("Selected OSM does not exist."); } } else { - osmPbfFile = HttpUtils.storeFileItem(files.get("osm").get(0)); + osmPbfFile = HttpUtils.saveFileItemLocally(files.get("osm").get(0)); } if (files.get("feedGroupId") != null) { @@ -139,7 +139,7 @@ private Bundle create (Request req, Response res) { bundle.feedsComplete = bundleWithFeed.feedsComplete; bundle.totalFeeds = bundleWithFeed.totalFeeds; } else { - gtfsZipFiles = HttpUtils.storeFileItems(files.get("feedGroup")); + gtfsZipFiles = HttpUtils.saveFileItemsLocally(files.get("feedGroup")); } UserPermissions userPermissions = UserPermissions.from(req); bundle.accessGroup = userPermissions.accessGroup; diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index e731cf336..3e320d43b 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ 
b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -333,7 +333,7 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res try { // Validate inputs and parameters, which will throw an exception if there's anything wrong with them. // Call remove() rather than get() so that subsequent code will see only string parameters, not the files. - files = HttpUtils.storeFileItemsAndUnzip(formFields.remove("files")); + files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.remove("files")); uploadFormat = DataSourceUtil.detectUploadFormatAndValidate(files); parameters = extractStringParameters(formFields); } catch (Exception e) { diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java index 4e3dd3b31..28028f861 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -121,7 +121,7 @@ public static DataSourceUploadAction forFormFields ( // Extract required parameters. Throws AnalysisServerException on failure, e.g. if a field is missing. 
final String sourceName = getFormField(formFields, "sourceName", true); final String regionId = getFormField(formFields, "regionId", true); - final List files = HttpUtils.storeFileItemsAndUnzip(formFields.get("sourceFiles")); + final List files = HttpUtils.extractFilesFromFileItemsAndUnzip(formFields.get("sourceFiles")); FileStorageFormat format = DataSourceUtil.detectUploadFormatAndValidate(files); DataSourceIngester ingester = DataSourceIngester.forFormat(format); diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java index fc1c5aa70..978dd3f52 100644 --- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java +++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java @@ -71,17 +71,17 @@ public static String getFormField(Map> formFields, String } /** - * Convert `FileItem`s into `File`s and move them into a temp directory. Automatically unzip files. Return the list - * of new `File` handles. + * Extracts `FileItem`s contents locally in a temp directory. Automatically unzip files. Return the list of new + * `File` handles. */ - public static List storeFileItemsAndUnzip(List fileItems) { + public static List extractFilesFromFileItemsAndUnzip(List fileItems) { File directory = FileUtils.createScratchDirectory(); List files = new ArrayList<>(); for (FileItem fi : fileItems) { - File file = storeFileItemInDirectory(fi, directory); + File file = moveFileItemIntoDirectory(fi, directory); String name = file.getName(); if (name.toLowerCase().endsWith(".zip")) { - files.addAll(FileUtils.unZipFileIntoDirectory(file, directory)); + files.addAll(FileUtils.unzipFileIntoDirectory(file, directory)); } else { files.add(file); } @@ -90,22 +90,28 @@ public static List storeFileItemsAndUnzip(List fileItems) { } /** - * Convert `FileItem`s into `File`s and move them into a temporary directory. Return the list of new `File` handles. + * Move `FileItem`s contents into a temporary directory. 
Return the list of new `File` handles. */ - public static List storeFileItems(List fileItems) { + public static List saveFileItemsLocally(List fileItems) { File directory = FileUtils.createScratchDirectory(); List files = new ArrayList<>(); for (FileItem fileItem : fileItems) { - files.add(storeFileItemInDirectory(fileItem, directory)); + files.add(moveFileItemIntoDirectory(fileItem, directory)); } return files; } - public static File storeFileItem(FileItem fileItem) { - return storeFileItemInDirectory(fileItem, FileUtils.createScratchDirectory()); + /** + * Save the contents of a `FileItem` in a temporary directory and return the `File`. + */ + public static File saveFileItemLocally(FileItem fileItem) { + return moveFileItemIntoDirectory(fileItem, FileUtils.createScratchDirectory()); } - public static File storeFileItemInDirectory(FileItem fileItem, File directory) { + /** + * Move the contents of a `FileItem` to the given directory by calling `renameTo`. + */ + public static File moveFileItemIntoDirectory(FileItem fileItem, File directory) { File file = new File(directory, fileItem.getName()); boolean renameSuccessful = ((DiskFileItem) fileItem).getStoreLocation().renameTo(file); if (!renameSuccessful) { diff --git a/src/main/java/com/conveyal/file/FileUtils.java b/src/main/java/com/conveyal/file/FileUtils.java index 704fc2e07..5890d18bd 100644 --- a/src/main/java/com/conveyal/file/FileUtils.java +++ b/src/main/java/com/conveyal/file/FileUtils.java @@ -123,7 +123,7 @@ public static boolean isGzip (File file) { /** * Extract the files inside a zipped file into a given directory and return the `File` handles to the new files. 
*/ - public static List unZipFileIntoDirectory(File zipFile, File directory) { + public static List unzipFileIntoDirectory(File zipFile, File directory) { List files = new ArrayList<>(); ZipInputStream zis = new ZipInputStream(FileUtils.getInputStream(zipFile)); ZipEntry zipEntry; From 5135b220a0ac35ac1389ef3b3bd921d67bbd820b Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Tue, 31 Oct 2023 18:31:58 +0800 Subject: [PATCH 10/10] Use `FileItem.write()` to write --- .../java/com/conveyal/analysis/util/HttpUtils.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/util/HttpUtils.java b/src/main/java/com/conveyal/analysis/util/HttpUtils.java index 978dd3f52..74fef654e 100644 --- a/src/main/java/com/conveyal/analysis/util/HttpUtils.java +++ b/src/main/java/com/conveyal/analysis/util/HttpUtils.java @@ -4,7 +4,6 @@ import com.conveyal.file.FileUtils; import com.conveyal.r5.util.ExceptionUtils; import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.fileupload.disk.DiskFileItemFactory; import org.apache.commons.fileupload.servlet.ServletFileUpload; @@ -109,13 +108,15 @@ public static File saveFileItemLocally(FileItem fileItem) { } /** - * Move the contents of a `FileItem` to the given directory by calling `renameTo`. + * Move the contents of a `FileItem` to the given directory by calling `write`. `DiskFileItem`s will call `renameTo` + * if the `FileItem`'s contents are already on the disk. */ public static File moveFileItemIntoDirectory(FileItem fileItem, File directory) { File file = new File(directory, fileItem.getName()); - boolean renameSuccessful = ((DiskFileItem) fileItem).getStoreLocation().renameTo(file); - if (!renameSuccessful) { - throw AnalysisServerException.fileUpload("Error storing file in directory on disk. 
File.renameTo() failed."); + try { + fileItem.write(file); + } catch (Exception e) { + throw new AnalysisServerException(e, "Error storing file in directory on disk. FileItem.write() failed."); } return file; }