Skip to content

Commit

Permalink
Convert FileItems to Files before using
Browse files Browse the repository at this point in the history
We use Apache FileUpload to assist with handling incoming files. We also use a DiskItemFactory that persists each incoming file to disk immediately. This means that we could represent each incoming file as a java.io.File instead of a FileItem. java.io.File is a much more common way of handling a file and easier to work with.
  • Loading branch information
trevorgerhardt committed Oct 28, 2023
1 parent 4397ac7 commit e6b87b8
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import com.conveyal.analysis.persistence.AnalysisCollection;
import com.conveyal.analysis.persistence.AnalysisDB;
import com.conveyal.analysis.persistence.Persistence;
import com.conveyal.analysis.util.FileItemInputStreamProvider;
import com.conveyal.analysis.util.HttpUtils;
import com.conveyal.analysis.util.JsonUtil;
import com.conveyal.file.FileStorage;
Expand All @@ -25,12 +24,12 @@
import com.conveyal.r5.analyst.progress.Task;
import com.conveyal.r5.analyst.progress.WorkProduct;
import com.conveyal.r5.util.ExceptionUtils;
import com.conveyal.r5.util.InputStreamProvider;
import com.conveyal.r5.util.ProgressListener;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.io.Files;
import com.mongodb.QueryBuilder;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.io.FilenameUtils;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
Expand Down Expand Up @@ -275,7 +274,7 @@ private static FileStorageFormat getFormatCode (PointSet pointSet) {
* This method executes in a blocking (synchronous) manner, but it can take a while so should be called within an
* non-blocking asynchronous task.
*/
private List<FreeFormPointSet> createFreeFormPointSetsFromCsv(FileItem csvFileItem, Map<String, String> params) {
private List<FreeFormPointSet> createFreeFormPointSetsFromCsv(File csvFile, Map<String, String> params) {

String latField = params.get("latField");
String lonField = params.get("lonField");
Expand All @@ -296,12 +295,11 @@ private List<FreeFormPointSet> createFreeFormPointSetsFromCsv(FileItem csvFileIt

try {
List<FreeFormPointSet> pointSets = new ArrayList<>();
InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem);
pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField, lonField, idField, countField));
pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField, lonField, idField, countField));
// The second pair of lat and lon fields allow creating two matched pointsets from the same CSV.
// This is used for one-to-one travel times between specific origins/destinations.
if (latField2 != null && lonField2 != null) {
pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField2, lonField2, idField, countField));
pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField2, lonField2, idField, countField));
}
return pointSets;
} catch (Exception e) {
Expand Down Expand Up @@ -331,14 +329,19 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName);
addStatusAndRemoveOldStatuses(status);

final List<File> files = new ArrayList<>();
final List<FileItem> fileItems;
final FileStorageFormat uploadFormat;
final Map<String, String> parameters;
try {
// Validate inputs and parameters, which will throw an exception if there's anything wrong with them.
// Call remove() rather than get() so that subsequent code will see only string parameters, not the files.
fileItems = formFields.remove("files");
uploadFormat = detectUploadFormatAndValidate(fileItems);
for (var fi : fileItems) {
var dfi = (DiskFileItem) fi;
files.add(dfi.getStoreLocation());
}
uploadFormat = detectUploadFormatAndValidate(files);
parameters = extractStringParameters(formFields);
} catch (Exception e) {
status.completeWithError(e);
Expand All @@ -354,35 +357,35 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
List<PointSet> pointsets = new ArrayList<>();
if (uploadFormat == FileStorageFormat.GRID) {
LOG.info("Detected opportunity dataset stored in Conveyal binary format.");
pointsets.addAll(createGridsFromBinaryGridFiles(fileItems, status));
pointsets.addAll(createGridsFromBinaryGridFiles(files, status));
} else if (uploadFormat == FileStorageFormat.SHP) {
LOG.info("Detected opportunity dataset stored as ESRI shapefile.");
pointsets.addAll(createGridsFromShapefile(fileItems, zoom, status));
pointsets.addAll(createGridsFromShapefile(files, zoom, status));
} else if (uploadFormat == FileStorageFormat.CSV) {
LOG.info("Detected opportunity dataset stored as CSV");
// Create a grid even when user has requested a freeform pointset so we have something to visualize.
FileItem csvFileItem = fileItems.get(0);
File csvFile = files.get(0);
// FIXME why were we uploading to S3 using the file path not the UUID?
// writeFileToS3(csvFile);
// TODO report progress / status as with grids. That involves pre-scanning the CSV which would be
// facilitated by retaining the CSV server side and later converting to pointset.
boolean requestedFreeForm = Boolean.parseBoolean(parameters.get("freeform"));
// Hack to enable freeform pointset building without exposing a UI element, via file name.
if (csvFileItem.getName().contains("FREEFORM_PS.")) {
if (csvFile.getName().contains("FREEFORM_PS.")) {
requestedFreeForm = true;
}
if (requestedFreeForm) {
LOG.info("Processing CSV as freeform (rather than gridded) pointset as requested.");
// This newer process creates a FreeFormPointSet only for the specified count fields,
// as well as a Grid to assist in visualization of the uploaded data.
for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFileItem, parameters)) {
for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFile, parameters)) {
Grid gridFromFreeForm = Grid.fromFreeForm(freeForm, zoom);
pointsets.add(freeForm);
pointsets.add(gridFromFreeForm);
}
} else {
// This is the common default process: create a grid for every non-ignored field in the CSV.
pointsets.addAll(createGridsFromCsv(csvFileItem, formFields, zoom, status));
pointsets.addAll(createGridsFromCsv(csvFile, formFields, zoom, status));
}
}
if (pointsets.isEmpty()) {
Expand Down Expand Up @@ -473,7 +476,7 @@ private OpportunityDataset deleteDataset(String id, UserPermissions userPermissi
* TODO explain latField2 usage
* @return one or two Grids for each numeric column in the CSV input.
*/
private List<Grid> createGridsFromCsv(FileItem csvFileItem,
private List<Grid> createGridsFromCsv(File csvFile,
Map<String, List<FileItem>> query,
int zoom,
OpportunityDatasetUploadStatus status) throws Exception {
Expand All @@ -488,12 +491,11 @@ private List<Grid> createGridsFromCsv(FileItem csvFileItem,
String lonField2 = HttpUtils.getFormField(query, "lonField2", false);

List<String> ignoreFields = Arrays.asList(idField, latField2, lonField2);
InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem);
List<Grid> grids = Grid.fromCsv(csvStreamProvider, latField, lonField, ignoreFields, zoom, status);
List<Grid> grids = Grid.fromCsv(csvFile, latField, lonField, ignoreFields, zoom, status);
// TODO verify correctness of this second pass
if (latField2 != null && lonField2 != null) {
ignoreFields = Arrays.asList(idField, latField, lonField);
grids.addAll(Grid.fromCsv(csvStreamProvider, latField2, lonField2, ignoreFields, zoom, status));
grids.addAll(Grid.fromCsv(csvFile, latField2, lonField2, ignoreFields, zoom, status));
}

return grids;
Expand All @@ -503,14 +505,14 @@ private List<Grid> createGridsFromCsv(FileItem csvFileItem,
* Create grids from uploaded files containing binary grid data.
* For those in the know, we can upload manually created binary grid files.
*/
private List<Grid> createGridsFromBinaryGridFiles(List<FileItem> uploadedFiles,
private List<Grid> createGridsFromBinaryGridFiles(List<File> uploadedFiles,
OpportunityDatasetUploadStatus status) throws Exception {

List<Grid> grids = new ArrayList<>();
status.totalFeatures = uploadedFiles.size();
for (FileItem fileItem : uploadedFiles) {
Grid grid = Grid.read(fileItem.getInputStream());
String name = fileItem.getName();
for (File file : uploadedFiles) {
Grid grid = Grid.read(FileUtils.getInputStream(file));
String name = file.getName();
// Remove ".grid" from the name
if (name.contains(".grid")) name = name.split(".grid")[0];
grid.name = name;
Expand All @@ -522,37 +524,37 @@ private List<Grid> createGridsFromBinaryGridFiles(List<FileItem> uploadedFiles,
}

/**
* Preconditions: fileItems must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the
* Preconditions: files must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the
* same base name, and should not contain any other files but these three or four.
*/
private List<Grid> createGridsFromShapefile(List<FileItem> fileItems,
private List<Grid> createGridsFromShapefile(List<File> files,
int zoom,
OpportunityDatasetUploadStatus status) throws Exception {

// In the caller, we should have already verified that all files have the same base name and have an extension.
// Extract the relevant files: .shp, .prj, .dbf, and .shx.
// We need the SHX even though we're looping over every feature as they might be sparse.
Map<String, FileItem> filesByExtension = new HashMap<>();
for (FileItem fileItem : fileItems) {
filesByExtension.put(FilenameUtils.getExtension(fileItem.getName()).toUpperCase(), fileItem);
Map<String, File> filesByExtension = new HashMap<>();
for (File file : files) {
filesByExtension.put(FilenameUtils.getExtension(file.getName()).toUpperCase(), file);
}

// Copy the shapefile component files into a temporary directory with a fixed base name.
File tempDir = Files.createTempDir();

File shpFile = new File(tempDir, "grid.shp");
filesByExtension.get("SHP").write(shpFile);
Files.copy(filesByExtension.get("SHP"), shpFile);

File prjFile = new File(tempDir, "grid.prj");
filesByExtension.get("PRJ").write(prjFile);
Files.copy(filesByExtension.get("PRJ"), prjFile);

File dbfFile = new File(tempDir, "grid.dbf");
filesByExtension.get("DBF").write(dbfFile);
Files.copy(filesByExtension.get("DBF"), dbfFile);

// The .shx file is an index. It is optional, and not needed for dense shapefiles.
if (filesByExtension.containsKey("SHX")) {
File shxFile = new File(tempDir, "grid.shx");
filesByExtension.get("SHX").write(shxFile);
Files.copy(filesByExtension.get("SHX"), shxFile);
}

List<Grid> grids = Grid.fromShapefile(shpFile, zoom, status);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
import org.slf4j.LoggerFactory;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import static com.conveyal.analysis.util.HttpUtils.getFormField;
import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate;
import static com.conveyal.analysis.util.HttpUtils.getFormField;
import static com.conveyal.file.FileCategory.DATASOURCES;
import static com.conveyal.file.FileStorageFormat.SHP;
import static com.google.common.base.Preconditions.checkNotNull;
Expand All @@ -42,7 +43,7 @@ public class DataSourceUploadAction implements TaskAction {
private final AnalysisCollection<DataSource> dataSourceCollection;

/** The files provided in the HTTP post form. These will be moved into storage. */
private final List<FileItem> fileItems;
private final List<File> files;

/**
* This DataSourceIngester provides encapsulated loading and validation logic for a single format, by composition
Expand All @@ -64,12 +65,12 @@ public String getDataSourceName () {
public DataSourceUploadAction (
FileStorage fileStorage,
AnalysisCollection<DataSource> dataSourceCollection,
List<FileItem> fileItems,
List<File> files,
DataSourceIngester ingester
) {
this.fileStorage = fileStorage;
this.dataSourceCollection = dataSourceCollection;
this.fileItems = fileItems;
this.files = files;
this.ingester = ingester;
}

Expand All @@ -94,18 +95,17 @@ private final void moveFilesIntoStorage (ProgressListener progressListener) {
// (with filenames that correspond to the source id)
progressListener.beginTask("Moving files into storage...", 1);
final String dataSourceId = ingester.dataSource()._id.toString();
for (FileItem fileItem : fileItems) {
DiskFileItem dfi = (DiskFileItem) fileItem;
for (File file : files) {
// Use canonical extension from file type - files may be uploaded with e.g. tif instead of tiff or geotiff.
String extension = ingester.dataSource().fileFormat.extension;
if (fileItems.size() > 1) {
if (files.size() > 1) {
// If we have multiple files, as with Shapefile, just keep the original extension for each file.
// This could lead to orphaned files after a deletion, we might want to implement wildcard deletion.
extension = FilenameUtils.getExtension(fileItem.getName()).toLowerCase(Locale.ROOT);
extension = FilenameUtils.getExtension(file.getName()).toLowerCase(Locale.ROOT);
}
FileStorageKey key = new FileStorageKey(DATASOURCES, dataSourceId, extension);
fileStorage.moveIntoStorage(key, dfi.getStoreLocation());
if (fileItems.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) {
fileStorage.moveIntoStorage(key, file);
if (files.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) {
file = fileStorage.getFile(key);
}
}
Expand All @@ -128,14 +128,20 @@ public static DataSourceUploadAction forFormFields (
final String sourceName = getFormField(formFields, "sourceName", true);
final String regionId = getFormField(formFields, "regionId", true);
final List<FileItem> fileItems = formFields.get("sourceFiles");
final List<File> files = new ArrayList<>();

for (var fi : fileItems) {
var dfi = (DiskFileItem) fi;
files.add(dfi.getStoreLocation());
}

FileStorageFormat format = detectUploadFormatAndValidate(fileItems);
FileStorageFormat format = detectUploadFormatAndValidate(files);
DataSourceIngester ingester = DataSourceIngester.forFormat(format);

String originalFileNames = fileItems.stream().map(FileItem::getName).collect(Collectors.joining(", "));
String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", "));
ingester.initializeDataSource(sourceName, originalFileNames, regionId, userPermissions);
DataSourceUploadAction dataSourceUploadAction =
new DataSourceUploadAction(fileStorage, dataSourceCollection, fileItems, ingester);
new DataSourceUploadAction(fileStorage, dataSourceCollection, files, ingester);

return dataSourceUploadAction;
}
Expand Down
22 changes: 9 additions & 13 deletions src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import com.conveyal.file.FileStorageFormat;
import com.google.common.collect.Sets;
import org.apache.commons.fileupload.FileItem;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.commons.io.FilenameUtils;

import java.io.File;
Expand All @@ -29,7 +27,7 @@ public abstract class DataSourceUtil {
* @throws DataSourceException if the type of the upload can't be detected or preconditions are violated.
* @return the expected type of the uploaded file or files, never null.
*/
public static FileStorageFormat detectUploadFormatAndValidate (List<FileItem> fileItems) {
public static FileStorageFormat detectUploadFormatAndValidate (List<File> fileItems) {
if (isNullOrEmpty(fileItems)) {
throw new DataSourceException("You must select some files to upload.");
}
Expand Down Expand Up @@ -78,10 +76,8 @@ public static FileStorageFormat detectUploadFormatAndValidate (List<FileItem> fi
* Check that all Files supplied exist on the filesystem, that they are all readable, and that they all
* have nonzero size.
*/
private static void checkFileCharacteristics (List<FileItem> fileItems) {
for (FileItem fileItem : fileItems) {
checkState(fileItem instanceof DiskFileItem, "Uploaded file was not stored to disk.");
File diskFile = ((DiskFileItem)fileItem).getStoreLocation();
private static void checkFileCharacteristics (List<File> files) {
for (File diskFile : files) {
checkState(diskFile.exists(), "Uploaded file does not exist on filesystem as expected.");
checkState(diskFile.canRead(), "Read permissions were not granted on uploaded file.");
checkState(diskFile.length() > 0, "Uploaded file was empty (contained no data).");
Expand All @@ -92,10 +88,10 @@ private static void checkFileCharacteristics (List<FileItem> fileItems) {
* Given a list of Files, return a set of all unique file extensions present, normalized to lower case.
* Always returns a set instance which may be empty, but never null.
*/
private static Set<String> extractFileExtensions (List<FileItem> fileItems) {
private static Set<String> extractFileExtensions (List<File> files) {
Set<String> fileExtensions = new HashSet<>();
for (FileItem fileItem : fileItems) {
String fileName = fileItem.getName();
for (File file : files) {
String fileName = file.getName();
String extension = FilenameUtils.getExtension(fileName);
if (extension.isEmpty()) {
throw new DataSourceException("Filename has no extension: " + fileName);
Expand All @@ -106,10 +102,10 @@ private static Set<String> extractFileExtensions (List<FileItem> fileItems) {
}

/** In uploads containing more than one file, all files are expected to have the same name before the extension. */
private static void verifyBaseNamesSame (List<FileItem> fileItems) {
private static void verifyBaseNamesSame (List<File> files) {
String firstBaseName = null;
for (FileItem fileItem : fileItems) {
String baseName = FilenameUtils.getBaseName(fileItem.getName());
for (File file : files) {
String baseName = FilenameUtils.getBaseName(file.getName());
if (firstBaseName == null) {
firstBaseName = baseName;
} else if (!firstBaseName.equals(baseName)) {
Expand Down
Loading

0 comments on commit e6b87b8

Please sign in to comment.