Skip to content

Commit

Permalink
persistent MongoCollections in unmodifiable map
Browse files Browse the repository at this point in the history
also url-safe tokens (can be included as a query param)
factor out logic from streaming / non-streaming database endpoints
added login and token example to vector tile client
  • Loading branch information
abyrd committed May 10, 2022
1 parent ab482b3 commit 7d31889
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 55 deletions.
16 changes: 14 additions & 2 deletions src/main/java/com/conveyal/analysis/components/HttpApi.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,21 @@ private spark.Service configureSparkService () {
LOG.info("Analysis server will listen for HTTP connections on port {}.", config.serverPort());
spark.Service sparkService = spark.Service.ignite();
sparkService.port(config.serverPort());
//sparkService.threadPool(1000);

// Set up TLS (HTTPS). Unfortunately Spark HTTP only accepts String paths to keystore files.
// We want to build a Keystore instance programmatically from PEM files.
// Digging through the Spark source code it seems extremely convoluted to directly inject a Keystore instance.
// sparkService.secure();
// Usage examples at:
// https://github.com/Hakky54/sslcontext-kickstart/blob/master/sslcontext-kickstart-for-pem/src/test/java/nl/altindag/ssl/util/PemUtilsShould.java
// Dependency:
// Tools to load PEM files into Java Keystore (so we don't have to use arcane Java keytool)
// implementation 'io.github.hakky54:sslcontext-kickstart-for-pem:7.4.1'

// Serve up UI files. staticFileLocation("vector-client") inside classpath will not see changes to files.
// Note that this eliminates the need for CORS.
sparkService.externalStaticFileLocation("src/main/resources/vector-client");
// Note that this eliminates the need for CORS headers and eliminates CORS preflight request latency.
sparkService.externalStaticFileLocation("../r5/src/main/resources/vector-client");

// Specify actions to take before the main logic of handling each HTTP request.
sparkService.before((req, res) -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,31 +54,37 @@ public TokenAuthentication (AnalysisDB database) {

@Override
public UserPermissions authenticate(Request request) {
String authHeader = request.headers("authorization");
if (authHeader == null) {
throw new AnalysisServerException(UNAUTHORIZED, "Authorization header mising.", 401);
String token = request.headers("authorization");
// Some places such as MapboxGL do not make it easy to add headers, so also accept the token as a query parameter.
// The MapboxGL transformUrl setting seems to be missing from recent versions of the library.
if (token == null) {
token = request.queryParams("token");
}
if ("sesame".equalsIgnoreCase(authHeader)) {
if (token == null) {
throw new AnalysisServerException(UNAUTHORIZED, "Authorization token mising.", 401);
}
if ("sesame".equalsIgnoreCase(token)) {
return new UserPermissions("local", true, "local");
}
UserPermissions userPermissions = userForToken(authHeader);
UserPermissions userPermissions = userForToken(token);
if (userPermissions == null) {
throw new AnalysisServerException(UNAUTHORIZED, "Inalid authorization token.", 401);
throw new AnalysisServerException(UNAUTHORIZED, "Invalid authorization token.", 401);
} else {
return userPermissions;
}
}

/**
* TODO is SecureRandom a sufficiently secure source of randomness when used this way?
* Should we be creating a new instance each time?
* @return A Base64 encoded representation of 32 random bytes
* Should we be creating a new instance of SecureRandom each time or reusing it?
* Do not use basic Base64 encoding since it contains some characters that are invalid in URLs.
* @return A url-safe representation of 32 random bytes
*/
public static String generateToken () {
Random random = new SecureRandom();
byte[] tokenBytes = new byte[32];
random.nextBytes(tokenBytes);
String token = Base64.getEncoder().encodeToString(tokenBytes);
String token = Base64.getUrlEncoder().encodeToString(tokenBytes);
return token;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ private Object createUser (Request req, Response res) {
* Create a new token, replacing any existing one for the same user (email).
*/
private Map getTokenForEmail (Request req, Response res) {
// These should probably be in the body not URL, to prevent them from appearing as plaintext in history.
String email = req.queryParams("email");
String password = req.queryParams("password");
// Crude rate limiting, might just lead to connections piling up in event of attack.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,25 @@
import com.conveyal.analysis.UserPermissions;
import com.conveyal.analysis.persistence.AnalysisDB;
import com.google.common.collect.Lists;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection;
import com.mongodb.util.JSON;
import org.bson.BsonArray;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.bson.json.JsonWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;

import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.conveyal.analysis.util.JsonUtil.toJson;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.mongodb.client.model.Filters.and;
import static com.mongodb.client.model.Filters.eq;

Expand All @@ -38,61 +36,72 @@ public class DatabaseController implements HttpController {

private final AnalysisDB database;

private final MongoCollection<Document> regions;
private final MongoCollection<Document> bundles;
private final Map<String, MongoCollection<Document>> mongoCollections;

// Preloading these avoids synchronization during handling http requests by reading from an immutable map.
// TODO verify if it is threadsafe to reuse MongoCollection in all threads.
// Amazingly there seems to be no documentation on this at all. Drilling down into the function calls, it seems
// to create a new session on each find() call, so should presumably go through synchronization.
// In testing with siege and other http benchmarking tools, reusing the MongoCollection seems to result in much
// smoother operation; creating a new MongoCollection on each request seems to jam up after a certain number
// of requests (perhaps waiting for idle MongoCollections to be cleaned up).
/**
 * Look up each named collection in the database once and gather the results in a map keyed by name.
 * @param collectionNames the names of the collections to fetch from the database
 * @return an unmodifiable copy of the name-to-collection map
 */
public Map<String, MongoCollection<Document>> mongoCollectionMap (String... collectionNames) {
    final Map<String, MongoCollection<Document>> collectionsByName = new HashMap<>();
    for (int i = 0; i < collectionNames.length; i++) {
        final String collectionName = collectionNames[i];
        collectionsByName.put(collectionName, database.getBsonCollection(collectionName));
    }
    // Hand back an unmodifiable snapshot rather than the mutable working map.
    return Map.copyOf(collectionsByName);
}

public DatabaseController(AnalysisDB database) {
this.database = database;
// TODO verify if it is threadsafe to reuse this collection in all threads
// Also verify whether it's any slower to just get the collection on every GET operation.
// Testing with Apache bench, retaining and reusing the collection seems much smoother.
this.regions = database.getBsonCollection("regions");
this.bundles = database.getBsonCollection("bundles");
this.mongoCollections = mongoCollectionMap("regions", "bundles");
}

/**
* Fetch anything from database. Buffers in memory so not suitable for huge responses.
* register serialization with sparkService.get("/api/db/:collection", this::getDocuments, toJson);
*/
private Iterable<Document> getDocuments (Request req, Response res) {
/** Factored out for experimenting with streaming and non-streaming approaches to serialization. */
private FindIterable<Document> getDocuments (Request req) {
String accessGroup = UserPermissions.from(req).accessGroup;
final String collectionName = req.params("collection");
MongoCollection<Document> collection = collectionName.equals("bundles") ? bundles :
database.getBsonCollection(collectionName);
MongoCollection<Document> collection = mongoCollections.get(collectionName);
checkNotNull(collection, "Collection not available: " + collectionName);
List<Bson> filters = Lists.newArrayList(eq("accessGroup", accessGroup));
req.queryMap().toMap().forEach((key, values) -> {
for (String value : values) {
filters.add(eq(key, value));
}
});
return collection.find(and(filters));
}

/**
* Fetch anything from database. Buffers all documents in memory so may not be suitable for large responses.
* Register result serialization with: sparkService.get("/api/db/:collection", this::getDocuments, toJson);
*/
private Iterable<Document> getDocuments (Request req, Response res) {
FindIterable<Document> docs = getDocuments(req);
List<Document> documents = new ArrayList<>();
collection.find(and(filters)).into(documents);
docs.into(documents);
return documents;
}

/**
* Fetch anything from database. Streaming processing, no in-memory buffering of the BsonDocuments.
* The output stream does buffer to some extent but should stream chunks instead of serializing into memory.
* Anecdotally in testing with siege this does seem to almost double the response rate and allow double the
* concurrent connections without stalling (though still low at 20, and it eventually does stall).
*/
private Object getDocumentsStreaming (Request req, Response res) {
String accessGroup = UserPermissions.from(req).accessGroup;
final String collectionName = req.params("collection");
MongoCollection<Document> collection = collectionName.equals("bundles") ? bundles :
database.getBsonCollection(collectionName);
List<Bson> filters = Lists.newArrayList(eq("accessGroup", accessGroup));
req.queryMap().toMap().forEach((key, values) -> {
for (String value : values) {
filters.add(eq(key, value));
}
});
FindIterable<Document> docs = getDocuments(req);
// getOutputStream returns a ServletOutputStream, usually Jetty implementation HttpOutputStream which
// buffers the output. doc.toJson() creates a lot of short-lived objects which could be factored out.
// The Mongo driver says to use JsonWriter or toJson() rather than utility methods:
// https://github.com/mongodb/mongo-java-driver/commit/63409f9cb3bbd0779dd5139355113d9b227dfa05
try (OutputStream out = res.raw().getOutputStream()) {
try {
OutputStream out = res.raw().getOutputStream();
out.write('['); // Begin JSON array.
boolean firstElement = true;
for (Document doc : collection.find(and(filters))) {
for (Document doc : docs) {
if (firstElement) {
firstElement = false;
} else {
Expand All @@ -101,17 +110,16 @@ private Object getDocumentsStreaming (Request req, Response res) {
out.write(doc.toJson().getBytes(StandardCharsets.UTF_8));
}
out.write(']'); // Close JSON array.
} catch (IOException e) {
// We do not close the OutputStream, even implicitly with a try-with-resources.
// The thinking is that closing the stream might close the underlying connection, which might be keepalive.
} catch (Exception e) {
throw new RuntimeException("Failed to write database records as JSON.", e);
}
// Since we're directly writing to the OutputStream, no need to return anything.
// But do not return null or Spark will complain cryptically.
return "";
}

// Testing with Apache bench shows some stalling
// -k keepalive connections fails immediately

@Override
public void registerEndpoints (spark.Service sparkService) {
sparkService.get("/api/db/:collection", this::getDocuments, toJson);
Expand Down
17 changes: 11 additions & 6 deletions src/main/resources/vector-client/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,16 @@

mapboxgl.accessToken = 'TOKEN_HERE';

let token = new URLSearchParams(window.location.search).get('token')
// Wrapper around fetch() that sends the page's token in the Authorization header of the request.
function authFetch (url) {
    const requestOptions = { headers: { 'Authorization': token } };
    return fetch(url, requestOptions);
}

const regionSelectElement = document.getElementById("regions")
function updateRegionSelector () {
regionSelectElement.add(new Option("None"));
// Returns an array of regions. Add them to the region selector DOM element.
fetch('http://localhost:7070/api/db/regions')
authFetch('http://localhost:7070/api/db/regions')
.then(response => response.json())
.then(regions => {
for (const region of regions) {
Expand All @@ -53,7 +58,7 @@
// Returns an array of bundles. Add them to the bundle selector DOM element.
document.querySelectorAll('#bundles option').forEach(option => option.remove())
bundleSelectElement.add(new Option("None"));
fetch(`http://localhost:7070/api/db/bundles?regionId=${regionId}`)
authFetch(`http://localhost:7070/api/db/bundles?regionId=${regionId}`)
.then(response => response.json())
.then(bundles => {
for (const bundle of bundles) {
Expand All @@ -80,7 +85,7 @@
feedId = null;

function updateRegion (regionId) {
fetch(`http://localhost:7070/api/db/regions?_id=${regionId}`)
authFetch(`http://localhost:7070/api/db/regions?_id=${regionId}`)
.then(response => response.json())
.then(r => {
region = r[0];
Expand All @@ -91,7 +96,7 @@
}

function updateBundle (bundleId) {
fetch(`http://localhost:7070/api/db/bundles?_id=${bundleId}`)
authFetch(`http://localhost:7070/api/db/bundles?_id=${bundleId}`)
.then(response => response.json())
.then(b => {
bundle = b[0];
Expand All @@ -112,8 +117,8 @@

feedSelectElement.onchange = function (event) {
feedId = event.target.value;
// setUrl expects a URL to TileJSON, not a URL to the tiles themselves.
map.getSource('r5').setTiles([`http://localhost:7070/api/gtfs/${bundle.feedGroupId}/${feedId}/tiles/{z}/{x}/{y}`]);
// setUrl expects a URL to TileJSON, not a URL to the tiles themselves (use setTiles()).
map.getSource('r5').setTiles([`http://localhost:7070/api/gtfs/${bundle.feedGroupId}/${feedId}/tiles/{z}/{x}/{y}?token=${token}`]);
}

let map = new mapboxgl.Map({
Expand Down
41 changes: 41 additions & 0 deletions src/main/resources/vector-client/login.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Conveyal Login</title>
<style>
body { margin: 0; padding: 0; font-family: sans-serif}
#panel {
display: flex; flex-direction: column; width: 20%; padding: 20px; margin: auto;
}
</style>
</head>
<body>
<div id="panel">
<label for="email">Email:</label>
<input type="email" id="email">
<label for="password">Password:</label>
<input type="password" id="password">
<button name="login" id="login">Log In</button>
</div>
<script>

// Look up the form controls once; the click handler below reads their current values.
const emailField = document.getElementById("email");
const passwordField = document.getElementById("password");
const loginButton = document.getElementById("login");

// On click, request a token for the entered credentials and, on success, navigate to index.html
// with that token as a query parameter.
loginButton.onclick = function (e) {
    // Percent-encode the credentials so characters such as '&', '+', '#', '=' or spaces in an
    // email or password cannot corrupt the query string.
    // TODO send credentials in the request body rather than the URL.
    const email = encodeURIComponent(emailField.value);
    const password = encodeURIComponent(passwordField.value);
    const url = `http://localhost:7070/token?email=${email}&password=${password}`;
    fetch(url)
        .then(response => {
            // Stop here on an HTTP error status instead of trying to read a token from an error response.
            if (!response.ok) {
                throw new Error(`Login failed with HTTP status ${response.status}`);
            }
            return response.json();
        })
        .then(body => {
            // Without this check a missing token would navigate to index.html?token=undefined.
            if (!body.token) {
                throw new Error("Login response did not contain a token.");
            }
            // The token is a secret, so it is deliberately not written to the console.
            window.location.href = `index.html?token=${encodeURIComponent(body.token)}`;
        })
        .catch(error => {
            // Report network, HTTP and parsing failures rather than leaving the rejection unhandled.
            console.error(error);
        });
};

</script>
</body>
</html>
2 changes: 1 addition & 1 deletion src/main/resources/vector-client/vectorstyle.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"r5": {
"type": "vector",
"tiles": [
"http://localhost:7070/api/gtfs/61137f589919c7627cb5647f/61137f589919c7627cb56480/tiles/{z}/{x}/{y}"
"http://localhost:7070/dummy"
],
"maxzoom": 14
},
Expand Down

0 comments on commit 7d31889

Please sign in to comment.