diff --git a/build.sbt b/build.sbt index c1d28b5f..6ce82a35 100644 --- a/build.sbt +++ b/build.sbt @@ -6,10 +6,10 @@ name := "facia-api-client" description := "Scala client for The Guardian's Facia JSON API" -ThisBuild / scalaVersion := "2.13.14" +ThisBuild / scalaVersion := "2.13.15" val sonatypeReleaseSettings = Seq( - releaseVersion := fromAggregatedAssessedCompatibilityWithLatestRelease().value, + // releaseVersion := fromAggregatedAssessedCompatibilityWithLatestRelease().value, releaseCrossBuild := true, // true if you cross-build the project for multiple Scala versions releaseProcess := Seq[ReleaseStep]( checkSnapshotDependencies, @@ -37,7 +37,14 @@ def artifactProducingSettings(supportScala3: Boolean) = Seq( libraryDependencies += scalaTest ) +// fapi-client-core is independent of AWS SDK version. +lazy val fapiClient_core = (project in file("fapi-client-core")).settings( + libraryDependencies += eTagCachingS3Base, + artifactProducingSettings(supportScala3 = true) +) + lazy val root = (project in file(".")).aggregate( + fapiClient_core, faciaJson_play28, faciaJson_play29, faciaJson_play30, @@ -55,9 +62,10 @@ def playJsonSpecificProject(module: String, playJsonVersion: PlayJsonVersion) = ) def faciaJson(playJsonVersion: PlayJsonVersion) = playJsonSpecificProject("facia-json", playJsonVersion) + .dependsOn(fapiClient_core) .settings( libraryDependencies ++= Seq( - awsSdk, + awsS3SdkV1, // ideally, this would be pushed out to a separate FAPI artifact, or just not used directly at all commonsIo, playJsonVersion.lib, "org.scala-lang.modules" %% "scala-collection-compat" % "2.11.0", @@ -69,6 +77,7 @@ def faciaJson(playJsonVersion: PlayJsonVersion) = playJsonSpecificProject("facia def fapiClient(playJsonVersion: PlayJsonVersion) = playJsonSpecificProject("fapi-client", playJsonVersion) .settings( libraryDependencies ++= Seq( + eTagCachingS3SupportForTesting, contentApi, contentApiDefault, commercialShared, diff --git a/facia-json/src/main/scala/com/gu/facia/client/S3Client.scala b/facia-json/src/main/scala/com/gu/facia/client/AmazonSdkS3Client.scala similarity index 79% rename from facia-json/src/main/scala/com/gu/facia/client/S3Client.scala rename to facia-json/src/main/scala/com/gu/facia/client/AmazonSdkS3Client.scala index 1c198454..4116491c 100644 --- a/facia-json/src/main/scala/com/gu/facia/client/S3Client.scala +++ b/facia-json/src/main/scala/com/gu/facia/client/AmazonSdkS3Client.scala @@ -1,19 +1,19 @@ package com.gu.facia.client -import com.amazonaws.regions.{Region, Regions} -import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} +import com.amazonaws.regions.Regions import com.amazonaws.services.s3.model.AmazonS3Exception +import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import org.apache.commons.io.IOUtils + import scala.concurrent.{ExecutionContext, Future, blocking} import scala.util.Try -/** For mocking in tests, but also to allow someone to define a properly asynchronous S3 client. (The one in the AWS - * SDK is unfortunately synchronous only.) - */ -trait S3Client { - def get(bucket: String, path: String): Future[FaciaResult] -} - +/** + * Legacy wrapper around the AWS SDK v1, which is itself a legacy version of the AWS SDK. + * + * Ideally, don't use this class. It is passed to the legacy `com.gu.facia.client.ApiClient()` constructor - + * instead use AWS SDK v2 and the `ApiClient.withCaching()` constructor. + */ case class AmazonSdkS3Client(client: AmazonS3)(implicit executionContext: ExecutionContext) extends S3Client { def get(bucket: String, path: String): Future[FaciaResult] = Future { blocking { diff --git a/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala b/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala index 5c7936fe..51aba1b8 100644 --- a/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala +++ b/facia-json/src/main/scala/com/gu/facia/client/ApiClient.scala @@ -1,37 +1,82 @@ package com.gu.facia.client -import com.gu.facia.client.models.{ConfigJson, CollectionJson} +import com.gu.etagcaching.FreshnessPolicy.AlwaysWaitForRefreshedValue +import com.gu.etagcaching.aws.s3.{ObjectId, S3ByteArrayFetching} +import com.gu.etagcaching.fetching.Fetching +import com.gu.etagcaching.{ConfigCache, ETagCache} +import com.gu.facia.client.models.{CollectionJson, ConfigJson} import play.api.libs.json.{Format, Json} import scala.concurrent.{ExecutionContext, Future} -object ApiClient { - val Encoding = "utf-8" +trait ApiClient { + def config: Future[ConfigJson] + + def collection(id: String): Future[Option[CollectionJson]] } -case class ApiClient( +object ApiClient { + private val Encoding = "utf-8" + + /** + * Legacy constructor for creating a client that does not support caching. Use `ApiClient.withCaching()` instead. + */ + def apply( bucket: String, - /** e.g., CODE, PROD, DEV ... */ - environment: String, + environment: String, // e.g., CODE, PROD, DEV ... s3Client: S3Client -)(implicit executionContext: ExecutionContext) { - import com.gu.facia.client.ApiClient._ - - private def retrieve[A: Format](key: String): Future[Option[A]] = s3Client.get(bucket, key) map { - case FaciaSuccess(bytes) => - Some(Json.fromJson[A](Json.parse(new String(bytes, Encoding))) getOrElse { - throw new JsonDeserialisationError(s"Could not deserialize JSON in $bucket, $key") - }) - case FaciaNotAuthorized(message) => throw new BackendError(message) - case FaciaNotFound(_) => None - case FaciaUnknownError(message) => throw new BackendError(message) + )(implicit executionContext: ExecutionContext): ApiClient = new ApiClient { + val env: Environment = Environment(environment) + + private def retrieve[A: Format](key: String): Future[Option[A]] = s3Client.get(bucket, key).map(translateFaciaResult[A](_)) + + def config: Future[ConfigJson] = + retrieve[ConfigJson](env.configS3Path).map(getOrWarnAboutMissingConfig) + + def collection(id: String): Future[Option[CollectionJson]] = + retrieve[CollectionJson](env.collectionS3Path(id)) + } + + private def getOrWarnAboutMissingConfig(configOpt: Option[ConfigJson]): ConfigJson = + configOpt.getOrElse(throw BackendError("Config was missing!! NOT GOOD!")) + + private def translateFaciaResult[B: Format](faciaResult: FaciaResult): Option[B] = faciaResult match { + case FaciaSuccess(bytes) => Some(parseBytes(bytes)) + case FaciaNotAuthorized(message) => throw BackendError(message) + case FaciaNotFound(_) => None + case FaciaUnknownError(message) => throw BackendError(message) } - def config: Future[ConfigJson] = - retrieve[ConfigJson](s"$environment/frontsapi/config/config.json").map(_ getOrElse { - throw new BackendError("Config was missing!! OH MY GOD") - }) + private def parseBytes[B: Format](bytes: Array[Byte]): B = + Json.fromJson[B](Json.parse(new String(bytes, Encoding))) getOrElse { + throw JsonDeserialisationError(s"Could not deserialize JSON") + } + + /** + * @param s3Fetching see scaladoc on `S3ByteArrayFetching` i.e. use `S3ObjectFetching.byteArraysWith(s3AsyncClient)` + */ + def withCaching( + bucket: String, + environment: Environment, + s3Fetching: S3ByteArrayFetching, + configureCollectionCache: ConfigCache = _.maximumSize(10000) // at most 1GB RAM worst case + )(implicit ec: ExecutionContext): ApiClient = new ApiClient { + private val fetching = + s3Fetching.keyOn[String](path => ObjectId(bucket, path)) + + def eTagCache[B: Format](configureCache: ConfigCache) = new ETagCache( + fetching.thenParsing(parseBytes[B]), + AlwaysWaitForRefreshedValue, + configureCache + ) + + private val configCache = eTagCache[ConfigJson](_.maximumSize(1)) + private val collectionCache = eTagCache[CollectionJson](configureCollectionCache) - def collection(id: String): Future[Option[CollectionJson]] = - retrieve[CollectionJson](s"$environment/frontsapi/collection/$id/collection.json") + override def config: Future[ConfigJson] = + configCache.get(environment.configS3Path).map(getOrWarnAboutMissingConfig) + + override def collection(id: String): Future[Option[CollectionJson]] = + collectionCache.get(environment.collectionS3Path(id)) + } } diff --git a/facia-json/src/test/scala/com/gu/facia/client/ApiClientSpec.scala b/facia-json/src/test/scala/com/gu/facia/client/ApiClientSpec.scala index 91f499a2..92f05f74 100644 --- a/facia-json/src/test/scala/com/gu/facia/client/ApiClientSpec.scala +++ b/facia-json/src/test/scala/com/gu/facia/client/ApiClientSpec.scala @@ -1,33 +1,63 @@ package com.gu.facia.client +import com.gu.etagcaching.aws.s3.{ObjectId, S3ByteArrayFetching} +import com.gu.etagcaching.fetching.{ETaggedData, Fetching, Missing, MissingOrETagged} import com.gu.facia.client.lib.ResourcesHelper import org.scalatest.OptionValues import org.scalatest.concurrent.{IntegrationPatience, ScalaFutures} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -import scala.concurrent.ExecutionContext.Implicits.global -import scala.concurrent.Future +import scala.concurrent.{ExecutionContext, Future} +import scala.util.hashing.MurmurHash3 + +/** + * This is used only for testing, it's a dummy implementation of S3ByteArrayFetching that + * just loads blobs from the `src/test/resources` folder, rather than hitting S3. + */ +object FakeS3Fetching extends S3ByteArrayFetching with ResourcesHelper { + private def pretendETagFor(bytes: Array[Byte]): String = MurmurHash3.bytesHash(bytes).toHexString + + override def fetch(objectId: ObjectId)(implicit ec: ExecutionContext): Future[MissingOrETagged[Array[Byte]]] = Future { + slurpBytes(objectId.key).fold(Missing: MissingOrETagged[Array[Byte]]) { bytes => + ETaggedData(pretendETagFor(bytes), bytes) + } + } + + override def fetchOnlyIfETagChanged(objectId: ObjectId, oldETag: String)(implicit ec: ExecutionContext): Future[Option[MissingOrETagged[Array[Byte]]]] = { + fetch(objectId).map { + case taggedData: ETaggedData[_] => + Option.unless(oldETag == taggedData.eTag)(taggedData) // simulate a Not-Modified response, if there's no change in ETag + case x => Some(x) + } + } +} class ApiClientSpec extends AnyFlatSpec with Matchers with OptionValues with ScalaFutures with IntegrationPatience { + import scala.concurrent.ExecutionContext.Implicits.global + object FakeS3Client extends S3Client with ResourcesHelper { override def get(bucket: String, path: String): Future[FaciaResult] = Future { slurpOrDie(path) } } - val client: ApiClient = ApiClient("not used", "DEV", FakeS3Client) + val legacyClient: ApiClient = ApiClient("not used", "DEV", FakeS3Client) + val cachingClient: ApiClient = ApiClient.withCaching("not used", Environment.Dev, FakeS3Fetching) - "ApiClient" should "fetch the config" in { - val config = client.config.futureValue + // Here we're testing that both the legacy client and the new caching client satisfy the same test criteria + for ((name, client) <- Map("legacy" -> legacyClient, "caching" -> cachingClient)) { + s"$name ApiClient" should "fetch the config" in { + val config = client.config.futureValue - config.collections should have size 334 - config.fronts should have size 79 - } + config.collections should have size 334 + config.fronts should have size 79 + } - it should "fetch a collection" in { - val collectionOpt = client.collection("2409-31b3-83df0-de5a").futureValue + it should "fetch a collection" in { + val collectionOpt = cachingClient.collection("2409-31b3-83df0-de5a").futureValue - collectionOpt.value.live should have size 8 + collectionOpt.value.live should have size 8 + } } } \ No newline at end of file diff --git a/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala b/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala index 1fda7b27..5d68c362 100644 --- a/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala +++ b/facia-json/src/test/scala/com/gu/facia/client/lib/ApiTestClient.scala @@ -1,27 +1,21 @@ package com.gu.facia.client.lib -import com.amazonaws.auth.{ AWSStaticCredentialsProvider, BasicAWSCredentials} -import com.amazonaws.regions.Regions -import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import com.gu.facia.client.models.{CollectionJson, ConfigJson} -import com.gu.facia.client.{AmazonSdkS3Client, ApiClient} +import com.gu.facia.client.{ApiClient, Environment} import play.api.libs.json.{Format, Json} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future -object Amazon { - val amazonS3Client = AmazonS3ClientBuilder.standard().withRegion(Regions.EU_WEST_1).withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials("key", "pass"))).build() -} - -class ApiTestClient extends ApiClient("bucket", "DEV", AmazonSdkS3Client(Amazon.amazonS3Client)) with ResourcesHelper { +class ApiTestClient extends ApiClient with ResourcesHelper { + private val environment = Environment.Dev private def retrieve[A: Format](key: String): Future[Option[A]] = Future.successful(slurp(key).map(Json.parse).flatMap(_.asOpt[A])) override def config: Future[ConfigJson] = - retrieve[ConfigJson](s"$environment/frontsapi/config/config.json").map(_.get) + retrieve[ConfigJson](environment.configS3Path).map(_.get) override def collection(id: String): Future[Option[CollectionJson]] = - retrieve[CollectionJson](s"$environment/frontsapi/collection/$id/collection.json") + retrieve[CollectionJson](environment.collectionS3Path(id)) } diff --git a/facia-json/src/test/scala/com/gu/facia/client/lib/ResourcesHelper.scala b/facia-json/src/test/scala/com/gu/facia/client/lib/ResourcesHelper.scala index 4a0f1e34..c862aed3 100644 --- a/facia-json/src/test/scala/com/gu/facia/client/lib/ResourcesHelper.scala +++ b/facia-json/src/test/scala/com/gu/facia/client/lib/ResourcesHelper.scala @@ -1,11 +1,17 @@ package com.gu.facia.client.lib import com.gu.facia.client.FaciaSuccess +import org.apache.commons.io.IOUtils + +import java.nio.charset.StandardCharsets.UTF_8 trait ResourcesHelper { + def slurpBytes(path: String): Option[Array[Byte]] = + Option(getClass.getClassLoader.getResource(path)).map(url => IOUtils.toByteArray(url.openStream())) + def slurp(path: String): Option[String] = - Option(getClass.getClassLoader.getResource(path)).map(scala.io.Source.fromURL(_).mkString) + slurpBytes(path).map(bytes => new String(bytes, UTF_8)) - def slurpOrDie(path: String) = slurp(path).map(_.getBytes).map(FaciaSuccess.apply).getOrElse { + def slurpOrDie(path: String) = slurpBytes(path).map(FaciaSuccess.apply).getOrElse { throw new RuntimeException(s"Required resource $path not on class path") } } \ No newline at end of file diff --git a/fapi-client/src/main/scala/com/gu/facia/api/Response.scala b/fapi-client-core/src/main/scala/com/gu/facia/api/Response.scala similarity index 100% rename from fapi-client/src/main/scala/com/gu/facia/api/Response.scala rename to fapi-client-core/src/main/scala/com/gu/facia/api/Response.scala diff --git a/fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala new file mode 100644 index 00000000..8df3ce0f --- /dev/null +++ b/fapi-client-core/src/main/scala/com/gu/facia/client/Environment.scala @@ -0,0 +1,15 @@ +package com.gu.facia.client + +case class Environment(name: String) { + private val s3PathPrefix: String = s"$name/frontsapi" + + val configS3Path: String = s"$s3PathPrefix/config/config.json" + def collectionS3Path(id: String): String = s"$s3PathPrefix/collection/$id/collection.json" +} + +object Environment { + val Prod = Environment("PROD") + val Code = Environment("CODE") + val Dev = Environment("DEV") + val Test = Environment("TEST") +} \ No newline at end of file diff --git a/facia-json/src/main/scala/com/gu/facia/client/FaciaResult.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/FaciaResult.scala similarity index 100% rename from facia-json/src/main/scala/com/gu/facia/client/FaciaResult.scala rename to fapi-client-core/src/main/scala/com/gu/facia/client/FaciaResult.scala diff --git a/facia-json/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala similarity index 100% rename from facia-json/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala rename to fapi-client-core/src/main/scala/com/gu/facia/client/JsonDeserialisationError.scala diff --git a/fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala b/fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala new file mode 100644 index 00000000..ed741f1c --- /dev/null +++ b/fapi-client-core/src/main/scala/com/gu/facia/client/S3Client.scala @@ -0,0 +1,16 @@ +package com.gu.facia.client + +import scala.concurrent.Future + +/** + * Legacy class for mocking in tests, but also previously used to allow library users to define a properly + * asynchronous S3 client (back when the one in the AWS SDK was synchronous only). + * + * Note that use of `S3Client` is now discouraged, as `facia-scala-client` now supports caching using the + * `etag-caching` library, which provides its own more powerful abstraction for fetching & parsing data, and + * `com.gu.etagcaching.aws.sdkv2.s3.S3ObjectFetching`, a ready-made implementation of fetching that includes + * support for ETags & Conditional Requests, decoding S3 exceptions, etc. + */ +trait S3Client { + def get(bucket: String, path: String): Future[FaciaResult] +} diff --git a/fapi-client-core/src/test/scala/com/gu/facia/client/SdkIndependenceTest.scala b/fapi-client-core/src/test/scala/com/gu/facia/client/SdkIndependenceTest.scala new file mode 100644 index 00000000..1517f585 --- /dev/null +++ b/fapi-client-core/src/test/scala/com/gu/facia/client/SdkIndependenceTest.scala @@ -0,0 +1,31 @@ +package com.gu.facia.client + +import org.scalatest.Assertion +import org.scalatest.flatspec.AnyFlatSpec + +import java.lang.Class.forName + +/** + * Ideally, the whole of `facia-scala-client` would be independent of AWS SDK version - we'd _like_ consumers + * of this library to be able to use whatever AWS SDK version they want, without us pulling in dependency on + * either SDK version. For `facia-scala-client`, this is an attainable goal, as the only AWS API action it performs + * is fetching from S3, and https://github.com/guardian/etag-caching provides the `S3ByteArrayFetching` abstraction + * that encapsulates this action without tying to a specific AWS SDK version. + * + * Due to legacy code compatibility, we can't completely remove AWS SDK v1 from `fapi-client` for now, but + * we _have_ removed it from `fapi-client-core`. This `SdkIndependenceTest` keeps us honest, and ensures we + * don't accidentally re-add the dependency. + */ +class SdkIndependenceTest extends AnyFlatSpec { + + private def assertClassIsUnavailable(className: String): Assertion = + assertThrows[ClassNotFoundException](forName(className)) + + "fapi-client-core" should "be independent of AWS SDKs (ie should not rely on AWS SDK v1 or v2)" in { + assertClassIsUnavailable("com.amazonaws.services.s3.AmazonS3") // AWS SDK v1 + assertClassIsUnavailable("software.amazon.awssdk.auth.credentials.AwsCredentialsProvider") // AWS SDK v2 + } + it should "be independent of Play-Json, or at least that would be nice" in { + assertClassIsUnavailable("play.api.libs.json.Format") + } +} \ No newline at end of file diff --git a/fapi-client/src/test/scala/lib/IntegrationTestConfig.scala b/fapi-client/src/test/scala/lib/IntegrationTestConfig.scala index 81697e67..82836588 100644 --- a/fapi-client/src/test/scala/lib/IntegrationTestConfig.scala +++ b/fapi-client/src/test/scala/lib/IntegrationTestConfig.scala @@ -1,11 +1,12 @@ package lib -import com.amazonaws.auth._ -import com.amazonaws.auth.profile.ProfileCredentialsProvider -import com.amazonaws.services.s3.AmazonS3ClientBuilder +import software.amazon.awssdk.auth.credentials.{EnvironmentVariableCredentialsProvider, ProfileCredentialsProvider} import com.gu.contentapi.client.GuardianContentClient -import com.gu.facia.client.{AmazonSdkS3Client, ApiClient} -import com.amazonaws.regions.Regions.EU_WEST_1 +import com.gu.etagcaching.aws.sdkv2.s3.S3ObjectFetching +import com.gu.facia.client.{ApiClient, Environment} +import software.amazon.awssdk.auth.credentials.{AwsCredentialsProvider, AwsCredentialsProviderChain} +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3AsyncClient private case class TestContentApiClient(override val apiKey: String, override val targetUrl: String) extends GuardianContentClient(apiKey) @@ -22,16 +23,16 @@ trait IntegrationTestConfig extends ExecutionContext { } } - implicit val apiClient: ApiClient = { - val credentialsProvider = new AWSCredentialsProviderChain( - new EnvironmentVariableCredentialsProvider(), - new SystemPropertiesCredentialsProvider(), - new ProfileCredentialsProvider(awsProfileName) - ) - val amazonS3Client = AmazonS3ClientBuilder.standard() - .withRegion(EU_WEST_1) - .withCredentials(credentialsProvider) - .build() - ApiClient("facia-tool-store", "DEV", AmazonSdkS3Client(amazonS3Client)) - } + def credentialsForDevAndCI(devProfile: String, ciCreds: AwsCredentialsProvider): AwsCredentialsProviderChain = + AwsCredentialsProviderChain.of(ciCreds, ProfileCredentialsProvider.builder().profileName(devProfile).build()) + + private val s3AsyncClient: S3AsyncClient = S3AsyncClient.builder() + .region(Region.EU_WEST_1) + .credentialsProvider(credentialsForDevAndCI(awsProfileName, EnvironmentVariableCredentialsProvider.create())).build() + + implicit val apiClient: ApiClient = ApiClient.withCaching( + "facia-tool-store", + Environment.Dev, + S3ObjectFetching.byteArraysWith(s3AsyncClient) + ) } diff --git a/project/dependencies.scala b/project/dependencies.scala index 71171aa1..5cc564e1 100644 --- a/project/dependencies.scala +++ b/project/dependencies.scala @@ -2,8 +2,11 @@ import sbt._ object Dependencies { val capiVersion = "32.0.0" + val eTagCachingVersion = "7.0.0" - val awsSdk = "com.amazonaws" % "aws-java-sdk-s3" % "1.12.673" + val awsS3SdkV1 = "com.amazonaws" % "aws-java-sdk-s3" % "1.12.765" + val eTagCachingS3Base = "com.gu.etag-caching" %% "aws-s3-base" % eTagCachingVersion + val eTagCachingS3SupportForTesting = "com.gu.etag-caching" %% "aws-s3-sdk-v2" % eTagCachingVersion % Test val commonsIo = "org.apache.commons" % "commons-io" % "1.3.2" val contentApi = "com.gu" %% "content-api-client" % capiVersion val contentApiDefault = "com.gu" %% "content-api-client-default" % capiVersion % Test