Skip to content

Commit

Permalink
add JMH benchmark for Parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
clairemcginty committed Sep 17, 2024
1 parent be75cc3 commit aeed545
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
8 changes: 7 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,7 @@ lazy val jmh: Project = project
cats % Test,
datastore % Test,
guava % Test,
parquet % "test->test",
protobuf % "test->test",
scalacheck % Test,
tensorflow % Test,
Expand All @@ -727,7 +728,12 @@ lazy val jmh: Project = project
"com.google.apis" % "google-api-services-bigquery" % bigqueryVersion % Test,
"com.google.cloud.datastore" % "datastore-v1-proto-client" % datastoreVersion % Test,
"org.apache.avro" % "avro" % avroVersion % Test,
"org.tensorflow" % "tensorflow-core-api" % tensorflowVersion % Test
"org.tensorflow" % "tensorflow-core-api" % tensorflowVersion % Test,
"org.apache.parquet" % "parquet-avro" % parquetVersion % Test,
"org.apache.parquet" % "parquet-column" % parquetVersion % Test,
"org.apache.parquet" % "parquet-hadoop" % parquetVersion % Test,
"org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test,
"org.apache.hadoop" % "hadoop-mapreduce-client-core" % hadoopVersion % Test
)
)

Expand Down
56 changes: 55 additions & 1 deletion jmh/src/test/scala/magnolify/jmh/MagnolifyBench.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@

package magnolify.jmh

import java.util.concurrent.TimeUnit
import magnolify.jmh.MagnolifyBench.nested
import magnolify.parquet.{ParquetType, TestInputFile, TestOutputFile}

import java.util.concurrent.TimeUnit
import magnolify.scalacheck.auto._
import magnolify.test.Simple._
import org.apache.parquet.hadoop.{ParquetReader, ParquetWriter}
import org.scalacheck._
import org.openjdk.jmh.annotations._

Expand Down Expand Up @@ -87,6 +90,57 @@ class AvroBench {
@Benchmark def avroSchema: Schema = AvroType[Nested].schema
}

/**
 * JMH state holding an in-memory Parquet file containing a single `Nested`
 * record, plus an open reader positioned at its start. Both are rebuilt
 * before every benchmark invocation and the reader is closed afterwards.
 *
 * NOTE(review): `Level.Invocation` setup/teardown carries per-call overhead
 * that JMH's own docs warn about for nanosecond-scale benchmarks — confirm
 * this granularity is intended rather than `Level.Iteration`.
 */
@State(Scope.Benchmark)
class ParquetReadState {
  private val parquetType = ParquetType[Nested]
  var out: TestOutputFile = null
  var reader: ParquetReader[Nested] = null

  @Setup(Level.Invocation)
  def setup(): Unit = {
    // Serialize one record so the read benchmark has bytes to decode.
    out = new TestOutputFile
    val recordWriter = parquetType.writeBuilder(out).build()
    recordWriter.write(nested)
    recordWriter.close()

    reader = parquetType.readBuilder(new TestInputFile(out.getBytes)).build()
  }

  @TearDown(Level.Invocation)
  def tearDown(): Unit =
    reader.close()
}

/**
 * JMH state exposing an open `ParquetWriter` backed by a fresh in-memory
 * output file. A new writer is built for each benchmark invocation and
 * closed in teardown so every measured write starts from a clean file.
 */
@State(Scope.Benchmark)
class ParquetWriteState {
  private val parquetType = ParquetType[Nested]
  var writer: ParquetWriter[Nested] = null

  @Setup(Level.Invocation)
  def setup(): Unit =
    writer = parquetType.writeBuilder(new TestOutputFile).build()

  @TearDown(Level.Invocation)
  def tearDown(): Unit =
    writer.close()
}


/** Single-record read/write benchmarks for magnolify's derived Parquet codecs. */
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
class ParquetBench {
  import MagnolifyBench._

  /** Encodes one `Nested` record through the state's prebuilt writer. */
  @Benchmark
  def parquetWrite(state: ParquetWriteState): Unit = state.writer.write(nested)

  /** Decodes one `Nested` record from the state's prebuilt reader. */
  @Benchmark
  def parquetRead(state: ParquetReadState): Nested = state.reader.read()
}

@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
Expand Down

0 comments on commit aeed545

Please sign in to comment.