From 1a5ecbb5290e8639e76e85aeead588cff99eccfb Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Wed, 21 Aug 2024 22:52:30 -0400 Subject: [PATCH 01/31] wip --- .../main/scala/magnolify/avro/AvroType.scala | 2 +- .../magnolify/avro/logical/package.scala | 97 ++--- .../scala/magnolify/beam/BeamSchemaType.scala | 342 ++++++++++++++++++ .../magnolify/beam/logical/package.scala | 92 +++++ .../scala/magnolify/beam/unsafe/package.scala | 8 + .../magnolify/beam/BeamSchemaTypeSuite.scala | 133 +++++++ build.sbt | 23 +- .../test/scala/magnolify/cats/TestEq.scala | 2 + .../magnolify/parquet/logical/package.scala | 73 ++-- .../magnolify/scalacheck/TestArbitrary.scala | 4 + .../main/scala/magnolify/shared/Time.scala | 134 +++++++ 11 files changed, 793 insertions(+), 117 deletions(-) create mode 100644 beam/src/main/scala/magnolify/beam/BeamSchemaType.scala create mode 100644 beam/src/main/scala/magnolify/beam/logical/package.scala create mode 100644 beam/src/main/scala/magnolify/beam/unsafe/package.scala create mode 100644 beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala create mode 100644 shared/src/main/scala/magnolify/shared/Time.scala diff --git a/avro/src/main/scala/magnolify/avro/AvroType.scala b/avro/src/main/scala/magnolify/avro/AvroType.scala index 7ede81ffe..d4f853cf7 100644 --- a/avro/src/main/scala/magnolify/avro/AvroType.scala +++ b/avro/src/main/scala/magnolify/avro/AvroType.scala @@ -221,7 +221,7 @@ object AvroField { override def from(v: String)(cm: CaseMapper): String = v override def to(v: String)(cm: CaseMapper): String = v } - + implicit def afEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): AvroField[T] = // Avro 1.9+ added a type parameter for `GenericEnumSymbol`, breaking 1.8 compatibility // Some reader, i.e. 
`AvroParquetReader` reads enums as `Utf8` diff --git a/avro/src/main/scala/magnolify/avro/logical/package.scala b/avro/src/main/scala/magnolify/avro/logical/package.scala index a6d0d7473..35620cc56 100644 --- a/avro/src/main/scala/magnolify/avro/logical/package.scala +++ b/avro/src/main/scala/magnolify/avro/logical/package.scala @@ -25,106 +25,61 @@ import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder} import java.util.concurrent.TimeUnit package object logical { + import magnolify.shared.Time._ // Duplicate implementation from org.apache.avro.data.TimeConversions // to support both 1.8 (joda-time based) and 1.9+ (java-time based) object micros { - private def toTimestampMicros(microsFromEpoch: Long): Instant = { - val epochSeconds = microsFromEpoch / 1000000L - val nanoAdjustment = (microsFromEpoch % 1000000L) * 1000L; - Instant.ofEpochSecond(epochSeconds, nanoAdjustment) - } - - private def fromTimestampMicros(instant: Instant): Long = { - val seconds = instant.getEpochSecond - val nanos = instant.getNano - if (seconds < 0 && nanos > 0) { - val micros = Math.multiplyExact(seconds + 1, 1000000L) - val adjustment = (nanos / 1000L) - 1000000 - Math.addExact(micros, adjustment) - } else { - val micros = Math.multiplyExact(seconds, 1000000L) - Math.addExact(micros, nanos / 1000L) - } - } - implicit val afTimestampMicros: AvroField[Instant] = - AvroField.logicalType[Long](LogicalTypes.timestampMicros())(toTimestampMicros)( - fromTimestampMicros + AvroField.logicalType[Long](LogicalTypes.timestampMicros())(microsToInstant)( + microsFromInstant ) implicit val afTimeMicros: AvroField[LocalTime] = - AvroField.logicalType[Long](LogicalTypes.timeMicros()) { us => - LocalTime.ofNanoOfDay(TimeUnit.MICROSECONDS.toNanos(us)) - } { time => - TimeUnit.NANOSECONDS.toMicros(time.toNanoOfDay) - } + AvroField.logicalType[Long](LogicalTypes.timeMicros())(microsToLocalTime)(microsFromLocalTime) // `LogicalTypes.localTimestampMicros()` is Avro 1.10 implicit val 
afLocalTimestampMicros: AvroField[LocalDateTime] = - AvroField.logicalType[Long](new LogicalType("local-timestamp-micros")) { microsFromEpoch => - val instant = toTimestampMicros(microsFromEpoch) - LocalDateTime.ofInstant(instant, ZoneOffset.UTC) - } { timestamp => - val instant = timestamp.toInstant(ZoneOffset.UTC) - fromTimestampMicros(instant) - } + AvroField.logicalType[Long](new LogicalType("local-timestamp-micros"))(microsToLocalDateTime)( + microsFromLocalDateTime + ) // avro 1.8 uses joda-time implicit val afJodaTimestampMicros: AvroField[joda.DateTime] = - AvroField.logicalType[Long](LogicalTypes.timestampMicros()) { microsFromEpoch => - new joda.DateTime(microsFromEpoch / 1000, joda.DateTimeZone.UTC) - } { timestamp => - 1000 * timestamp.getMillis - } + AvroField.logicalType[Long](LogicalTypes.timestampMicros())(microsToJodaDateTime)( + microsFromJodaDateTime + ) implicit val afJodaTimeMicros: AvroField[joda.LocalTime] = - AvroField.logicalType[Long](LogicalTypes.timeMicros()) { microsFromMidnight => - joda.LocalTime.fromMillisOfDay(microsFromMidnight / 1000) - } { time => - // from LossyTimeMicrosConversion - 1000L * time.millisOfDay().get() - } + AvroField.logicalType[Long](LogicalTypes.timeMicros())(microsToJodaLocalTime)( + microsFromJodaLocalTime + ) } object millis { implicit val afTimestampMillis: AvroField[Instant] = - AvroField.logicalType[Long](LogicalTypes.timestampMillis()) { millisFromEpoch => - Instant.ofEpochMilli(millisFromEpoch) - } { timestamp => - timestamp.toEpochMilli - } + AvroField.logicalType[Long](LogicalTypes.timestampMillis())(millisToInstant)( + millisFromInstant + ) implicit val afTimeMillis: AvroField[LocalTime] = - AvroField.logicalType[Int](LogicalTypes.timeMillis()) { millisFromMidnight => - LocalTime.ofNanoOfDay(TimeUnit.MILLISECONDS.toNanos(millisFromMidnight.toLong)) - } { time => - TimeUnit.NANOSECONDS.toMillis(time.toNanoOfDay).toInt - } + 
AvroField.logicalType[Int](LogicalTypes.timeMillis())(millisToLocalTime)(millisFromLocalTime) // `LogicalTypes.localTimestampMillis` is Avro 1.10.0+ implicit val afLocalTimestampMillis: AvroField[LocalDateTime] = - AvroField.logicalType[Long](new LogicalType("local-timestamp-millis")) { millisFromEpoch => - val instant = Instant.ofEpochMilli(millisFromEpoch) - LocalDateTime.ofInstant(instant, ZoneOffset.UTC) - } { timestamp => - val instant = timestamp.toInstant(ZoneOffset.UTC) - instant.toEpochMilli - } + AvroField.logicalType[Long](new LogicalType("local-timestamp-millis"))(millisToLocalDateTime)( + millisFromLocalDateTime + ) // avro 1.8 uses joda-time implicit val afJodaTimestampMillis: AvroField[joda.DateTime] = - AvroField.logicalType[Long](LogicalTypes.timestampMillis()) { millisFromEpoch => - new joda.DateTime(millisFromEpoch, joda.DateTimeZone.UTC) - } { timestamp => - timestamp.getMillis - } + AvroField.logicalType[Long](LogicalTypes.timestampMillis())(millisToJodaDateTime)( + millisFromJodaDateTime + ) implicit val afJodaTimeMillis: AvroField[joda.LocalTime] = - AvroField.logicalType[Int](LogicalTypes.timeMillis()) { millisFromMidnight => - joda.LocalTime.fromMillisOfDay(millisFromMidnight.toLong) - } { time => - time.millisOfDay().get() - } + AvroField.logicalType[Int](LogicalTypes.timeMillis())(millisToJodaLocalTime)( + millisFromJodaLocalTime + ) } object bigquery { diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala new file mode 100644 index 000000000..1c9c4033a --- /dev/null +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -0,0 +1,342 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package magnolify.beam + +import magnolia1.* +import magnolify.shared.* +import org.apache.beam.sdk.schemas.Schema +import org.apache.beam.sdk.schemas.Schema.{FieldType, LogicalType} +import org.apache.beam.sdk.values.Row +import org.joda.time as joda +import com.google.protobuf.ByteString +import magnolify.shims.FactoryCompat +import org.apache.beam.sdk.schemas.logicaltypes as logicaltypes + +import java.nio.ByteBuffer +import java.time.LocalDate +import java.{time as jt, util as ju} +import scala.annotation.implicitNotFound +import scala.collection.concurrent +import scala.jdk.CollectionConverters.* + +// https://beam.apache.org/documentation/programming-guide/#schema-definition +sealed trait BeamSchemaType[T] extends Converter[T, Row, Row] { + val schema: Schema + def apply(r: Row): T = from(r) + def apply(t: T): Row = to(t) +} + +object BeamSchemaType { + implicit def apply[T: BeamSchemaField]: BeamSchemaType[T] = + BeamSchemaType[T](CaseMapper.identity) + + def apply[T](cm: CaseMapper)(implicit f: BeamSchemaField[T]): BeamSchemaType[T] = { + f match { + case r: BeamSchemaField.Record[_] => + r.schema(cm) // fail fast on bad annotations + new BeamSchemaType[T] { + private val caseMapper: CaseMapper = cm + @transient override lazy val schema: Schema = r.schema(caseMapper) + + override def from(v: Row): T = r.from(v)(caseMapper) + override def to(v: T): Row = r.to(v)(caseMapper) + } + case _ => + throw new IllegalArgumentException( + s"BeamSchemaType can only be created from Record. 
Got $f" + ) + } + } +} + +sealed trait BeamSchemaField[T] extends Serializable { + type FromT + type ToT + def fieldType(cm: CaseMapper): FieldType + def from(v: FromT)(cm: CaseMapper): T + def to(v: T)(cm: CaseMapper): ToT + def fromAny(v: Any)(cm: CaseMapper): T = from(v.asInstanceOf[FromT])(cm) +} + +object BeamSchemaField { + sealed trait Aux[T, From, To] extends BeamSchemaField[T] { + override type FromT = From + override type ToT = To + } + + private[magnolify] def aux[T, From, To]( + ft: CaseMapper => FieldType + )(fromFn: From => T)(toFn: T => To): BeamSchemaField[T] = + new Aux[T, From, To] { + override def fieldType(cm: CaseMapper): FieldType = ft(cm) + override def from(v: FromT)(cm: CaseMapper): T = fromFn(v) + override def to(v: T)(cm: CaseMapper): ToT = toFn(v) + } + +// private[magnolify] def aux2[T, Repr](fieldTypeFn: CaseMapper => FieldType)(fromFn: Repr => T)( +// toFn: T => Repr +// ): BeamSchemaField[T] = +// aux[T, Repr, Repr](fieldTypeFn)(fromFn)(toFn) + + private[magnolify] def id[T](ft: CaseMapper => FieldType): BeamSchemaField[T] = + aux[T, T, T](ft)(identity)(identity) + + private[magnolify] def logicalId[T](ft: CaseMapper => FieldType): BeamSchemaField[T] = id(ft) + +// private[magnolify] def logicalId[T](ft: CaseMapper => FieldType): BeamSchemaLogicalField[T] = +// new BeamSchemaLogicalField[T] { +// type FromT = T +// type ToT = T +// override def fieldType(cm: CaseMapper): FieldType = ft(cm) +// override def from(v: FromT)(cm: CaseMapper): T = v +// override def to(v: T)(cm: CaseMapper): ToT = v +// } + + def from[T]: FromWord[T] = new FromWord[T] + + class FromWord[T] { + def apply[U](f: T => U)(g: U => T)(implicit bsf: BeamSchemaField[T]): BeamSchemaField[U] = + new Aux[U, bsf.FromT, bsf.ToT] { + override def fieldType(cm: CaseMapper): FieldType = bsf.fieldType(cm) + override def from(v: FromT)(cm: CaseMapper): U = f(bsf.from(v)(cm)) + override def to(v: U)(cm: CaseMapper): ToT = bsf.to(g(v))(cm) + } + } + +// def logicalType[T]: 
LogicalWord[T] = new LogicalWord[T] +// +// class LogicalWord[T] { +// def apply[From, To](lt: LogicalType[From, ?], nullable: Boolean = false): BeamSchemaLogicalField[T] = { +// new BeamSchemaLogicalField[T] { +// type FromT = From +// type ToT = To +// override def logicalType: LogicalType[From, To] = lt +// override def fieldType(cm: CaseMapper): FieldType = +// FieldType.logicalType(logicalType).withNullable(nullable) +// override def from(v: From)(cm: CaseMapper): T = +// throw new UnsupportedOperationException("Do not call from() on logical types") +// override def to(v: T)(cm: CaseMapper): To = +// throw new UnsupportedOperationException("Do not call to() on logical types") +// } +// } +// } + + sealed trait Record[T] extends Aux[T, Row, Row] { + @transient private lazy val schemaCache: concurrent.Map[ju.UUID, Schema] = + concurrent.TrieMap.empty + protected def buildSchema(cm: CaseMapper): Schema + def schema(cm: CaseMapper): Schema = schemaCache.getOrElseUpdate(cm.uuid, buildSchema(cm)) + } + + // //////////////////////////////////////////////// + + type Typeclass[T] = BeamSchemaField[T] + implicit def gen[T]: BeamSchemaField[T] = macro Magnolia.gen[T] + + @implicitNotFound("Cannot derive BeamSchemaField for sealed trait") // TODO does this make sense? + private sealed trait Dispatchable[T] + def split[T: Dispatchable](sealedTrait: SealedTrait[Typeclass, T]): BeamSchemaField[T] = ??? + + def join[T](caseClass: CaseClass[Typeclass, T]): BeamSchemaField[T] = { + if (caseClass.isValueClass) { + // FIXME +// val p = caseClass.parameters.head +// val tc = p.typeclass +// new BeamSchemaField[T] { +// override type FromT = tc.FromT +// override type ToT = tc.ToT +// // override protected def buildSchema(cm: CaseMapper): Schema = tc.buildSchema(cm) +// override def from(v: FromT)(cm: CaseMapper): T = caseClass.construct(_ => tc.fromAny(v)(cm)) +// override def to(v: T)(cm: CaseMapper): ToT = tc.to(p.dereference(v))(cm) +// } + ??? 
+ } else { + new Record[T] { + override def fieldType(cm: CaseMapper): FieldType = FieldType.row(schema(cm)) + + override protected def buildSchema(cm: CaseMapper): Schema = + caseClass.parameters + .foldLeft(Schema.builder()) { case (s, p) => + s.addField(p.label, p.typeclass.fieldType(cm)) + } + .build() + + override def from(v: Row)(cm: CaseMapper): T = + caseClass.construct(p => p.typeclass.fromAny(v.getValue[Any](p.index))(cm)) + + override def to(v: T)(cm: CaseMapper): Row = { + val zero: Either[Row.Builder, Row.FieldValueBuilder] = Left(Row.withSchema(schema(cm))) + val eitherBuilder = caseClass.parameters + .foldLeft(zero) { (eitherBuilder, p) => + val value = p.typeclass.to(p.dereference(v))(cm) + eitherBuilder match { + case Left(rowBuilder) => Right(rowBuilder.withFieldValue(p.index, value)) + case Right(fieldBuilder) => Right(fieldBuilder.withFieldValue(p.index, value)) + } + } + eitherBuilder match { + case Left(rb) => rb.build() + case Right(fb) => fb.build() + } + } + } + } + } + + // BYTE An 8-bit signed value + implicit val bsfByte: BeamSchemaField[Byte] = id[Byte](_ => FieldType.BYTE) + implicit val bsfChar: BeamSchemaField[Char] = from[Byte](_.toChar)(_.toByte) + // INT16 A 16-bit signed value + implicit val bsfShort: BeamSchemaField[Short] = id[Short](_ => FieldType.INT16) + // INT32 A 32-bit signed value + implicit val bsfInt: BeamSchemaField[Int] = id[Int](_ => FieldType.INT32) + // INT64 A 64-bit signed value + implicit val bsfLong: BeamSchemaField[Long] = id[Long](_ => FieldType.INT64) + // FLOAT A 32-bit IEEE 754 floating point number + implicit val bsfFloat: BeamSchemaField[Float] = id[Float](_ => FieldType.FLOAT) + // DOUBLE A 64-bit IEEE 754 floating point number + implicit val bsfDouble: BeamSchemaField[Double] = id[Double](_ => FieldType.DOUBLE) + // STRING A string + implicit val bsfString: BeamSchemaField[String] = id[String](_ => FieldType.STRING) + implicit val bsfCharSeq: BeamSchemaField[CharSequence] = + 
from[String](_.asInstanceOf[CharSequence])(_.toString) + // BOOLEAN A boolean value + implicit val bsfBoolean: BeamSchemaField[Boolean] = id[Boolean](_ => FieldType.BOOLEAN) + // BYTES A raw byte array + implicit val bsfByteArray: BeamSchemaField[Array[Byte]] = id[Array[Byte]](_ => FieldType.BYTES) + implicit val bsfByteBuffer: BeamSchemaField[ByteBuffer] = + from[Array[Byte]](x => ByteBuffer.wrap(x))(_.array()) + implicit val bsfByteString: BeamSchemaField[ByteString] = + from[Array[Byte]](x => ByteString.copyFrom(x))(_.toByteArray) + // DECIMAL An arbitrary-precision decimal type + implicit val bsfDecimal: BeamSchemaField[BigDecimal] = + aux[BigDecimal, java.math.BigDecimal, java.math.BigDecimal](_ => FieldType.DECIMAL)( + BigDecimal.apply + )(_.bigDecimal) + + implicit val bsfUUID: BeamSchemaField[ju.UUID] = + logicalId[ju.UUID](_ => FieldType.logicalType(new logicaltypes.UuidLogicalType)) +// new BeamSchemaLogicalField[ju.UUID] { +// type FromT = ju.UUID +// type ToT = ju.UUID +// val logicalType = new logicaltypes.UuidLogicalType +// override def fieldType(cm: CaseMapper): FieldType = FieldType.logicalType(logicalType) +// override def from(v: ju.UUID)(cm: CaseMapper): ju.UUID = v +// override def to(v: ju.UUID)(cm: CaseMapper): ju.UUID = v +// } + +// implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = +// from[Long](LocalDate.ofEpochDay)(_.toEpochDay) + implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = + logicalId[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) + private lazy val EpochJodaDate = new joda.LocalDate(1970, 1, 1) + implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = + from[Int](daysFromEpoch => EpochJodaDate.plusDays(daysFromEpoch))(d => + joda.Days.daysBetween(EpochJodaDate, d).getDays + ) + + implicit def bsfEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): BeamSchemaField[T] = + new BeamSchemaField[T] { + type FromT = logicaltypes.EnumerationType.Value + type ToT = 
logicaltypes.EnumerationType.Value + + @transient private lazy val enumTypeCache: concurrent.Map[ju.UUID, EnumType[T]] = + concurrent.TrieMap.empty + @transient private lazy val beamEnumTypeCache + : concurrent.Map[ju.UUID, logicaltypes.EnumerationType] = + concurrent.TrieMap.empty + + private def enumType(cm: CaseMapper): EnumType[T] = + enumTypeCache.getOrElseUpdate(cm.uuid, et.map(cm)) + private def beamEnumType(cm: CaseMapper): logicaltypes.EnumerationType = + beamEnumTypeCache.getOrElseUpdate( + cm.uuid, + logicaltypes.EnumerationType.create(enumType(cm).values.asJava) + ) + override def fieldType(cm: CaseMapper): FieldType = FieldType.logicalType(beamEnumType(cm)) + override def to(v: T)(cm: CaseMapper): ToT = beamEnumType(cm).valueOf(enumType(cm).to(v)) + override def from(v: FromT)(cm: CaseMapper): T = + enumType(cm).from(beamEnumType(cm).toString(v)) + } + + implicit def bsfMap[K, V](implicit + bsfK: BeamSchemaField[K], + bsfV: BeamSchemaField[V] + ): BeamSchemaField[Map[K, V]] = + new Aux[Map[K, V], ju.Map[bsfK.FromT, bsfV.FromT], ju.Map[bsfK.ToT, bsfV.ToT]] { + override def fieldType(cm: CaseMapper): FieldType = + FieldType.map(bsfK.fieldType(cm), bsfV.fieldType(cm)) + // TODO need to convert/not convert logical types + override def from(v: ju.Map[bsfK.FromT, bsfV.FromT])(cm: CaseMapper): Map[K, V] = + v.asScala.map { case (k, v) => bsfK.from(k)(cm) -> bsfV.from(v)(cm) }.toMap + override def to(v: Map[K, V])(cm: CaseMapper): ju.Map[bsfK.ToT, bsfV.ToT] = + v.map { case (k, v) => bsfK.to(k)(cm) -> bsfV.to(v)(cm) }.asJava + } + + implicit def bsfIterable[T, C[_]](implicit + f: BeamSchemaField[T], + ti: C[T] => Iterable[T], + fc: FactoryCompat[T, C[T]] + ): BeamSchemaField[C[T]] = { +// f match { +// case l: BeamSchemaLogicalField[T] => +// new BeamSchemaLogicalField[C[T]] { +// type FromT = ju.List[l.FromT] +// type ToT = ju.List[l.ToT] +// override def fieldType(cm: CaseMapper): FieldType = FieldType.iterable(l.fieldType(cm)) +// override def to(v: 
C[T])(cm: CaseMapper): ToT = +// v.iterator.map(l.to(_)(cm)).toList.asJava +// override def from(v: FromT)(cm: CaseMapper): C[T] = +// fc.fromSpecific(v.asScala.iterator.map(p => l.from(p)(cm))) +// } +// case _: BeamSchemaField[_] => + new Aux[C[T], ju.List[f.FromT], ju.List[f.ToT]] { + override def from(v: ju.List[f.FromT])(cm: CaseMapper): C[T] = + fc.fromSpecific(v.asScala.iterator.map(p => f.from(p)(cm))) + override def to(v: C[T])(cm: CaseMapper): ju.List[f.ToT] = + v.iterator.map(f.to(_)(cm)).toList.asJava + override def fieldType(cm: CaseMapper): FieldType = FieldType.iterable(f.fieldType(cm)) + } +// } + } + + implicit def bsfOption[T](implicit f: BeamSchemaField[T]): BeamSchemaField[Option[T]] = { +// f match { +// case l: BeamSchemaLogicalField[T] => +// new BeamSchemaLogicalField[Option[T]] { +// type FromT = l.FromT +// type ToT = l.ToT +// override def fieldType(cm: CaseMapper): FieldType = l.fieldType(cm).withNullable(true) +// override def to(v: Option[T])(cm: CaseMapper): ToT = v match { +// case None => null.asInstanceOf[ToT] +// case Some(value) => l.to(value)(cm) +// } +// override def from(v: FromT)(cm: CaseMapper): Option[T] = Option(v).map(l.from(_)(cm)) +// } +// case _: BeamSchemaField[T] => + new Aux[Option[T], f.FromT, f.ToT] { + override def from(v: f.FromT)(cm: CaseMapper): Option[T] = + if (v == null) None else Some(f.from(v)(cm)) + override def to(v: Option[T])(cm: CaseMapper): f.ToT = v match { + case None => null.asInstanceOf[f.ToT] + case Some(x) => f.to(x)(cm) + } + override def fieldType(cm: CaseMapper): FieldType = f.fieldType(cm).withNullable(true) + } +// } + } +} diff --git a/beam/src/main/scala/magnolify/beam/logical/package.scala b/beam/src/main/scala/magnolify/beam/logical/package.scala new file mode 100644 index 000000000..6475a2fdc --- /dev/null +++ b/beam/src/main/scala/magnolify/beam/logical/package.scala @@ -0,0 +1,92 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package magnolify.beam + +import org.joda.time as joda +import java.time as jt + +package object logical { + import magnolify.shared.Time._ + + object millis { + // joda + // DATETIME A timestamp represented as milliseconds since the epoch + // joda.DateTime only has millisecond resolution + implicit val bsfJodaDateTimeMillis: BeamSchemaField[joda.DateTime] = + BeamSchemaField.from[Long](millisToJodaDateTime)(millisFromJodaDateTime) + // DATETIME A timestamp represented as milliseconds since the epoch + implicit val bsfJodaInstantMillis: BeamSchemaField[joda.Instant] = + BeamSchemaField.from[Long](millisToJodaInstant)(millisFromJodaInstant) + implicit val bsfJodaDurationMillis: BeamSchemaField[joda.Duration] = + BeamSchemaField.from[Long](millisToJodaDuration)(millisFromJodaDuration) + implicit val bsfJodaLocalTimeMillis: BeamSchemaField[joda.LocalTime] = + BeamSchemaField.from[Int](millisToJodaLocalTime)(millisFromJodaLocalTime) + // java + implicit val bsfInstantMillis: BeamSchemaField[jt.Instant] = + BeamSchemaField.from[Long](millisToInstant)(millisFromInstant) + implicit val bsLocalTimeMillis: BeamSchemaField[jt.LocalTime] = + BeamSchemaField.from[Int](millisToLocalTime)(millisFromLocalTime) + implicit val bsfLocalDateTimeMillis: BeamSchemaField[jt.LocalDateTime] = + BeamSchemaField.from[Long](millisToLocalDateTime)(millisFromLocalDateTime) + implicit val bsfDurationMillis: BeamSchemaField[jt.Duration] = + 
BeamSchemaField.from[Long](millisToDuration)(millisFromDuration) + } + + object micros { + // joda.DateTime only has millisecond resolution, so excess precision is discarded + implicit val bsfJodaDateTimeMicros: BeamSchemaField[joda.DateTime] = + BeamSchemaField.from[Long](microsToJodaDateTime)(microsFromJodaDateTime) + // joda.Instant has millisecond precision, excess precision discarded + implicit val bsfJodaInstantMicros: BeamSchemaField[joda.Instant] = + BeamSchemaField.from[Long](microsToJodaInstant)(microsFromJodaInstant) + // joda.Duration has millisecond precision, excess precision discarded + implicit val bsfJodaDurationMicros: BeamSchemaField[joda.Duration] = + BeamSchemaField.from[Long](microsToJodaDuration)(microsFromJodaDuration) + implicit val bsfJodaLocalTimeMicros: BeamSchemaField[joda.LocalTime] = + BeamSchemaField.from[Long](microsToJodaLocalTime)(microsFromJodaLocalTime) + // java + implicit val bsfInstantMicros: BeamSchemaField[jt.Instant] = + BeamSchemaField.from[Long](microsToInstant)(microsFromInstant) + implicit val bsLocalTimeMicros: BeamSchemaField[jt.LocalTime] = + BeamSchemaField.from[Long](microsToLocalTime)(microsFromLocalTime) + implicit val bsfLocalDateTimeMicros: BeamSchemaField[jt.LocalDateTime] = + BeamSchemaField.from[Long](microsToLocalDateTime)(microsFromLocalDateTime) + implicit val bsfDurationMicros: BeamSchemaField[jt.Duration] = + BeamSchemaField.from[Long](microsToDuration)(microsFromDuration) + } + + object nanos { + // joda.DateTime only has millisecond resolution + implicit val bsfJodaDateTimeNanos: BeamSchemaField[joda.DateTime] = + BeamSchemaField.from[Long](nanosToJodaDateTime)(nanosFromJodaDateTime) + implicit val bsfJodaInstantNanos: BeamSchemaField[joda.Instant] = + BeamSchemaField.from[Long](nanosToJodaInstant)(nanosFromJodaInstant) + implicit val bsfJodaDurationNanos: BeamSchemaField[joda.Duration] = + BeamSchemaField.from[Long](nanosToJodaDuration)(nanosFromJodaDuration) + implicit val bsfJodaLocalTimeNanos: 
BeamSchemaField[joda.LocalTime] = + BeamSchemaField.from[Long](nanosToJodaLocalTime)(nanosFromJodaLocalTime) + // java + implicit val bsfInstantNanos: BeamSchemaField[jt.Instant] = + BeamSchemaField.from[Long](nanosToInstant)(nanosFromInstant) + implicit val bsLocalTimeNanos: BeamSchemaField[jt.LocalTime] = + BeamSchemaField.from[Long](nanosToLocalTime)(nanosFromLocalTime) + implicit val bsfLocalDateTimeNanos: BeamSchemaField[jt.LocalDateTime] = + BeamSchemaField.from[Long](nanosToLocalDateTime)(nanosFromLocalDateTime) + implicit val bsfDurationNanos: BeamSchemaField[jt.Duration] = + BeamSchemaField.from[Long](nanosToDuration)(nanosFromDuration) + } +} diff --git a/beam/src/main/scala/magnolify/beam/unsafe/package.scala b/beam/src/main/scala/magnolify/beam/unsafe/package.scala new file mode 100644 index 000000000..9379200cf --- /dev/null +++ b/beam/src/main/scala/magnolify/beam/unsafe/package.scala @@ -0,0 +1,8 @@ +package magnolify.beam + +import magnolify.shared._ + +package object unsafe { + implicit def afUnsafeEnum[T: EnumType]: BeamSchemaField[UnsafeEnum[T]] = + BeamSchemaField.from[String](UnsafeEnum.from[T])(UnsafeEnum.to[T]) +} diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala new file mode 100644 index 000000000..c15180c04 --- /dev/null +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -0,0 +1,133 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package magnolify.beam + +import cats.* +import magnolify.cats.auto.* +import magnolify.cats.TestEq.* +import magnolify.scalacheck.auto.* +import magnolify.scalacheck.TestArbitrary.* +import magnolify.test.MagnolifySuite +import magnolify.test.Simple.* +import org.joda.time as joda +import org.scalacheck.{Arbitrary, Gen, Prop} + +import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} +import java.util.UUID +import scala.reflect.ClassTag + +class BeamSchemaTypeSuite extends MagnolifySuite { + private def test[T: Arbitrary: ClassTag](implicit + bst: BeamSchemaType[T], + eq: Eq[T] + ): Unit = { + // Ensure serializable even after evaluation of `schema` + bst.schema: Unit + ensureSerializable(bst) + + property(className[T]) { + Prop.forAll { (t: T) => + val converted = bst.apply(t) + val roundtripped = bst.apply(converted) + Prop.all(eq.eqv(t, roundtripped)) + } + } + } + + test[Integers] + test[Floats] + test[Required] + test[Nullable] + test[Repeated] + test[Nested] + test[Collections] + test[MoreCollections] + + test[Maps] + test[Logical] + + { + import magnolify.beam.unsafe._ + import magnolify.shared.TestEnumType._ + test[Enums] + test[UnsafeEnums] + } + + implicit val arbBigDecimal: Arbitrary[BigDecimal] = + Arbitrary(Gen.chooseNum(0, Int.MaxValue).map(BigDecimal(_))) + test[Decimal] + + { + import magnolify.beam.logical.millis._ + test[Time] + test[Joda] + } + +// { +// // FIXME need special Eq instances that are lossy +// import magnolify.beam.logical.micros._ +// test[Time] +// test[Joda] +// } +// +// { +//// FIXME need special Eq instances that are lossy +// import magnolify.beam.logical.nanos._ +// test[Time] +// test[Joda] +// } + +// { +// implicit val bst: BeamSchemaType[LowerCamel] = +// BeamSchemaType[LowerCamel](CaseMapper(_.toUpperCase)) +// test[LowerCamel] +// +// test("LowerCamel mapping") { +// val schema = bst.schema +// // 
FIXME +// } +// } + +} + +case class Decimal(bd: BigDecimal, bdo: Option[BigDecimal]) +case class Logical( + u: UUID, + uo: Option[UUID], + ul: List[UUID], + ulo: List[Option[UUID]] +) + +case class Time( + i: Instant, + d: LocalDate, + dt: LocalDateTime, + t: LocalTime +) +case class Joda( + i: joda.Instant, + dt: joda.DateTime, + lt: joda.LocalTime, + d: joda.Duration +) +case class Maps( + ms: Map[String, String], + mi: Map[Int, Int], + mso: Map[Option[String], Option[String]], + ml: Map[UUID, UUID], + mlo: Map[Option[UUID], Option[UUID]] +) diff --git a/build.sbt b/build.sbt index f601f343e..6e271ffba 100644 --- a/build.sbt +++ b/build.sbt @@ -24,6 +24,7 @@ val magnoliaScala3Version = "1.3.7" val algebirdVersion = "0.13.10" val avroVersion = Option(sys.props("avro.version")).getOrElse("1.11.3") +val beamVersion = "2.57.0" val bigqueryVersion = "v2-rev20240229-2.0.0" val bigtableVersion = "2.41.0" val catsVersion = "2.10.0" @@ -128,7 +129,7 @@ ThisBuild / crossScalaVersions := Seq(scala3, scala213, scala212) ThisBuild / githubWorkflowTargetBranches := Seq("main") ThisBuild / githubWorkflowJavaVersions := Seq(java17, java11) ThisBuild / tlJdkRelease := Some(8) -ThisBuild / tlFatalWarnings := true +ThisBuild / tlFatalWarnings := false ThisBuild / tlCiHeaderCheck := true ThisBuild / tlCiScalafmtCheck := true ThisBuild / tlCiDocCheck := true @@ -325,6 +326,7 @@ lazy val root = tlCrossRootProject ) .aggregate( avro, + beam, bigquery, bigtable, bom, @@ -488,6 +490,25 @@ lazy val avro = project ) ) +lazy val beam = project + .in(file("beam")) + .dependsOn( + shared, + cats % "test->test", + scalacheck % "test->test", + test % "test->test" + ) + .settings( + commonSettings, + protobufSettings, + moduleName := "magnolify-beam", + description := "Magnolia add-on for Apache Beam", + libraryDependencies ++= Seq( + "org.apache.beam" % "beam-sdks-java-core" % beamVersion % Provided, + "com.google.protobuf" % "protobuf-java" % protobufVersion % ProtobufConfig, + ) + ) + lazy 
val bigquery = project .in(file("bigquery")) .dependsOn( diff --git a/cats/src/test/scala/magnolify/cats/TestEq.scala b/cats/src/test/scala/magnolify/cats/TestEq.scala index 152cccbd8..22ecb0233 100644 --- a/cats/src/test/scala/magnolify/cats/TestEq.scala +++ b/cats/src/test/scala/magnolify/cats/TestEq.scala @@ -60,6 +60,8 @@ object TestEq { implicit val eqJodaDateTime: Eq[joda.DateTime] = Eq.fromUniversalEquals implicit val eqJodaLocalTime: Eq[joda.LocalTime] = Eq.fromUniversalEquals implicit val eqJodaLocalDateTime: Eq[joda.LocalDateTime] = Eq.fromUniversalEquals + implicit val eqJodaDuration: Eq[joda.Duration] = Eq.fromUniversalEquals + implicit val eqJodaInstant: Eq[joda.Instant] = Eq.fromUniversalEquals // enum implicit lazy val eqJavaEnum: Eq[JavaEnums.Color] = Eq.fromUniversalEquals diff --git a/parquet/src/main/scala/magnolify/parquet/logical/package.scala b/parquet/src/main/scala/magnolify/parquet/logical/package.scala index 9223c4342..2883349e8 100644 --- a/parquet/src/main/scala/magnolify/parquet/logical/package.scala +++ b/parquet/src/main/scala/magnolify/parquet/logical/package.scala @@ -23,84 +23,69 @@ import org.apache.parquet.schema.LogicalTypeAnnotation import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit package object logical { + import magnolify.shared.Time._ // TIME (millis i32, micros i64, nanos, i64), UTC true/false // TIMESTAMP (millis, micros, nanos), UTC true/false - object millis { - private val unit = TimeUnit.MILLIS + private trait TimeTypes { + protected def unit: TimeUnit + protected def ts(adjusted: Boolean): LogicalTypeAnnotation = + LogicalTypeAnnotation.timestampType(adjusted, unit) + protected def time(adjusted: Boolean): LogicalTypeAnnotation = + LogicalTypeAnnotation.timeType(adjusted, unit) + } + + object millis extends TimeTypes { + protected val unit = TimeUnit.MILLIS // TIMESTAMP implicit val pfTimestampMillis: Primitive[Instant] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timestampType(true, unit))( 
- Instant.ofEpochMilli - )(_.toEpochMilli) + ParquetField.logicalType[Long](ts(true))(millisToInstant)(millisFromInstant) implicit val pfLocalDateTimeMillis: Primitive[LocalDateTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timestampType(false, unit))(ms => - LocalDateTime.ofInstant(Instant.ofEpochMilli(ms), ZoneOffset.UTC) - )( - _.toInstant(ZoneOffset.UTC).toEpochMilli - ) + ParquetField.logicalType[Long](ts(false))(millisToLocalDateTime)(millisFromLocalDateTime) // TIME implicit val pfOffsetTimeMillis: Primitive[OffsetTime] = - ParquetField.logicalType[Int](LogicalTypeAnnotation.timeType(true, unit))(ms => + ParquetField.logicalType[Int](time(true))(ms => LocalTime.ofNanoOfDay(ms * 1000000L).atOffset(ZoneOffset.UTC) )(t => (t.toLocalTime.toNanoOfDay / 1000000).toInt) implicit val pfLocalTimeMillis: Primitive[LocalTime] = - ParquetField.logicalType[Int](LogicalTypeAnnotation.timeType(false, unit))(ms => - LocalTime.ofNanoOfDay(ms * 1000000L) - )(t => (t.toNanoOfDay / 1000000).toInt) + ParquetField.logicalType[Int](time(false))(millisToLocalTime)(millisFromLocalTime) } - object micros { - private val unit = TimeUnit.MICROS + object micros extends TimeTypes { + override protected val unit = TimeUnit.MICROS // TIMESTAMP - implicit val pfTimestampMillis: Primitive[Instant] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timestampType(true, unit))(us => - Instant.ofEpochMilli(us / 1000) - )(_.toEpochMilli * 1000) - implicit val pfLocalDateTimeMillis: Primitive[LocalDateTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timestampType(false, unit))(us => - LocalDateTime.ofInstant(Instant.ofEpochMilli(us / 1000), ZoneOffset.UTC) - )( - _.toInstant(ZoneOffset.UTC).toEpochMilli * 1000 - ) + implicit val pfTimestampMicros: Primitive[Instant] = + ParquetField.logicalType[Long](ts(true))(microsToInstant)(microsFromInstant) + implicit val pfLocalDateTimeMicros: Primitive[LocalDateTime] = + 
ParquetField.logicalType[Long](ts(false))(microsToLocalDateTime)(microsFromLocalDateTime) // TIME implicit val pfOffsetTimeMicros: Primitive[OffsetTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timeType(true, unit))(us => + ParquetField.logicalType[Long](time(true))(us => LocalTime.ofNanoOfDay(us * 1000).atOffset(ZoneOffset.UTC) )(_.toLocalTime.toNanoOfDay / 1000) implicit val pfLocalTimeMicros: Primitive[LocalTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timeType(false, unit))(us => - LocalTime.ofNanoOfDay(us * 1000) - )(_.toNanoOfDay / 1000) + ParquetField.logicalType[Long](time(false))(microsToLocalTime)(microsFromLocalTime) } - object nanos { - private val unit = TimeUnit.NANOS + object nanos extends TimeTypes { + override protected val unit = TimeUnit.NANOS // TIMESTAMP implicit val pfTimestampMillis: Primitive[Instant] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timestampType(true, unit))(ns => - Instant.ofEpochMilli(ns / 1000000) - )(_.toEpochMilli * 1000000) + ParquetField.logicalType[Long](ts(true))(nanosToInstant)(nanosFromInstant) implicit val pfLocalDateTimeMillis: Primitive[LocalDateTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timestampType(false, unit))(ns => - LocalDateTime.ofInstant(Instant.ofEpochMilli(ns / 1000000), ZoneOffset.UTC) - )( - _.toInstant(ZoneOffset.UTC).toEpochMilli * 1000000 - ) + ParquetField.logicalType[Long](ts(false))(nanosToLocalDateTime)(nanosFromLocalDateTime) // TIME implicit val pfOffsetTimeNanos: Primitive[OffsetTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timeType(true, unit))(ns => + ParquetField.logicalType[Long](time(true))(ns => LocalTime.ofNanoOfDay(ns).atOffset(ZoneOffset.UTC) )(_.toLocalTime.toNanoOfDay) implicit val pfLocalTimeNanos: Primitive[LocalTime] = - ParquetField.logicalType[Long](LogicalTypeAnnotation.timeType(false, unit))( - LocalTime.ofNanoOfDay - )(_.toNanoOfDay) + 
ParquetField.logicalType[Long](time(false))(nanosToLocalTime)(nanosFromLocalTime) } } diff --git a/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala b/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala index c0d9de75e..e3da13639 100644 --- a/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala +++ b/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala @@ -79,6 +79,10 @@ object TestArbitrary { joda.LocalDateTime.parse(ldt.toString) } } + implicit val arbJodaDuration: Arbitrary[joda.Duration] = + Arbitrary(Gen.posNum[Long].map(joda.Duration.millis)) + implicit val arbJodaInstant: Arbitrary[joda.Instant] = + Arbitrary(Gen.posNum[Long].map(l => new joda.Instant(l))) // enum implicit lazy val arbJavaEnum: Arbitrary[JavaEnums.Color] = diff --git a/shared/src/main/scala/magnolify/shared/Time.scala b/shared/src/main/scala/magnolify/shared/Time.scala new file mode 100644 index 000000000..8eb6f177f --- /dev/null +++ b/shared/src/main/scala/magnolify/shared/Time.scala @@ -0,0 +1,134 @@ +package magnolify.shared + +import org.joda.time as joda +import java.time.{Duration, Instant, LocalDateTime, LocalTime, ZoneOffset} +import java.util.concurrent.TimeUnit + +object Time { +// @inline def microsToMillis(micros: Long): Long = TimeUnit.MICROSECONDS.toMillis(micros) +// @inline def millisToMicros(millis: Long): Long = TimeUnit.MILLISECONDS.toMicros(millis) + + // millis ///////////////////////////////////////////////////// + @inline def millisToInstant(millisFromEpoch: Long): Instant = + Instant.ofEpochMilli(millisFromEpoch) + @inline def millisFromInstant(instant: Instant): Long = instant.toEpochMilli + @inline def millisToJodaInstant(millisFromEpoch: Long): joda.Instant = + new joda.Instant(millisFromEpoch) + @inline def millisFromJodaInstant(instant: joda.Instant): Long = instant.getMillis + + @inline def millisToLocalTime(millisFromMidnight: Int): LocalTime = + 
LocalTime.ofNanoOfDay(TimeUnit.MILLISECONDS.toNanos(millisFromMidnight.toLong)) + @inline def millisFromLocalTime(lt: LocalTime): Int = + TimeUnit.NANOSECONDS.toMillis(lt.toNanoOfDay).toInt + @inline def millisToJodaLocalTime(millisFromMidnight: Int): joda.LocalTime = + joda.LocalTime.fromMillisOfDay(millisFromMidnight.toLong) + @inline def millisFromJodaLocalTime(lt: joda.LocalTime): Int = lt.millisOfDay().get() + + @inline def millisToJodaDateTime(millisFromEpoch: Long): joda.DateTime = + new joda.DateTime(millisFromEpoch, joda.DateTimeZone.UTC) + @inline def millisFromJodaDateTime(dt: joda.DateTime): Long = dt.getMillis + + @inline def millisToLocalDateTime(millisFromEpoch: Long): LocalDateTime = + LocalDateTime.ofInstant(millisToInstant(millisFromEpoch), ZoneOffset.UTC) + @inline def millisFromLocalDateTime(ldt: LocalDateTime): Long = + millisFromInstant(ldt.toInstant(ZoneOffset.UTC)) + @inline def millisToJodaLocalDateTime(millisFromEpoch: Long): joda.LocalDateTime = + new joda.LocalDateTime(millisFromEpoch, joda.DateTimeZone.UTC) + @inline def millisFromJodaLocalDateTime(ldt: joda.LocalDateTime): Long = + ldt.toDateTime(joda.DateTimeZone.UTC).getMillis + + @inline def millisToDuration(millis: Long): Duration = Duration.ofMillis(millis) + @inline def millisFromDuration(d: Duration): Long = + TimeUnit.SECONDS.toMillis(d.getSeconds) + TimeUnit.NANOSECONDS.toMillis(d.getNano) + @inline def millisToJodaDuration(millis: Long): joda.Duration = joda.Duration.millis(millis) + @inline def millisFromJodaDuration(d: joda.Duration): Long = d.getMillis + + // micros ///////////////////////////////////////////////////// + @inline def microsToInstant(microsFromEpoch: Long): Instant = { + val epochSeconds = TimeUnit.MICROSECONDS.toSeconds(microsFromEpoch) + val nanoAdjustment = TimeUnit.MICROSECONDS.toNanos(microsFromEpoch % 1_000_000L) + Instant.ofEpochSecond(epochSeconds, nanoAdjustment) + } + @inline def microsFromInstant(instant: Instant): Long = { + val seconds = 
instant.getEpochSecond + val nanos = instant.getNano + if (seconds < 0 && nanos > 0) { + val micros = Math.multiplyExact(seconds + 1, 1000000L) + val adjustment = (nanos / 1000L) - 1000000 + Math.addExact(micros, adjustment) + } else { + val micros = Math.multiplyExact(seconds, 1000000L) + Math.addExact(micros, nanos / 1000L) + } + } + @inline def microsToJodaInstant(microsFromEpoch: Long): joda.Instant = + new joda.Instant(TimeUnit.MICROSECONDS.toMillis(microsFromEpoch), joda.DateTimeZone.UTC) + @inline def microsFromJodaInstant(instant: joda.Instant): Long = + TimeUnit.MILLISECONDS.toMicros(instant.getMillis) + + @inline def microsToJodaDateTime(microsFromEpoch: Long): joda.DateTime = + new joda.DateTime(TimeUnit.MICROSECONDS.toMillis(microsFromEpoch), joda.DateTimeZone.UTC) + @inline def microsFromJodaDateTime(dt: joda.DateTime): Long = + TimeUnit.MILLISECONDS.toMicros(dt.getMillis) + + @inline def microsToLocalTime(microsFromMidnight: Long): LocalTime = + LocalTime.ofNanoOfDay(TimeUnit.MICROSECONDS.toNanos(microsFromMidnight)) + @inline def microsFromLocalTime(lt: LocalTime): Long = + TimeUnit.NANOSECONDS.toMicros(lt.toNanoOfDay) + @inline def microsToJodaLocalTime(microsFromMidnight: Long): joda.LocalTime = + joda.LocalTime.fromMillisOfDay(TimeUnit.MICROSECONDS.toMillis(microsFromMidnight)) + @inline def microsFromJodaLocalTime(lt: joda.LocalTime): Long = + TimeUnit.MILLISECONDS.toMicros(lt.millisOfDay().get()) + + @inline def microsToLocalDateTime(microsFromEpoch: Long): LocalDateTime = + LocalDateTime.ofInstant(microsToInstant(microsFromEpoch), ZoneOffset.UTC) + @inline def microsFromLocalDateTime(ldt: LocalDateTime): Long = + microsFromInstant(ldt.toInstant(ZoneOffset.UTC)) + + @inline def microsToDuration(micros: Long): Duration = + Duration.ofMillis(TimeUnit.MICROSECONDS.toMillis(micros)) + @inline def microsFromDuration(d: Duration): Long = + TimeUnit.SECONDS.toMicros(d.getSeconds) + TimeUnit.NANOSECONDS.toMicros(d.getNano) + @inline def 
microsToJodaDuration(micros: Long): joda.Duration = + joda.Duration.millis(TimeUnit.MICROSECONDS.toMillis(micros)) + @inline def microsFromJodaDuration(d: joda.Duration): Long = + TimeUnit.MILLISECONDS.toMicros(d.getMillis) + + // nanos ///////////////////////////////////////////////////// + // Long does not technically have enough range for Instant + @inline def nanosToInstant(epochNanos: Long): Instant = + Instant.ofEpochSecond(TimeUnit.NANOSECONDS.toSeconds(epochNanos), epochNanos % 1_000_000_000L) + @inline def nanosFromInstant(instant: Instant): Long = + TimeUnit.MILLISECONDS.toNanos(instant.toEpochMilli) + instant.getNano + @inline def nanosToJodaInstant(nanosFromEpoch: Long): joda.Instant = + new joda.Instant(TimeUnit.NANOSECONDS.toMillis(nanosFromEpoch), joda.DateTimeZone.UTC) + @inline def nanosFromJodaInstant(instant: joda.Instant): Long = + TimeUnit.MILLISECONDS.toNanos(instant.getMillis) + + @inline def nanosToJodaDateTime(nanosFromEpoch: Long): joda.DateTime = + new joda.DateTime(TimeUnit.NANOSECONDS.toMillis(nanosFromEpoch), joda.DateTimeZone.UTC) + @inline def nanosFromJodaDateTime(dt: joda.DateTime): Long = + TimeUnit.MILLISECONDS.toNanos(dt.getMillis) + + @inline def nanosToLocalTime(nanosFromMidnight: Long): LocalTime = + LocalTime.ofNanoOfDay(nanosFromMidnight) + @inline def nanosFromLocalTime(lt: LocalTime): Long = lt.toNanoOfDay + @inline def nanosToJodaLocalTime(nanosFromMidnight: Long): joda.LocalTime = + joda.LocalTime.fromMillisOfDay(TimeUnit.NANOSECONDS.toMillis(nanosFromMidnight)) + @inline def nanosFromJodaLocalTime(lt: joda.LocalTime): Long = + TimeUnit.MILLISECONDS.toNanos(lt.millisOfDay().get()) + + @inline def nanosToLocalDateTime(nanosFromEpoch: Long): LocalDateTime = + LocalDateTime.ofInstant(nanosToInstant(nanosFromEpoch), ZoneOffset.UTC) + @inline def nanosFromLocalDateTime(ldt: LocalDateTime): Long = + nanosFromInstant(ldt.toInstant(ZoneOffset.UTC)) + + @inline def nanosToDuration(nanos: Long): Duration = + 
Duration.ofNanos(nanos) + @inline def nanosFromDuration(d: Duration): Long = + TimeUnit.SECONDS.toNanos(d.getSeconds) + d.getNano + @inline def nanosToJodaDuration(nanos: Long): joda.Duration = + joda.Duration.millis(TimeUnit.NANOSECONDS.toMillis(nanos)) + @inline def nanosFromJodaDuration(d: joda.Duration): Long = + TimeUnit.MILLISECONDS.toNanos(d.getMillis) +} From 803e64ee1f906d575e16a8f744073c9b44482be9 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Wed, 21 Aug 2024 23:39:41 -0400 Subject: [PATCH 02/31] wip --- .../scala/magnolify/beam/BeamSchemaType.scala | 100 +++--------------- .../magnolify/beam/BeamSchemaTypeSuite.scala | 81 ++++++++------ 2 files changed, 64 insertions(+), 117 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 1c9c4033a..205380b00 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -19,7 +19,7 @@ package magnolify.beam import magnolia1.* import magnolify.shared.* import org.apache.beam.sdk.schemas.Schema -import org.apache.beam.sdk.schemas.Schema.{FieldType, LogicalType} +import org.apache.beam.sdk.schemas.Schema.FieldType import org.apache.beam.sdk.values.Row import org.joda.time as joda import com.google.protobuf.ByteString @@ -27,7 +27,6 @@ import magnolify.shims.FactoryCompat import org.apache.beam.sdk.schemas.logicaltypes as logicaltypes import java.nio.ByteBuffer -import java.time.LocalDate import java.{time as jt, util as ju} import scala.annotation.implicitNotFound import scala.collection.concurrent @@ -87,25 +86,9 @@ object BeamSchemaField { override def to(v: T)(cm: CaseMapper): ToT = toFn(v) } -// private[magnolify] def aux2[T, Repr](fieldTypeFn: CaseMapper => FieldType)(fromFn: Repr => T)( -// toFn: T => Repr -// ): BeamSchemaField[T] = -// aux[T, Repr, Repr](fieldTypeFn)(fromFn)(toFn) - private[magnolify] def id[T](ft: CaseMapper => FieldType): 
BeamSchemaField[T] = aux[T, T, T](ft)(identity)(identity) - private[magnolify] def logicalId[T](ft: CaseMapper => FieldType): BeamSchemaField[T] = id(ft) - -// private[magnolify] def logicalId[T](ft: CaseMapper => FieldType): BeamSchemaLogicalField[T] = -// new BeamSchemaLogicalField[T] { -// type FromT = T -// type ToT = T -// override def fieldType(cm: CaseMapper): FieldType = ft(cm) -// override def from(v: FromT)(cm: CaseMapper): T = v -// override def to(v: T)(cm: CaseMapper): ToT = v -// } - def from[T]: FromWord[T] = new FromWord[T] class FromWord[T] { @@ -117,24 +100,6 @@ object BeamSchemaField { } } -// def logicalType[T]: LogicalWord[T] = new LogicalWord[T] -// -// class LogicalWord[T] { -// def apply[From, To](lt: LogicalType[From, ?], nullable: Boolean = false): BeamSchemaLogicalField[T] = { -// new BeamSchemaLogicalField[T] { -// type FromT = From -// type ToT = To -// override def logicalType: LogicalType[From, To] = lt -// override def fieldType(cm: CaseMapper): FieldType = -// FieldType.logicalType(logicalType).withNullable(nullable) -// override def from(v: From)(cm: CaseMapper): T = -// throw new UnsupportedOperationException("Do not call from() on logical types") -// override def to(v: T)(cm: CaseMapper): To = -// throw new UnsupportedOperationException("Do not call to() on logical types") -// } -// } -// } - sealed trait Record[T] extends Aux[T, Row, Row] { @transient private lazy val schemaCache: concurrent.Map[ju.UUID, Schema] = concurrent.TrieMap.empty @@ -153,17 +118,15 @@ object BeamSchemaField { def join[T](caseClass: CaseClass[Typeclass, T]): BeamSchemaField[T] = { if (caseClass.isValueClass) { - // FIXME -// val p = caseClass.parameters.head -// val tc = p.typeclass -// new BeamSchemaField[T] { -// override type FromT = tc.FromT -// override type ToT = tc.ToT -// // override protected def buildSchema(cm: CaseMapper): Schema = tc.buildSchema(cm) -// override def from(v: FromT)(cm: CaseMapper): T = caseClass.construct(_ => 
tc.fromAny(v)(cm)) -// override def to(v: T)(cm: CaseMapper): ToT = tc.to(p.dereference(v))(cm) -// } - ??? + val p = caseClass.parameters.head + val tc = p.typeclass + new BeamSchemaField[T] { + override type FromT = tc.FromT + override type ToT = tc.ToT + override def fieldType(cm: CaseMapper): FieldType = tc.fieldType(cm) + override def from(v: FromT)(cm: CaseMapper): T = caseClass.construct(_ => tc.fromAny(v)(cm)) + override def to(v: T)(cm: CaseMapper): ToT = tc.to(p.dereference(v))(cm) + } } else { new Record[T] { override def fieldType(cm: CaseMapper): FieldType = FieldType.row(schema(cm)) @@ -171,7 +134,7 @@ object BeamSchemaField { override protected def buildSchema(cm: CaseMapper): Schema = caseClass.parameters .foldLeft(Schema.builder()) { case (s, p) => - s.addField(p.label, p.typeclass.fieldType(cm)) + s.addField(cm.map(p.label), p.typeclass.fieldType(cm)) } .build() @@ -229,20 +192,10 @@ object BeamSchemaField { )(_.bigDecimal) implicit val bsfUUID: BeamSchemaField[ju.UUID] = - logicalId[ju.UUID](_ => FieldType.logicalType(new logicaltypes.UuidLogicalType)) -// new BeamSchemaLogicalField[ju.UUID] { -// type FromT = ju.UUID -// type ToT = ju.UUID -// val logicalType = new logicaltypes.UuidLogicalType -// override def fieldType(cm: CaseMapper): FieldType = FieldType.logicalType(logicalType) -// override def from(v: ju.UUID)(cm: CaseMapper): ju.UUID = v -// override def to(v: ju.UUID)(cm: CaseMapper): ju.UUID = v -// } + id[ju.UUID](_ => FieldType.logicalType(new logicaltypes.UuidLogicalType)) -// implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = -// from[Long](LocalDate.ofEpochDay)(_.toEpochDay) implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = - logicalId[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) + id[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) private lazy val EpochJodaDate = new joda.LocalDate(1970, 1, 1) implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = 
from[Int](daysFromEpoch => EpochJodaDate.plusDays(daysFromEpoch))(d => @@ -292,18 +245,6 @@ object BeamSchemaField { ti: C[T] => Iterable[T], fc: FactoryCompat[T, C[T]] ): BeamSchemaField[C[T]] = { -// f match { -// case l: BeamSchemaLogicalField[T] => -// new BeamSchemaLogicalField[C[T]] { -// type FromT = ju.List[l.FromT] -// type ToT = ju.List[l.ToT] -// override def fieldType(cm: CaseMapper): FieldType = FieldType.iterable(l.fieldType(cm)) -// override def to(v: C[T])(cm: CaseMapper): ToT = -// v.iterator.map(l.to(_)(cm)).toList.asJava -// override def from(v: FromT)(cm: CaseMapper): C[T] = -// fc.fromSpecific(v.asScala.iterator.map(p => l.from(p)(cm))) -// } -// case _: BeamSchemaField[_] => new Aux[C[T], ju.List[f.FromT], ju.List[f.ToT]] { override def from(v: ju.List[f.FromT])(cm: CaseMapper): C[T] = fc.fromSpecific(v.asScala.iterator.map(p => f.from(p)(cm))) @@ -311,23 +252,9 @@ object BeamSchemaField { v.iterator.map(f.to(_)(cm)).toList.asJava override def fieldType(cm: CaseMapper): FieldType = FieldType.iterable(f.fieldType(cm)) } -// } } implicit def bsfOption[T](implicit f: BeamSchemaField[T]): BeamSchemaField[Option[T]] = { -// f match { -// case l: BeamSchemaLogicalField[T] => -// new BeamSchemaLogicalField[Option[T]] { -// type FromT = l.FromT -// type ToT = l.ToT -// override def fieldType(cm: CaseMapper): FieldType = l.fieldType(cm).withNullable(true) -// override def to(v: Option[T])(cm: CaseMapper): ToT = v match { -// case None => null.asInstanceOf[ToT] -// case Some(value) => l.to(value)(cm) -// } -// override def from(v: FromT)(cm: CaseMapper): Option[T] = Option(v).map(l.from(_)(cm)) -// } -// case _: BeamSchemaField[T] => new Aux[Option[T], f.FromT, f.ToT] { override def from(v: f.FromT)(cm: CaseMapper): Option[T] = if (v == null) None else Some(f.from(v)(cm)) @@ -337,6 +264,5 @@ object BeamSchemaField { } override def fieldType(cm: CaseMapper): FieldType = f.fieldType(cm).withNullable(true) } -// } } } diff --git 
a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index c15180c04..3b881be13 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -21,14 +21,17 @@ import magnolify.cats.auto.* import magnolify.cats.TestEq.* import magnolify.scalacheck.auto.* import magnolify.scalacheck.TestArbitrary.* +import magnolify.shared.CaseMapper import magnolify.test.MagnolifySuite import magnolify.test.Simple.* +import org.apache.beam.sdk.schemas.Schema import org.joda.time as joda import org.scalacheck.{Arbitrary, Gen, Prop} import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} import java.util.UUID import scala.reflect.ClassTag +import scala.jdk.CollectionConverters.* class BeamSchemaTypeSuite extends MagnolifySuite { private def test[T: Arbitrary: ClassTag](implicit @@ -60,6 +63,8 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[Maps] test[Logical] + // FIXME value classes + { import magnolify.beam.unsafe._ import magnolify.shared.TestEnumType._ @@ -71,37 +76,54 @@ class BeamSchemaTypeSuite extends MagnolifySuite { Arbitrary(Gen.chooseNum(0, Int.MaxValue).map(BigDecimal(_))) test[Decimal] + test("Millis") { + import magnolify.beam.logical.millis.* + test[JavaTime] + test[JodaTime] + } + + test("Micros") { + import magnolify.beam.logical.micros.* + test[JavaTime] + test[JodaTime] + } + + test("Nanos") { + import magnolify.beam.logical.nanos.* + test[JavaTime] + test[JodaTime] + } + { - import magnolify.beam.logical.millis._ - test[Time] - test[Joda] + implicit val bst: BeamSchemaType[LowerCamel] = + BeamSchemaType[LowerCamel](CaseMapper(_.toUpperCase)) + test[LowerCamel] + + test("LowerCamel mapping") { + val schema = bst.schema + + val fields = LowerCamel.fields.map(_.toUpperCase) + assertEquals(schema.getFields.asScala.map(_.getName()).toSeq, fields) + assertEquals( + 
schema.getField("INNERFIELD").getType.getRowSchema.getFields.asScala.map(_.getName()).toSeq, + Seq("INNERFIRST") + ) + } } -// { -// // FIXME need special Eq instances that are lossy -// import magnolify.beam.logical.micros._ -// test[Time] -// test[Joda] -// } -// -// { -//// FIXME need special Eq instances that are lossy -// import magnolify.beam.logical.nanos._ -// test[Time] -// test[Joda] -// } - -// { -// implicit val bst: BeamSchemaType[LowerCamel] = -// BeamSchemaType[LowerCamel](CaseMapper(_.toUpperCase)) -// test[LowerCamel] -// -// test("LowerCamel mapping") { -// val schema = bst.schema -// // FIXME -// } -// } + test("ValueClass") { + // value classes should act only as fields + intercept[IllegalArgumentException] { + BeamSchemaType[ValueClass] + } + implicit val bst: BeamSchemaType[HasValueClass] = BeamSchemaType[HasValueClass] + test[HasValueClass] + + assert(bst.schema.getField("vc").getType == Schema.FieldType.STRING) + val record = bst(HasValueClass(ValueClass("String"))) + assert(record.getValue("vc").equals("String")) + } } case class Decimal(bd: BigDecimal, bdo: Option[BigDecimal]) @@ -111,14 +133,13 @@ case class Logical( ul: List[UUID], ulo: List[Option[UUID]] ) - -case class Time( +case class JavaTime( i: Instant, d: LocalDate, dt: LocalDateTime, t: LocalTime ) -case class Joda( +case class JodaTime( i: joda.Instant, dt: joda.DateTime, lt: joda.LocalTime, From a3d90bfd5ceb47979b0e11c1536a5b5e68631c30 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 08:34:38 -0400 Subject: [PATCH 03/31] wip --- .../main/scala/magnolify/avro/AvroType.scala | 2 +- .../magnolify/avro/logical/package.scala | 1 - .../scala/magnolify/beam/BeamSchemaType.scala | 22 +++++++++++-- .../magnolify/beam/BeamSchemaTypeSuite.scala | 33 ++++++++++++------- .../magnolify/parquet/logical/TimeTypes.scala | 28 ++++++++++++++++ .../magnolify/parquet/logical/package.scala | 9 ----- .../magnolify/parquet/ParquetTypeSuite.scala | 6 ++-- 
.../main/scala/magnolify/shared/Time.scala | 16 ++++----- 8 files changed, 81 insertions(+), 36 deletions(-) create mode 100644 parquet/src/main/scala/magnolify/parquet/logical/TimeTypes.scala diff --git a/avro/src/main/scala/magnolify/avro/AvroType.scala b/avro/src/main/scala/magnolify/avro/AvroType.scala index d4f853cf7..7ede81ffe 100644 --- a/avro/src/main/scala/magnolify/avro/AvroType.scala +++ b/avro/src/main/scala/magnolify/avro/AvroType.scala @@ -221,7 +221,7 @@ object AvroField { override def from(v: String)(cm: CaseMapper): String = v override def to(v: String)(cm: CaseMapper): String = v } -f + implicit def afEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): AvroField[T] = // Avro 1.9+ added a type parameter for `GenericEnumSymbol`, breaking 1.8 compatibility // Some reader, i.e. `AvroParquetReader` reads enums as `Utf8` diff --git a/avro/src/main/scala/magnolify/avro/logical/package.scala b/avro/src/main/scala/magnolify/avro/logical/package.scala index 35620cc56..bd0d070c1 100644 --- a/avro/src/main/scala/magnolify/avro/logical/package.scala +++ b/avro/src/main/scala/magnolify/avro/logical/package.scala @@ -22,7 +22,6 @@ import org.joda.{time => joda} import java.time._ import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder} -import java.util.concurrent.TimeUnit package object logical { import magnolify.shared.Time._ diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 205380b00..67b489a4f 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -24,7 +24,7 @@ import org.apache.beam.sdk.values.Row import org.joda.time as joda import com.google.protobuf.ByteString import magnolify.shims.FactoryCompat -import org.apache.beam.sdk.schemas.logicaltypes as logicaltypes +import org.apache.beam.sdk.schemas.logicaltypes import java.nio.ByteBuffer import java.{time as jt, util as ju} 
@@ -112,9 +112,26 @@ object BeamSchemaField { type Typeclass[T] = BeamSchemaField[T] implicit def gen[T]: BeamSchemaField[T] = macro Magnolia.gen[T] - @implicitNotFound("Cannot derive BeamSchemaField for sealed trait") // TODO does this make sense? + // TODO beam schemas support OneOf + @implicitNotFound("Cannot derive BeamSchemaField for sealed trait") private sealed trait Dispatchable[T] def split[T: Dispatchable](sealedTrait: SealedTrait[Typeclass, T]): BeamSchemaField[T] = ??? + // new BeamSchemaField[T] { + // override type FromT = ??? + // override type ToT = ??? + // override def fieldType(cm: CaseMapper): FieldType = { + // FieldType.logicalType( + // logicaltypes.OneOfType.create( + // sealedTrait.subtypes.map { sub => + // Field.of(s"${sub.typeName.owner}.${sub.typeName.short}", sub.typeclass.fieldType(cm)) + // } + // .asJava + // ) + // ) + // } + // override def from(v: this.type)(cm: CaseMapper): T = ??? + // override def to(v: T)(cm: CaseMapper): this.type = ??? + // } def join[T](caseClass: CaseClass[Typeclass, T]): BeamSchemaField[T] = { if (caseClass.isValueClass) { @@ -233,7 +250,6 @@ object BeamSchemaField { new Aux[Map[K, V], ju.Map[bsfK.FromT, bsfV.FromT], ju.Map[bsfK.ToT, bsfV.ToT]] { override def fieldType(cm: CaseMapper): FieldType = FieldType.map(bsfK.fieldType(cm), bsfV.fieldType(cm)) - // TODO need to convert/not convert logical types override def from(v: ju.Map[bsfK.FromT, bsfV.FromT])(cm: CaseMapper): Map[K, V] = v.asScala.map { case (k, v) => bsfK.from(k)(cm) -> bsfV.from(v)(cm) }.toMap override def to(v: Map[K, V])(cm: CaseMapper): ju.Map[bsfK.ToT, bsfV.ToT] = diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index 3b881be13..9a5cf127a 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -17,6 +17,7 @@ package magnolify.beam import cats.* +import 
com.google.protobuf.ByteString import magnolify.cats.auto.* import magnolify.cats.TestEq.* import magnolify.scalacheck.auto.* @@ -51,6 +52,12 @@ class BeamSchemaTypeSuite extends MagnolifySuite { } } + implicit val arbByteString: Arbitrary[ByteString] = + Arbitrary(Gen.alphaNumStr.map(ByteString.copyFromUtf8)) + implicit val arbBigDecimal: Arbitrary[BigDecimal] = + Arbitrary(Gen.chooseNum(0, Int.MaxValue).map(BigDecimal(_))) + implicit val eqByteString: Eq[ByteString] = Eq.instance(_ == _) + test[Integers] test[Floats] test[Required] @@ -60,10 +67,10 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[Collections] test[MoreCollections] + test[Bs] test[Maps] test[Logical] - - // FIXME value classes + test[Decimal] { import magnolify.beam.unsafe._ @@ -72,23 +79,19 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[UnsafeEnums] } - implicit val arbBigDecimal: Arbitrary[BigDecimal] = - Arbitrary(Gen.chooseNum(0, Int.MaxValue).map(BigDecimal(_))) - test[Decimal] - - test("Millis") { + property("Millis") { import magnolify.beam.logical.millis.* test[JavaTime] test[JodaTime] } - test("Micros") { + property("Micros") { import magnolify.beam.logical.micros.* test[JavaTime] test[JodaTime] } - test("Nanos") { + property("Nanos") { import magnolify.beam.logical.nanos.* test[JavaTime] test[JodaTime] @@ -122,10 +125,11 @@ class BeamSchemaTypeSuite extends MagnolifySuite { assert(bst.schema.getField("vc").getType == Schema.FieldType.STRING) val record = bst(HasValueClass(ValueClass("String"))) - assert(record.getValue("vc").equals("String")) + assert(record.getValue[String]("vc").equals("String")) } } +case class Bs(bs: ByteString) case class Decimal(bd: BigDecimal, bdo: Option[BigDecimal]) case class Logical( u: UUID, @@ -148,7 +152,14 @@ case class JodaTime( case class Maps( ms: Map[String, String], mi: Map[Int, Int], + ml: Map[Long, Long], + md: Map[Double, Double], + mf: Map[Float, Float], + mb: Map[Byte, Byte], + msh: Map[Short, Short], + mba: Map[Byte, 
Array[Byte]], + mbs: Map[ByteString, Array[Byte]], mso: Map[Option[String], Option[String]], - ml: Map[UUID, UUID], + mu: Map[UUID, UUID], mlo: Map[Option[UUID], Option[UUID]] ) diff --git a/parquet/src/main/scala/magnolify/parquet/logical/TimeTypes.scala b/parquet/src/main/scala/magnolify/parquet/logical/TimeTypes.scala new file mode 100644 index 000000000..1e9b76a5c --- /dev/null +++ b/parquet/src/main/scala/magnolify/parquet/logical/TimeTypes.scala @@ -0,0 +1,28 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package magnolify.parquet.logical + +import org.apache.parquet.schema.LogicalTypeAnnotation +import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit + +trait TimeTypes { + protected def unit: TimeUnit + protected def ts(adjusted: Boolean): LogicalTypeAnnotation = + LogicalTypeAnnotation.timestampType(adjusted, unit) + protected def time(adjusted: Boolean): LogicalTypeAnnotation = + LogicalTypeAnnotation.timeType(adjusted, unit) +} diff --git a/parquet/src/main/scala/magnolify/parquet/logical/package.scala b/parquet/src/main/scala/magnolify/parquet/logical/package.scala index 2883349e8..e0b64d84c 100644 --- a/parquet/src/main/scala/magnolify/parquet/logical/package.scala +++ b/parquet/src/main/scala/magnolify/parquet/logical/package.scala @@ -19,7 +19,6 @@ package magnolify.parquet import java.time._ import magnolify.parquet.ParquetField.Primitive -import org.apache.parquet.schema.LogicalTypeAnnotation import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit package object logical { @@ -27,14 +26,6 @@ package object logical { // TIME (millis i32, micros i64, nanos, i64), UTC true/false // TIMESTAMP (millis, micros, nanos), UTC true/false - private trait TimeTypes { - protected def unit: TimeUnit - protected def ts(adjusted: Boolean): LogicalTypeAnnotation = - LogicalTypeAnnotation.timestampType(adjusted, unit) - protected def time(adjusted: Boolean): LogicalTypeAnnotation = - LogicalTypeAnnotation.timeType(adjusted, unit) - } - object millis extends TimeTypes { protected val unit = TimeUnit.MILLIS diff --git a/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala b/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala index dbbb7bb8f..b7d6ef3ab 100644 --- a/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala +++ b/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala @@ -165,17 +165,17 @@ class ParquetTypeSuite extends MagnolifySuite { test[Logical] - { + property("Millis") { import 
magnolify.parquet.logical.millis._ test[Time] } - { + property("Micros") { import magnolify.parquet.logical.micros._ test[Time] } - { + property("Nanos") { import magnolify.parquet.logical.nanos._ test[Time] } diff --git a/shared/src/main/scala/magnolify/shared/Time.scala b/shared/src/main/scala/magnolify/shared/Time.scala index 8eb6f177f..5c61e960d 100644 --- a/shared/src/main/scala/magnolify/shared/Time.scala +++ b/shared/src/main/scala/magnolify/shared/Time.scala @@ -13,7 +13,7 @@ object Time { Instant.ofEpochMilli(millisFromEpoch) @inline def millisFromInstant(instant: Instant): Long = instant.toEpochMilli @inline def millisToJodaInstant(millisFromEpoch: Long): joda.Instant = - new joda.Instant(millisFromEpoch) + joda.Instant.ofEpochMilli(millisFromEpoch) @inline def millisFromJodaInstant(instant: joda.Instant): Long = instant.getMillis @inline def millisToLocalTime(millisFromMidnight: Int): LocalTime = @@ -39,7 +39,7 @@ object Time { @inline def millisToDuration(millis: Long): Duration = Duration.ofMillis(millis) @inline def millisFromDuration(d: Duration): Long = - TimeUnit.SECONDS.toMillis(d.getSeconds) + TimeUnit.NANOSECONDS.toMillis(d.getNano) + TimeUnit.SECONDS.toMillis(d.getSeconds) + TimeUnit.NANOSECONDS.toMillis(d.getNano.toLong) @inline def millisToJodaDuration(millis: Long): joda.Duration = joda.Duration.millis(millis) @inline def millisFromJodaDuration(d: joda.Duration): Long = d.getMillis @@ -62,7 +62,7 @@ object Time { } } @inline def microsToJodaInstant(microsFromEpoch: Long): joda.Instant = - new joda.Instant(TimeUnit.MICROSECONDS.toMillis(microsFromEpoch), joda.DateTimeZone.UTC) + joda.Instant.ofEpochMilli(TimeUnit.MICROSECONDS.toMillis(microsFromEpoch)) @inline def microsFromJodaInstant(instant: joda.Instant): Long = TimeUnit.MILLISECONDS.toMicros(instant.getMillis) @@ -78,7 +78,7 @@ object Time { @inline def microsToJodaLocalTime(microsFromMidnight: Long): joda.LocalTime = 
joda.LocalTime.fromMillisOfDay(TimeUnit.MICROSECONDS.toMillis(microsFromMidnight)) @inline def microsFromJodaLocalTime(lt: joda.LocalTime): Long = - TimeUnit.MILLISECONDS.toMicros(lt.millisOfDay().get()) + TimeUnit.MILLISECONDS.toMicros(lt.millisOfDay().get().toLong) @inline def microsToLocalDateTime(microsFromEpoch: Long): LocalDateTime = LocalDateTime.ofInstant(microsToInstant(microsFromEpoch), ZoneOffset.UTC) @@ -88,7 +88,7 @@ object Time { @inline def microsToDuration(micros: Long): Duration = Duration.ofMillis(TimeUnit.MICROSECONDS.toMillis(micros)) @inline def microsFromDuration(d: Duration): Long = - TimeUnit.SECONDS.toMicros(d.getSeconds) + TimeUnit.NANOSECONDS.toMicros(d.getNano) + TimeUnit.SECONDS.toMicros(d.getSeconds) + TimeUnit.NANOSECONDS.toMicros(d.getNano.toLong) @inline def microsToJodaDuration(micros: Long): joda.Duration = joda.Duration.millis(TimeUnit.MICROSECONDS.toMillis(micros)) @inline def microsFromJodaDuration(d: joda.Duration): Long = @@ -99,9 +99,9 @@ object Time { @inline def nanosToInstant(epochNanos: Long): Instant = Instant.ofEpochSecond(TimeUnit.NANOSECONDS.toSeconds(epochNanos), epochNanos % 1_000_000_000L) @inline def nanosFromInstant(instant: Instant): Long = - TimeUnit.MILLISECONDS.toNanos(instant.toEpochMilli) + instant.getNano + TimeUnit.SECONDS.toNanos(instant.getEpochSecond) + instant.getNano @inline def nanosToJodaInstant(nanosFromEpoch: Long): joda.Instant = - new joda.Instant(TimeUnit.NANOSECONDS.toMillis(nanosFromEpoch), joda.DateTimeZone.UTC) + joda.Instant.ofEpochMilli(TimeUnit.NANOSECONDS.toMillis(nanosFromEpoch)) @inline def nanosFromJodaInstant(instant: joda.Instant): Long = TimeUnit.MILLISECONDS.toNanos(instant.getMillis) @@ -116,7 +116,7 @@ object Time { @inline def nanosToJodaLocalTime(nanosFromMidnight: Long): joda.LocalTime = joda.LocalTime.fromMillisOfDay(TimeUnit.NANOSECONDS.toMillis(nanosFromMidnight)) @inline def nanosFromJodaLocalTime(lt: joda.LocalTime): Long = - 
TimeUnit.MILLISECONDS.toNanos(lt.millisOfDay().get()) + TimeUnit.MILLISECONDS.toNanos(lt.millisOfDay().get().toLong) @inline def nanosToLocalDateTime(nanosFromEpoch: Long): LocalDateTime = LocalDateTime.ofInstant(nanosToInstant(nanosFromEpoch), ZoneOffset.UTC) From 30beea4cd054a8b1299677937e8422f8d32e5fb3 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 09:08:20 -0400 Subject: [PATCH 04/31] wip --- .../scala/magnolify/beam/unsafe/package.scala | 18 +++++++++++++++++- .../main/scala/magnolify/shared/Time.scala | 19 ++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/unsafe/package.scala b/beam/src/main/scala/magnolify/beam/unsafe/package.scala index 9379200cf..88ba49f39 100644 --- a/beam/src/main/scala/magnolify/beam/unsafe/package.scala +++ b/beam/src/main/scala/magnolify/beam/unsafe/package.scala @@ -1,6 +1,22 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package magnolify.beam -import magnolify.shared._ +import magnolify.shared.* package object unsafe { implicit def afUnsafeEnum[T: EnumType]: BeamSchemaField[UnsafeEnum[T]] = diff --git a/shared/src/main/scala/magnolify/shared/Time.scala b/shared/src/main/scala/magnolify/shared/Time.scala index 5c61e960d..83ce8b8f9 100644 --- a/shared/src/main/scala/magnolify/shared/Time.scala +++ b/shared/src/main/scala/magnolify/shared/Time.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package magnolify.shared import org.joda.time as joda @@ -5,9 +21,6 @@ import java.time.{Duration, Instant, LocalDateTime, LocalTime, ZoneOffset} import java.util.concurrent.TimeUnit object Time { -// @inline def microsToMillis(micros: Long): Long = TimeUnit.MICROSECONDS.toMillis(micros) -// @inline def millisToMicros(millis: Long): Long = TimeUnit.MILLISECONDS.toMicros(millis) - // millis ///////////////////////////////////////////////////// @inline def millisToInstant(millisFromEpoch: Long): Instant = Instant.ofEpochMilli(millisFromEpoch) From dc9c175e2db2eb542a0fc7717f27bd1ee52d4274 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 09:32:25 -0400 Subject: [PATCH 05/31] wip --- docs/beam.md | 3 +++ docs/mapping.md | 71 ++++++++++++++++++++++++++----------------------- 2 files changed, 40 insertions(+), 34 deletions(-) create mode 100644 docs/beam.md diff --git a/docs/beam.md b/docs/beam.md new file mode 100644 index 000000000..0009c2f34 --- /dev/null +++ b/docs/beam.md @@ -0,0 +1,3 @@ +# Beam + +https://beam.apache.org/documentation/programming-guide/#schema-definition \ No newline at end of file diff --git a/docs/mapping.md b/docs/mapping.md index bafde4b65..2b3b9222b 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -1,38 +1,38 @@ # Type Mapping -| Scala | Avro | BigQuery | Bigtable7 | Datastore | Parquet | Protobuf | TensorFlow | -|-----------------------------------|------------------------------|------------------------|---------------------------------|-----------------------|-----------------------------------|-------------------------|---------------------| -| `Unit` | `null` | x | x | `Null` | x | x | x | -| `Boolean` | `boolean` | `BOOL` | `Byte` | `Boolean` | `BOOLEAN` | `Boolean` | `INT64`3 | -| `Char` | `int`3 | `INT64`3 | `Char` | `Integer`3 | `INT32`3 | `Int`3 | `INT64`3 | -| `Byte` | `int`3 | `INT64`3 | `Byte` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | -| `Short` | `int`3 | `INT64`3 | `Short` | `Integer`3 | `INT32`9 | 
`Int`3 | `INT64`3 | -| `Int` | `int` | `INT64`3 | `Int` | `Integer`3 | `INT32`9 | `Int` | `INT64`3 | -| `Long` | `long` | `INT64` | `Long` | `Integer` | `INT64`9 | `Long` | `INT64` | -| `Float` | `float` | `FLOAT64`3 | `Float` | `Double`3 | `FLOAT` | `Float` | `FLOAT` | -| `Double` | `double` | `FLOAT64` | `Double` | `Double` | `DOUBLE` | `Double` | `FLOAT`3 | -| `CharSequence` | `string` | x | x | x | x | x | x | -| `String` | `string` | `STRING` | `String` | `String` | `BINARY` | `String` | `BYTES`3 | -| `Array[Byte]` | `bytes` | `BYTES` | `ByteString` | `Blob` | `BINARY` | `ByteString` | `BYTES` | -| `ByteString` | x | x | `ByteString` | `Blob` | x | `ByteString` | `BYTES` | -| `ByteBuffer` | `bytes` | x | x | | x | x | x | -| Enum1 | `enum` | `STRING`3 | `String` | `String`3 | `BINARY`/`ENUM`9 | Enum | `BYTES`3 | -| `BigInt` | x | x | `BigInt` | x | x | x | x | -| `BigDecimal` | `bytes`4 | `NUMERIC`6 | `Int` scale + unscaled `BigInt` | x | `LOGICAL[DECIMAL]`9,14 | x | x | -| `Option[T]` | `union[null, T]`5 | `NULLABLE` | Empty as `None` | Absent as `None` | `OPTIONAL` | `optional`10 | Size <= 1 | -| `Iterable[T]`2 | `array[T]` | `REPEATED` | x | `Array` | `REPEATED`13 | `repeated` | Size >= 0 | -| Nested | `record` | `STRUCT` | Flat8 | `Entity` | Group | `Message` | Flat8 | -| `Map[K, V]` | `map[V]`15 | x | x | x | x | `map` | x | -| `java.time.Instant` | `long`11 | `TIMESTAMP` | x | `Timestamp` | `LOGICAL[TIMESTAMP]`9 | x | x | -| `java.time.LocalDateTime` | `long`11 | `DATETIME` | x | x | `LOGICAL[TIMESTAMP]`9 | x | x | -| `java.time.OffsetTime` | x | x | x | x | `LOGICAL[TIME]`9 | x | x | -| `java.time.LocalTime` | `long`11 | `TIME` | x | x | `LOGICAL[TIME]`9 | x | x | -| `java.time.LocalDate` | `int`11 | `DATE` | x | x | `LOGICAL[DATE]`9 | x | x | -| `org.joda.time.LocalDate` | `int`11 | x | x | x | x | x | x | -| `org.joda.time.DateTime` | `int`11 | x | x | x | x | x | x | -| `org.joda.time.LocalTime` | `int`11 | x | x | x | x | x | x | -| `java.util.UUID` 
| `string`4 | x | ByteString (16 bytes) | x | `FIXED[16]` | x | x | -| `(Long, Long, Long)`12 | `fixed[12]` | x | x | x | x | x | x | +| Scala | Avro | Beam | BigQuery | Bigtable7 | Datastore | Parquet | Protobuf | TensorFlow | +|-----------------------------------|------------------------------|----------------------------------|------------------------|---------------------------------|-----------------------|-----------------------------------|-------------------------|---------------------| +| `Unit` | `null` | x | x | x | `Null` | x | x | x | +| `Boolean` | `boolean` | `BOOLEAN` | `BOOL` | `Byte` | `Boolean` | `BOOLEAN` | `Boolean` | `INT64`3 | +| `Char` | `int`3 | `BYTE` | `INT64`3 | `Char` | `Integer`3 | `INT32`3 | `Int`3 | `INT64`3 | +| `Byte` | `int`3 | `BYTE` | `INT64`3 | `Byte` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | +| `Short` | `int`3 | `INT16` | `INT64`3 | `Short` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | +| `Int` | `int` | `INT32` | `INT64`3 | `Int` | `Integer`3 | `INT32`9 | `Int` | `INT64`3 | +| `Long` | `long` | `INT64` | `INT64` | `Long` | `Integer` | `INT64`9 | `Long` | `INT64` | +| `Float` | `float` | `FLOAT` | `FLOAT64`3 | `Float` | `Double`3 | `FLOAT` | `Float` | `FLOAT` | +| `Double` | `double` | `DOUBLE` | `FLOAT64` | `Double` | `Double` | `DOUBLE` | `Double` | `FLOAT`3 | +| `CharSequence` | `string` | `STRING` | x | x | x | x | x | x | +| `String` | `string` | `STRING` | `STRING` | `String` | `String` | `BINARY` | `String` | `BYTES`3 | +| `Array[Byte]` | `bytes` | `BYTES` | `BYTES` | `ByteString` | `Blob` | `BINARY` | `ByteString` | `BYTES` | +| `ByteString` | x | `BYTES` | x | `ByteString` | `Blob` | x | `ByteString` | `BYTES` | +| `ByteBuffer` | `bytes` | `BYTES` | x | x | | x | x | x | +| Enum1 | `enum` | `STRING`16 | `STRING`3 | `String` | `String`3 | `BINARY`/`ENUM`9 | Enum | `BYTES`3 | +| `BigInt` | x | x | x | `BigInt` | x | x | x | x | +| `BigDecimal` | `bytes`4 | `DECIMAL` | `NUMERIC`6 | `Int` scale + unscaled `BigInt` | 
x | `LOGICAL[DECIMAL]`9,14 | x | x | +| `Option[T]` | `union[null, T]`5 | Empty as `null` | `NULLABLE` | Empty as `None` | Absent as `None` | `OPTIONAL` | `optional`10 | Size <= 1 | +| `Iterable[T]`2 | `array[T]` | `ITERABLE` | `REPEATED` | x | `Array` | `REPEATED`13 | `repeated` | Size >= 0 | +| Nested | `record` | `ROW` | `STRUCT` | Flat8 | `Entity` | Group | `Message` | Flat8 | +| `Map[K, V]` | `map[V]`15 | `MAP` | x | x | x | x | `map` | x | +| `java.time.Instant` | `long`11 | `INT64` | `TIMESTAMP` | x | `Timestamp` | `LOGICAL[TIMESTAMP]`9 | x | x | +| `java.time.LocalDateTime` | `long`11 | `INT64` | `DATETIME` | x | x | `LOGICAL[TIMESTAMP]`9 | x | x | +| `java.time.OffsetTime` | x | x | x | x | x | `LOGICAL[TIME]`9 | x | x | +| `java.time.LocalTime` | `long`11 | `INT32` | `TIME` | x | x | `LOGICAL[TIME]`9 | x | x | +| `java.time.LocalDate` | `int`11 | `INT64`17 | `DATE` | x | x | `LOGICAL[DATE]`9 | x | x | +| `org.joda.time.LocalDate` | `int`11 | `INT32` | x | x | x | x | x | x | +| `org.joda.time.DateTime` | `int`11 | `INT64` | x | x | x | x | x | x | +| `org.joda.time.LocalTime` | `int`11 | `INT32` | x | x | x | x | x | x | +| `java.util.UUID` | `string`4 | `ROW(INT64, INT64)`18 | x | ByteString (16 bytes) | x | `FIXED[16]` | x | x | +| `(Long, Long, Long)`12 | `fixed[12]` | x | x | x | x | x | x | x | 1. Those wrapped in`UnsafeEnum` are encoded as strings, see [enums.md](https://github.com/spotify/magnolify/blob/master/docs/enums.md) for more @@ -58,4 +58,7 @@ format: `required group $FIELDNAME (LIST) { repeated $FIELDTYPE array ($FIELDSCHEMA); }`. 14. Parquet's Decimal logical format supports multiple representations, and are not implicitly scoped by default. Import one of: `magnolify.parquet.ParquetField.{decimal32, decimal64, decimalFixed, decimalBinary}`. -15. Map key type in avro is fixed to string. Scala Map key type must be either `String` or `CharSequence`. \ No newline at end of file +15. Map key type in avro is fixed to string. 
Scala Map key type must be either `String` or `CharSequence`. +16. Beam logical [Enumeration type](https://beam.apache.org/documentation/programming-guide/#enumerationtype) +17. Beam logical [Date type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/Date.html) +18. Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) From 22ba2d19c8bdebb1fd2ad570a7c000c7694d1240 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 16:02:40 -0400 Subject: [PATCH 06/31] wip --- .../scala/magnolify/beam/BeamSchemaType.scala | 5 +- .../magnolify/beam/logical/package.scala | 107 ++++++++++++------ docs/beam.md | 41 ++++++- docs/mapping.md | 71 ++++++------ .../main/scala/magnolify/shared/Time.scala | 8 ++ 5 files changed, 157 insertions(+), 75 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 67b489a4f..096591b7e 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -27,6 +27,7 @@ import magnolify.shims.FactoryCompat import org.apache.beam.sdk.schemas.logicaltypes import java.nio.ByteBuffer +import java.time.temporal.ChronoField import java.{time as jt, util as ju} import scala.annotation.implicitNotFound import scala.collection.concurrent @@ -215,8 +216,8 @@ object BeamSchemaField { id[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) private lazy val EpochJodaDate = new joda.LocalDate(1970, 1, 1) implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = - from[Int](daysFromEpoch => EpochJodaDate.plusDays(daysFromEpoch))(d => - joda.Days.daysBetween(EpochJodaDate, d).getDays + from[jt.LocalDate](jtld => EpochJodaDate.plusDays(jtld.get(ChronoField.EPOCH_DAY)))(d => + jt.LocalDate.ofEpochDay(joda.Days.daysBetween(EpochJodaDate, d).getDays.toLong) ) implicit 
def bsfEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): BeamSchemaField[T] = diff --git a/beam/src/main/scala/magnolify/beam/logical/package.scala b/beam/src/main/scala/magnolify/beam/logical/package.scala index 6475a2fdc..70c75449f 100644 --- a/beam/src/main/scala/magnolify/beam/logical/package.scala +++ b/beam/src/main/scala/magnolify/beam/logical/package.scala @@ -16,77 +16,110 @@ package magnolify.beam +import org.apache.beam.sdk.schemas.logicaltypes +import org.apache.beam.sdk.schemas.Schema.FieldType import org.joda.time as joda + import java.time as jt package object logical { import magnolify.shared.Time._ object millis { - // joda - // DATETIME A timestamp represented as milliseconds since the epoch + implicit val bsfInstantMillis: BeamSchemaField[jt.Instant] = + BeamSchemaField.id[jt.Instant](_ => FieldType.DATETIME) + implicit val bsfJodaInstantMillis: BeamSchemaField[joda.Instant] = + BeamSchemaField.from[jt.Instant](i => millisToJodaInstant(millisFromInstant(i)))(i => + millisToInstant(millisFromJodaInstant(i)) + ) // joda.DateTime only has millisecond resolution implicit val bsfJodaDateTimeMillis: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[Long](millisToJodaDateTime)(millisFromJodaDateTime) - // DATETIME A timestamp represented as milliseconds since the epoch - implicit val bsfJodaInstantMillis: BeamSchemaField[joda.Instant] = - BeamSchemaField.from[Long](millisToJodaInstant)(millisFromJodaInstant) - implicit val bsfJodaDurationMillis: BeamSchemaField[joda.Duration] = - BeamSchemaField.from[Long](millisToJodaDuration)(millisFromJodaDuration) - implicit val bsfJodaLocalTimeMillis: BeamSchemaField[joda.LocalTime] = - BeamSchemaField.from[Int](millisToJodaLocalTime)(millisFromJodaLocalTime) - // java - implicit val bsfInstantMillis: BeamSchemaField[jt.Instant] = - BeamSchemaField.from[Long](millisToInstant)(millisFromInstant) + BeamSchemaField.from[jt.Instant](i => millisToJodaDateTime(millisFromInstant(i)))(dt => + 
millisToInstant(millisFromJodaDateTime(dt)) + ) + implicit val bsLocalTimeMillis: BeamSchemaField[jt.LocalTime] = BeamSchemaField.from[Int](millisToLocalTime)(millisFromLocalTime) + implicit val bsfJodaLocalTimeMillis: BeamSchemaField[joda.LocalTime] = + BeamSchemaField.from[Int](millisToJodaLocalTime)(millisFromJodaLocalTime) + implicit val bsfLocalDateTimeMillis: BeamSchemaField[jt.LocalDateTime] = - BeamSchemaField.from[Long](millisToLocalDateTime)(millisFromLocalDateTime) + BeamSchemaField.id[jt.LocalDateTime](_ => FieldType.logicalType(new logicaltypes.DateTime())) + implicit val bsfJodaLocalDateTimeMillis: BeamSchemaField[joda.LocalDateTime] = + BeamSchemaField.from[jt.LocalDateTime](ldt => + millisToJodaLocalDateTime(millisFromLocalDateTime(ldt)) + )(ldt => millisToLocalDateTime(millisFromJodaLocalDateTime(ldt))) + implicit val bsfDurationMillis: BeamSchemaField[jt.Duration] = BeamSchemaField.from[Long](millisToDuration)(millisFromDuration) + implicit val bsfJodaDurationMillis: BeamSchemaField[joda.Duration] = + BeamSchemaField.from[Long](millisToJodaDuration)(millisFromJodaDuration) } object micros { - // joda.DateTime only has millisecond resolution, so excess precision is discarded - implicit val bsfJodaDateTimeMicros: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[Long](microsToJodaDateTime)(microsFromJodaDateTime) + // NOTE: logicaltypes.MicrosInstant() cannot be used as it throws assertion + // errors when greater-than-microsecond precision data is used + implicit val bsfInstantMicros: BeamSchemaField[jt.Instant] = + BeamSchemaField.from[Long](microsToInstant)(microsFromInstant) // joda.Instant has millisecond precision, excess precision discarded implicit val bsfJodaInstantMicros: BeamSchemaField[joda.Instant] = BeamSchemaField.from[Long](microsToJodaInstant)(microsFromJodaInstant) - // joda.Duration has millisecond precision, excess precision discarded - implicit val bsfJodaDurationMicros: BeamSchemaField[joda.Duration] = - 
BeamSchemaField.from[Long](microsToJodaDuration)(microsFromJodaDuration) + // joda.DateTime only has millisecond resolution, so excess precision is discarded + implicit val bsfJodaDateTimeMicros: BeamSchemaField[joda.DateTime] = + BeamSchemaField.from[Long](microsToJodaDateTime)(microsFromJodaDateTime) + + implicit val bsfLocalTimeMicros: BeamSchemaField[jt.LocalTime] = + BeamSchemaField.from[Long](microsToLocalTime)(microsFromLocalTime) implicit val bsfJodaLocalTimeMicros: BeamSchemaField[joda.LocalTime] = BeamSchemaField.from[Long](microsToJodaLocalTime)(microsFromJodaLocalTime) - // java - implicit val bsfInstantMicros: BeamSchemaField[jt.Instant] = - BeamSchemaField.from[Long](microsToInstant)(microsFromInstant) - implicit val bsLocalTimeMicros: BeamSchemaField[jt.LocalTime] = - BeamSchemaField.from[Long](microsToLocalTime)(microsFromLocalTime) + implicit val bsfLocalDateTimeMicros: BeamSchemaField[jt.LocalDateTime] = BeamSchemaField.from[Long](microsToLocalDateTime)(microsFromLocalDateTime) + // joda.LocalDateTime has millisecond precision, excess precision discarded + implicit val bsfJodaLocalDateTimeMicros: BeamSchemaField[joda.LocalDateTime] = + BeamSchemaField.from[Long](microsToJodaLocalDateTime)(microsFromJodaLocalDateTime) + implicit val bsfDurationMicros: BeamSchemaField[jt.Duration] = BeamSchemaField.from[Long](microsToDuration)(microsFromDuration) + // joda.Duration has millisecond precision, excess precision discarded + implicit val bsfJodaDurationMicros: BeamSchemaField[joda.Duration] = + BeamSchemaField.from[Long](microsToJodaDuration)(microsFromJodaDuration) } object nanos { + implicit val bsfInstantNanos: BeamSchemaField[jt.Instant] = + BeamSchemaField.id[jt.Instant](_ => FieldType.logicalType(new logicaltypes.NanosInstant())) + implicit val bsfJodaInstantNanos: BeamSchemaField[joda.Instant] = + BeamSchemaField.from[jt.Instant](i => nanosToJodaInstant(nanosFromInstant(i)))(i => + nanosToInstant(nanosFromJodaInstant(i)) + ) // joda.DateTime only 
has millisecond resolution implicit val bsfJodaDateTimeNanos: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[Long](nanosToJodaDateTime)(nanosFromJodaDateTime) - implicit val bsfJodaInstantNanos: BeamSchemaField[joda.Instant] = - BeamSchemaField.from[Long](nanosToJodaInstant)(nanosFromJodaInstant) - implicit val bsfJodaDurationNanos: BeamSchemaField[joda.Duration] = - BeamSchemaField.from[Long](nanosToJodaDuration)(nanosFromJodaDuration) - implicit val bsfJodaLocalTimeNanos: BeamSchemaField[joda.LocalTime] = - BeamSchemaField.from[Long](nanosToJodaLocalTime)(nanosFromJodaLocalTime) - // java - implicit val bsfInstantNanos: BeamSchemaField[jt.Instant] = - BeamSchemaField.from[Long](nanosToInstant)(nanosFromInstant) + BeamSchemaField.from[jt.Instant](i => nanosToJodaDateTime(nanosFromInstant(i)))(i => + nanosToInstant(nanosFromJodaDateTime(i)) + ) + implicit val bsLocalTimeNanos: BeamSchemaField[jt.LocalTime] = - BeamSchemaField.from[Long](nanosToLocalTime)(nanosFromLocalTime) + BeamSchemaField.id[jt.LocalTime](_ => FieldType.logicalType(new logicaltypes.Time())) + implicit val bsfJodaLocalTimeNanos: BeamSchemaField[joda.LocalTime] = + BeamSchemaField.from[jt.LocalTime](lt => nanosToJodaLocalTime(nanosFromLocalTime(lt)))(lt => + nanosToLocalTime(nanosFromJodaLocalTime(lt)) + ) + implicit val bsfLocalDateTimeNanos: BeamSchemaField[jt.LocalDateTime] = BeamSchemaField.from[Long](nanosToLocalDateTime)(nanosFromLocalDateTime) + // joda.LocalDateTime has millisecond precision, excess precision discarded + implicit val bsfJodaLocalDateTimeMicros: BeamSchemaField[joda.LocalDateTime] = + BeamSchemaField.from[jt.LocalDateTime](ldt => + nanosToJodaLocalDateTime(nanosFromLocalDateTime(ldt)) + )(ldt => nanosToLocalDateTime(nanosFromJodaLocalDateTime(ldt))) + implicit val bsfDurationNanos: BeamSchemaField[jt.Duration] = - BeamSchemaField.from[Long](nanosToDuration)(nanosFromDuration) + BeamSchemaField.id[jt.Duration](_ => FieldType.logicalType(new 
logicaltypes.NanosDuration())) + // joda.Duration has millisecond precision, excess precision discarded + implicit val bsfJodaDurationNanos: BeamSchemaField[joda.Duration] = + BeamSchemaField.from[jt.Duration](d => nanosToJodaDuration(nanosFromDuration(d)))(d => + nanosToDuration(nanosFromJodaDuration(d)) + ) } } diff --git a/docs/beam.md b/docs/beam.md index 0009c2f34..009427724 100644 --- a/docs/beam.md +++ b/docs/beam.md @@ -1,3 +1,42 @@ # Beam -https://beam.apache.org/documentation/programming-guide/#schema-definition \ No newline at end of file +`BeamSchemaType[T]` provides conversion between Scala type `T` and a [Beam Schema](https://beam.apache.org/documentation/programming-guide/#schema-definition). Custom support for type `T` can be added with an implicit instance of `BeamSchemaField[T]`. + +```scala mdoc:compile-only +import java.net.URI + +case class Inner(long: Long, str: String, uri: URI) +case class Outer(inner: Inner) +val record = Outer(Inner(1L, "hello", URI.create("https://www.spotify.com"))) + +import magnolify.beam.* +// Encode custom type URI as String +implicit val uriField: BeamSchemaField[URI] = BeamSchemaField.from[String](URI.create)(_.toString) + +val beamSchemaType = BeamSchemaType[Outer] +val row = beamSchemaType.to(record) +val copy: Outer = beamSchemaType.from(row) + +// Beam Schema +val schema = beamSchemaType.schema +``` + +Enum-like types map to the Beam logical [Enum type](https://beam.apache.org/documentation/programming-guide/#enumerationtype). See @ref:[EnumType](enums.md) for more details. `UnsafeEnum[T]` instances are available from `import magnolify.beam.unsafe._`. 
+ +To use a different field case format in target records, add an optional `CaseMapper` argument to `BeamSchemaType`: + +```scala mdoc:compile-only +import magnolify.beam.* +import magnolify.shared.CaseMapper +import com.google.common.base.CaseFormat + +case class LowerCamel(firstName: String, lastName: String) + +val toSnakeCase = CaseFormat.LOWER_CAMEL.converterTo(CaseFormat.LOWER_UNDERSCORE).convert _ +val beamSchemaType = BeamSchemaType[LowerCamel](CaseMapper(toSnakeCase)) +beamSchemaType.to(LowerCamel("John", "Doe")) // Row(first_name: John, last_name: Doe) +``` + +Use `import magnolify.beam.logical.millis._`, `import magnolify.beam.logical.micros._` or `import magnolify.beam.logical.nanos._` as appropriate for your use-case. +Beam's `DATETIME` type maps to the millisecond-precision `java.time.Instant`. +Beam's `DateTime` logical type is used for millisecond-precision `java.time.LocalDateTime`, the `NanosInstant` logical type for nanosecond-precision `java.time.Instant`, the `Time` logical type for nanosecond-precision `java.time.LocalTime`, and the `NanosDuration` logical type for `java.time.Duration`. 
\ No newline at end of file diff --git a/docs/mapping.md b/docs/mapping.md index 2b3b9222b..5dfc2df89 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -1,38 +1,38 @@ # Type Mapping -| Scala | Avro | Beam | BigQuery | Bigtable7 | Datastore | Parquet | Protobuf | TensorFlow | -|-----------------------------------|------------------------------|----------------------------------|------------------------|---------------------------------|-----------------------|-----------------------------------|-------------------------|---------------------| -| `Unit` | `null` | x | x | x | `Null` | x | x | x | -| `Boolean` | `boolean` | `BOOLEAN` | `BOOL` | `Byte` | `Boolean` | `BOOLEAN` | `Boolean` | `INT64`3 | -| `Char` | `int`3 | `BYTE` | `INT64`3 | `Char` | `Integer`3 | `INT32`3 | `Int`3 | `INT64`3 | -| `Byte` | `int`3 | `BYTE` | `INT64`3 | `Byte` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | -| `Short` | `int`3 | `INT16` | `INT64`3 | `Short` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | -| `Int` | `int` | `INT32` | `INT64`3 | `Int` | `Integer`3 | `INT32`9 | `Int` | `INT64`3 | -| `Long` | `long` | `INT64` | `INT64` | `Long` | `Integer` | `INT64`9 | `Long` | `INT64` | -| `Float` | `float` | `FLOAT` | `FLOAT64`3 | `Float` | `Double`3 | `FLOAT` | `Float` | `FLOAT` | -| `Double` | `double` | `DOUBLE` | `FLOAT64` | `Double` | `Double` | `DOUBLE` | `Double` | `FLOAT`3 | -| `CharSequence` | `string` | `STRING` | x | x | x | x | x | x | -| `String` | `string` | `STRING` | `STRING` | `String` | `String` | `BINARY` | `String` | `BYTES`3 | -| `Array[Byte]` | `bytes` | `BYTES` | `BYTES` | `ByteString` | `Blob` | `BINARY` | `ByteString` | `BYTES` | -| `ByteString` | x | `BYTES` | x | `ByteString` | `Blob` | x | `ByteString` | `BYTES` | -| `ByteBuffer` | `bytes` | `BYTES` | x | x | | x | x | x | -| Enum1 | `enum` | `STRING`16 | `STRING`3 | `String` | `String`3 | `BINARY`/`ENUM`9 | Enum | `BYTES`3 | -| `BigInt` | x | x | x | `BigInt` | x | x | x | x | -| `BigDecimal` | `bytes`4 | 
`DECIMAL` | `NUMERIC`6 | `Int` scale + unscaled `BigInt` | x | `LOGICAL[DECIMAL]`9,14 | x | x | -| `Option[T]` | `union[null, T]`5 | Empty as `null` | `NULLABLE` | Empty as `None` | Absent as `None` | `OPTIONAL` | `optional`10 | Size <= 1 | -| `Iterable[T]`2 | `array[T]` | `ITERABLE` | `REPEATED` | x | `Array` | `REPEATED`13 | `repeated` | Size >= 0 | -| Nested | `record` | `ROW` | `STRUCT` | Flat8 | `Entity` | Group | `Message` | Flat8 | -| `Map[K, V]` | `map[V]`15 | `MAP` | x | x | x | x | `map` | x | -| `java.time.Instant` | `long`11 | `INT64` | `TIMESTAMP` | x | `Timestamp` | `LOGICAL[TIMESTAMP]`9 | x | x | -| `java.time.LocalDateTime` | `long`11 | `INT64` | `DATETIME` | x | x | `LOGICAL[TIMESTAMP]`9 | x | x | -| `java.time.OffsetTime` | x | x | x | x | x | `LOGICAL[TIME]`9 | x | x | -| `java.time.LocalTime` | `long`11 | `INT32` | `TIME` | x | x | `LOGICAL[TIME]`9 | x | x | -| `java.time.LocalDate` | `int`11 | `INT64`17 | `DATE` | x | x | `LOGICAL[DATE]`9 | x | x | -| `org.joda.time.LocalDate` | `int`11 | `INT32` | x | x | x | x | x | x | -| `org.joda.time.DateTime` | `int`11 | `INT64` | x | x | x | x | x | x | -| `org.joda.time.LocalTime` | `int`11 | `INT32` | x | x | x | x | x | x | -| `java.util.UUID` | `string`4 | `ROW(INT64, INT64)`18 | x | ByteString (16 bytes) | x | `FIXED[16]` | x | x | -| `(Long, Long, Long)`12 | `fixed[12]` | x | x | x | x | x | x | x | +| Scala | Avro | Beam | BigQuery | Bigtable7 | Datastore | Parquet | Protobuf | TensorFlow | +|-----------------------------------|------------------------------|------------------------------------|------------------------|---------------------------------|-----------------------|-----------------------------------|-------------------------|---------------------| +| `Unit` | `null` | x | x | x | `Null` | x | x | x | +| `Boolean` | `boolean` | `BOOLEAN` | `BOOL` | `Byte` | `Boolean` | `BOOLEAN` | `Boolean` | `INT64`3 | +| `Char` | `int`3 | `BYTE` | `INT64`3 | `Char` | `Integer`3 | `INT32`3 | `Int`3 | 
`INT64`3 | +| `Byte` | `int`3 | `BYTE` | `INT64`3 | `Byte` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | +| `Short` | `int`3 | `INT16` | `INT64`3 | `Short` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | +| `Int` | `int` | `INT32` | `INT64`3 | `Int` | `Integer`3 | `INT32`9 | `Int` | `INT64`3 | +| `Long` | `long` | `INT64` | `INT64` | `Long` | `Integer` | `INT64`9 | `Long` | `INT64` | +| `Float` | `float` | `FLOAT` | `FLOAT64`3 | `Float` | `Double`3 | `FLOAT` | `Float` | `FLOAT` | +| `Double` | `double` | `DOUBLE` | `FLOAT64` | `Double` | `Double` | `DOUBLE` | `Double` | `FLOAT`3 | +| `CharSequence` | `string` | `STRING` | x | x | x | x | x | x | +| `String` | `string` | `STRING` | `STRING` | `String` | `String` | `BINARY` | `String` | `BYTES`3 | +| `Array[Byte]` | `bytes` | `BYTES` | `BYTES` | `ByteString` | `Blob` | `BINARY` | `ByteString` | `BYTES` | +| `ByteString` | x | `BYTES` | x | `ByteString` | `Blob` | x | `ByteString` | `BYTES` | +| `ByteBuffer` | `bytes` | `BYTES` | x | x | | x | x | x | +| Enum1 | `enum` | `STRING`16 | `STRING`3 | `String` | `String`3 | `BINARY`/`ENUM`9 | Enum | `BYTES`3 | +| `BigInt` | x | x | x | `BigInt` | x | x | x | x | +| `BigDecimal` | `bytes`4 | `DECIMAL` | `NUMERIC`6 | `Int` scale + unscaled `BigInt` | x | `LOGICAL[DECIMAL]`9,14 | x | x | +| `Option[T]` | `union[null, T]`5 | Empty as `null` | `NULLABLE` | Empty as `None` | Absent as `None` | `OPTIONAL` | `optional`10 | Size <= 1 | +| `Iterable[T]`2 | `array[T]` | `ITERABLE` | `REPEATED` | x | `Array` | `REPEATED`13 | `repeated` | Size >= 0 | +| Nested | `record` | `ROW` | `STRUCT` | Flat8 | `Entity` | Group | `Message` | Flat8 | +| `Map[K, V]` | `map[V]`15 | `MAP` | x | x | x | x | `map` | x | +| `java.time.Instant` | `long`11 | `DATETIME` or `INT64`17 | `TIMESTAMP` | x | `Timestamp` | `LOGICAL[TIMESTAMP]`9 | x | x | +| `java.time.LocalDateTime` | `long`11 | `INT64` | `DATETIME` | x | x | `LOGICAL[TIMESTAMP]`9 | x | x | +| `java.time.OffsetTime` | x | x | x | x | x | 
`LOGICAL[TIME]`9 | x | x | +| `java.time.LocalTime` | `long`11 | `INT32` | `TIME` | x | x | `LOGICAL[TIME]`9 | x | x | +| `java.time.LocalDate` | `int`11 | `INT64`18 | `DATE` | x | x | `LOGICAL[DATE]`9 | x | x | +| `org.joda.time.LocalDate` | `int`11 | `INT32` | x | x | x | x | x | x | +| `org.joda.time.DateTime` | `int`11 | `INT64` | x | x | x | x | x | x | +| `org.joda.time.LocalTime` | `int`11 | `INT32` | x | x | x | x | x | x | +| `java.util.UUID` | `string`4 | `ROW(INT64, INT64)`19 | x | ByteString (16 bytes) | x | `FIXED[16]` | x | x | +| `(Long, Long, Long)`12 | `fixed[12]` | x | x | x | x | x | x | x | 1. Those wrapped in`UnsafeEnum` are encoded as strings, see [enums.md](https://github.com/spotify/magnolify/blob/master/docs/enums.md) for more @@ -60,5 +60,6 @@ one of: `magnolify.parquet.ParquetField.{decimal32, decimal64, decimalFixed, decimalBinary}`. 15. Map key type in avro is fixed to string. Scala Map key type must be either `String` or `CharSequence`. 16. Beam logical [Enumeration type](https://beam.apache.org/documentation/programming-guide/#enumerationtype) -17. Beam logical [Date type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/Date.html) -18. Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) +17. Beam logical [DateTime](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/DateTime.html), at millisecond precision. Or `INT64` when micro or nano precision is used. +18. Beam logical [Date type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/Date.html) +19. 
Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) diff --git a/shared/src/main/scala/magnolify/shared/Time.scala b/shared/src/main/scala/magnolify/shared/Time.scala index 83ce8b8f9..4eaa69c59 100644 --- a/shared/src/main/scala/magnolify/shared/Time.scala +++ b/shared/src/main/scala/magnolify/shared/Time.scala @@ -97,6 +97,10 @@ object Time { LocalDateTime.ofInstant(microsToInstant(microsFromEpoch), ZoneOffset.UTC) @inline def microsFromLocalDateTime(ldt: LocalDateTime): Long = microsFromInstant(ldt.toInstant(ZoneOffset.UTC)) + @inline def microsToJodaLocalDateTime(microsFromEpoch: Long): joda.LocalDateTime = + new joda.LocalDateTime(TimeUnit.MICROSECONDS.toMillis(microsFromEpoch), joda.DateTimeZone.UTC) + @inline def microsFromJodaLocalDateTime(ldt: joda.LocalDateTime): Long = + TimeUnit.MILLISECONDS.toMicros(ldt.toDateTime(joda.DateTimeZone.UTC).getMillis) @inline def microsToDuration(micros: Long): Duration = Duration.ofMillis(TimeUnit.MICROSECONDS.toMillis(micros)) @@ -135,6 +139,10 @@ object Time { LocalDateTime.ofInstant(nanosToInstant(nanosFromEpoch), ZoneOffset.UTC) @inline def nanosFromLocalDateTime(ldt: LocalDateTime): Long = nanosFromInstant(ldt.toInstant(ZoneOffset.UTC)) + @inline def nanosToJodaLocalDateTime(nanosFromEpoch: Long): joda.LocalDateTime = + new joda.LocalDateTime(TimeUnit.NANOSECONDS.toMillis(nanosFromEpoch), joda.DateTimeZone.UTC) + @inline def nanosFromJodaLocalDateTime(ldt: joda.LocalDateTime): Long = + TimeUnit.MILLISECONDS.toNanos(ldt.toDateTime(joda.DateTimeZone.UTC).getMillis) @inline def nanosToDuration(nanos: Long): Duration = Duration.ofNanos(nanos) From caa1f2b68243aaff2745bf1d1d6233c3d418f304 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 16:38:54 -0400 Subject: [PATCH 07/31] wip --- .../scala/magnolify/beam/BeamSchemaType.scala | 12 +--- .../magnolify/beam/logical/package.scala | 31 +++++++- 
.../magnolify/beam/BeamSchemaTypeSuite.scala | 21 +++++- docs/beam.md | 32 +++++++-- docs/mapping.md | 71 +++++++++---------- 5 files changed, 110 insertions(+), 57 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 096591b7e..c7c2b6ed9 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -21,14 +21,12 @@ import magnolify.shared.* import org.apache.beam.sdk.schemas.Schema import org.apache.beam.sdk.schemas.Schema.FieldType import org.apache.beam.sdk.values.Row -import org.joda.time as joda import com.google.protobuf.ByteString import magnolify.shims.FactoryCompat import org.apache.beam.sdk.schemas.logicaltypes import java.nio.ByteBuffer -import java.time.temporal.ChronoField -import java.{time as jt, util as ju} +import java.util as ju import scala.annotation.implicitNotFound import scala.collection.concurrent import scala.jdk.CollectionConverters.* @@ -212,14 +210,6 @@ object BeamSchemaField { implicit val bsfUUID: BeamSchemaField[ju.UUID] = id[ju.UUID](_ => FieldType.logicalType(new logicaltypes.UuidLogicalType)) - implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = - id[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) - private lazy val EpochJodaDate = new joda.LocalDate(1970, 1, 1) - implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = - from[jt.LocalDate](jtld => EpochJodaDate.plusDays(jtld.get(ChronoField.EPOCH_DAY)))(d => - jt.LocalDate.ofEpochDay(joda.Days.daysBetween(EpochJodaDate, d).getDays.toLong) - ) - implicit def bsfEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): BeamSchemaField[T] = new BeamSchemaField[T] { type FromT = logicaltypes.EnumerationType.Value diff --git a/beam/src/main/scala/magnolify/beam/logical/package.scala b/beam/src/main/scala/magnolify/beam/logical/package.scala index 70c75449f..f7d4c472b 100644 --- 
a/beam/src/main/scala/magnolify/beam/logical/package.scala +++ b/beam/src/main/scala/magnolify/beam/logical/package.scala @@ -18,13 +18,25 @@ package magnolify.beam import org.apache.beam.sdk.schemas.logicaltypes import org.apache.beam.sdk.schemas.Schema.FieldType +import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes import org.joda.time as joda import java.time as jt +import java.time.temporal.ChronoField package object logical { import magnolify.shared.Time._ + object date { + implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = + BeamSchemaField.id[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) + private lazy val EpochJodaDate = new joda.LocalDate(1970, 1, 1) + implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = + BeamSchemaField.from[jt.LocalDate](jtld => + EpochJodaDate.plusDays(jtld.get(ChronoField.EPOCH_DAY)) + )(d => jt.LocalDate.ofEpochDay(joda.Days.daysBetween(EpochJodaDate, d).getDays.toLong)) + } + object millis { implicit val bsfInstantMillis: BeamSchemaField[jt.Instant] = BeamSchemaField.id[jt.Instant](_ => FieldType.DATETIME) @@ -32,13 +44,12 @@ package object logical { BeamSchemaField.from[jt.Instant](i => millisToJodaInstant(millisFromInstant(i)))(i => millisToInstant(millisFromJodaInstant(i)) ) - // joda.DateTime only has millisecond resolution implicit val bsfJodaDateTimeMillis: BeamSchemaField[joda.DateTime] = BeamSchemaField.from[jt.Instant](i => millisToJodaDateTime(millisFromInstant(i)))(dt => millisToInstant(millisFromJodaDateTime(dt)) ) - implicit val bsLocalTimeMillis: BeamSchemaField[jt.LocalTime] = + implicit val bsfLocalTimeMillis: BeamSchemaField[jt.LocalTime] = BeamSchemaField.from[Int](millisToLocalTime)(millisFromLocalTime) implicit val bsfJodaLocalTimeMillis: BeamSchemaField[joda.LocalTime] = BeamSchemaField.from[Int](millisToJodaLocalTime)(millisFromJodaLocalTime) @@ -70,6 +81,7 @@ package object logical { implicit val bsfLocalTimeMicros: BeamSchemaField[jt.LocalTime] = 
BeamSchemaField.from[Long](microsToLocalTime)(microsFromLocalTime) + // joda.LocalTime only has millisecond resolution, so excess precision is discarded implicit val bsfJodaLocalTimeMicros: BeamSchemaField[joda.LocalTime] = BeamSchemaField.from[Long](microsToJodaLocalTime)(microsFromJodaLocalTime) @@ -89,6 +101,7 @@ package object logical { object nanos { implicit val bsfInstantNanos: BeamSchemaField[jt.Instant] = BeamSchemaField.id[jt.Instant](_ => FieldType.logicalType(new logicaltypes.NanosInstant())) + // joda.Instant has millisecond precision, excess precision discarded implicit val bsfJodaInstantNanos: BeamSchemaField[joda.Instant] = BeamSchemaField.from[jt.Instant](i => nanosToJodaInstant(nanosFromInstant(i)))(i => nanosToInstant(nanosFromJodaInstant(i)) @@ -99,8 +112,9 @@ package object logical { nanosToInstant(nanosFromJodaDateTime(i)) ) - implicit val bsLocalTimeNanos: BeamSchemaField[jt.LocalTime] = + implicit val bsfLocalTimeNanos: BeamSchemaField[jt.LocalTime] = BeamSchemaField.id[jt.LocalTime](_ => FieldType.logicalType(new logicaltypes.Time())) + // joda.LocalTime only has millisecond resolution, so excess precision is discarded implicit val bsfJodaLocalTimeNanos: BeamSchemaField[joda.LocalTime] = BeamSchemaField.from[jt.LocalTime](lt => nanosToJodaLocalTime(nanosFromLocalTime(lt)))(lt => nanosToLocalTime(nanosFromJodaLocalTime(lt)) @@ -122,4 +136,15 @@ package object logical { nanosToDuration(nanosFromJodaDuration(d)) ) } + + object sql { + implicit val bsfSqlLocalTime: BeamSchemaField[jt.LocalTime] = + BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.TIME)) + implicit val bsfSqlInstant: BeamSchemaField[jt.Instant] = + BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.TIMESTAMP)) + implicit val bsfSqlLocalDateTime: BeamSchemaField[jt.LocalDateTime] = + BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.DATETIME)) + implicit val bsfSqlLocalDate: BeamSchemaField[jt.LocalDate] = + BeamSchemaField.id(_ => 
FieldType.logicalType(SqlTypes.DATE)) + } } diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index 9a5cf127a..698e03854 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -79,6 +79,12 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[UnsafeEnums] } + property("Date") { + import magnolify.beam.logical.date.* + test[JavaDate] + test[JodaDate] + } + property("Millis") { import magnolify.beam.logical.millis.* test[JavaTime] @@ -127,6 +133,11 @@ class BeamSchemaTypeSuite extends MagnolifySuite { val record = bst(HasValueClass(ValueClass("String"))) assert(record.getValue[String]("vc").equals("String")) } + + property("Sql") { + import magnolify.beam.logical.sql.* + test[Sql] + } } case class Bs(bs: ByteString) @@ -137,9 +148,17 @@ case class Logical( ul: List[UUID], ulo: List[Option[UUID]] ) + +case class Sql( + i: Instant, + dt: LocalDateTime, + t: LocalTime, + d: LocalDate +) +case class JavaDate(d: LocalDate) +case class JodaDate(jd: joda.LocalDate) case class JavaTime( i: Instant, - d: LocalDate, dt: LocalDateTime, t: LocalTime ) diff --git a/docs/beam.md b/docs/beam.md index 009427724..01f787570 100644 --- a/docs/beam.md +++ b/docs/beam.md @@ -21,7 +21,31 @@ val copy: Outer = beamSchemaType.from(row) val schema = beamSchemaType.schema ``` -Enum-like types map to the Beam logical [Enum type]((https://beam.apache.org/documentation/programming-guide/#enumerationtype)). See @ref:[EnumType](enums.md) for more details. `UnsafeEnum[T]` instances are available from `import magnolify.beam.unsafe._`. +## Enums +Enum-like types map to the Beam logical [Enum type](https://beam.apache.org/documentation/programming-guide/#enumerationtype). See @ref:[EnumType](enums.md) for more details. `UnsafeEnum[T]` instances are available from `import magnolify.beam.unsafe.*`.
+ +## Time and dates + +Java and joda `LocalDate` types are available via `import magnolify.beam.logical.date.*` + +For date-time, instants, and durations, use `import magnolify.beam.logical.millis.*`, `import magnolify.beam.logical.micros.*` or `import magnolify.beam.logical.nanos.*` as appropriate for your use-case. +Note that joda types have only millisecond resolution, so excess precision will be discarded when used with `micros` or `nanos`. + +Where possible, Beam logical types are used and joda types defer to these implementations: + +* Beam's `DATETIME` primitive type maps to the millisecond-precision java and joda `Instant`s and the joda `DateTime`. +* The `DateTime` logical type is used for millisecond-precision java and joda `LocalDateTime` +* The `NanosInstant` logical type is used for nanosecond-precision java and joda `Instant` +* The `Time` logical type is used for nanosecond-precision java and joda `LocalTime` +* The `NanosDuration` logical type is used for java and joda `Duration` + +Beam's `MicrosInstant` should not be used as it throws exceptions when presented with greater-than-microsecond precision data. + +## SQL types + +SQL-compatible logical types are supported via `import magnolify.beam.logical.sql.*` + +## Case mapping To use a different field case format in target records, add an optional `CaseMapper` argument to `BeamSchemaType`: @@ -35,8 +59,4 @@ case class LowerCamel(firstName: String, lastName: String) val toSnakeCase = CaseFormat.LOWER_CAMEL.converterTo(CaseFormat.LOWER_UNDERSCORE).convert _ val beamSchemaType = BeamSchemaType[LowerCamel](CaseMapper(toSnakeCase)) beamSchemaType.to(LowerCamel("John", "Doe")) // Row(first_name: John, last_name: Doe) -``` - -Use `import magnolify.beam.logical.millis._`, `import magnolify.beam.logical.micros._` or `import magnolify.beam.logical.nanos._` as appropriate for your use-case. -Beam's `DATETIME` type maps to the millisecond-precision `java.time.Instant`. 
-Beam's `DateTime` logical type is used for millisecond-precision `java.time.LocalDateTime`, the `NanosInstant` logical type for nanosecond-precision `java.time.Instant`, the `Time` logical type for nanosecond-precision `java.time.LocalTime`, and the `NanosDuration` logical type for `java.time.Duration`. \ No newline at end of file +``` \ No newline at end of file diff --git a/docs/mapping.md b/docs/mapping.md index 5dfc2df89..c86db47e1 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -1,38 +1,38 @@ # Type Mapping -| Scala | Avro | Beam | BigQuery | Bigtable7 | Datastore | Parquet | Protobuf | TensorFlow | -|-----------------------------------|------------------------------|------------------------------------|------------------------|---------------------------------|-----------------------|-----------------------------------|-------------------------|---------------------| -| `Unit` | `null` | x | x | x | `Null` | x | x | x | -| `Boolean` | `boolean` | `BOOLEAN` | `BOOL` | `Byte` | `Boolean` | `BOOLEAN` | `Boolean` | `INT64`3 | -| `Char` | `int`3 | `BYTE` | `INT64`3 | `Char` | `Integer`3 | `INT32`3 | `Int`3 | `INT64`3 | -| `Byte` | `int`3 | `BYTE` | `INT64`3 | `Byte` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | -| `Short` | `int`3 | `INT16` | `INT64`3 | `Short` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | -| `Int` | `int` | `INT32` | `INT64`3 | `Int` | `Integer`3 | `INT32`9 | `Int` | `INT64`3 | -| `Long` | `long` | `INT64` | `INT64` | `Long` | `Integer` | `INT64`9 | `Long` | `INT64` | -| `Float` | `float` | `FLOAT` | `FLOAT64`3 | `Float` | `Double`3 | `FLOAT` | `Float` | `FLOAT` | -| `Double` | `double` | `DOUBLE` | `FLOAT64` | `Double` | `Double` | `DOUBLE` | `Double` | `FLOAT`3 | -| `CharSequence` | `string` | `STRING` | x | x | x | x | x | x | -| `String` | `string` | `STRING` | `STRING` | `String` | `String` | `BINARY` | `String` | `BYTES`3 | -| `Array[Byte]` | `bytes` | `BYTES` | `BYTES` | `ByteString` | `Blob` | `BINARY` | `ByteString` | `BYTES` | -| 
`ByteString` | x | `BYTES` | x | `ByteString` | `Blob` | x | `ByteString` | `BYTES` | -| `ByteBuffer` | `bytes` | `BYTES` | x | x | | x | x | x | -| Enum1 | `enum` | `STRING`16 | `STRING`3 | `String` | `String`3 | `BINARY`/`ENUM`9 | Enum | `BYTES`3 | -| `BigInt` | x | x | x | `BigInt` | x | x | x | x | -| `BigDecimal` | `bytes`4 | `DECIMAL` | `NUMERIC`6 | `Int` scale + unscaled `BigInt` | x | `LOGICAL[DECIMAL]`9,14 | x | x | -| `Option[T]` | `union[null, T]`5 | Empty as `null` | `NULLABLE` | Empty as `None` | Absent as `None` | `OPTIONAL` | `optional`10 | Size <= 1 | -| `Iterable[T]`2 | `array[T]` | `ITERABLE` | `REPEATED` | x | `Array` | `REPEATED`13 | `repeated` | Size >= 0 | -| Nested | `record` | `ROW` | `STRUCT` | Flat8 | `Entity` | Group | `Message` | Flat8 | -| `Map[K, V]` | `map[V]`15 | `MAP` | x | x | x | x | `map` | x | -| `java.time.Instant` | `long`11 | `DATETIME` or `INT64`17 | `TIMESTAMP` | x | `Timestamp` | `LOGICAL[TIMESTAMP]`9 | x | x | -| `java.time.LocalDateTime` | `long`11 | `INT64` | `DATETIME` | x | x | `LOGICAL[TIMESTAMP]`9 | x | x | -| `java.time.OffsetTime` | x | x | x | x | x | `LOGICAL[TIME]`9 | x | x | -| `java.time.LocalTime` | `long`11 | `INT32` | `TIME` | x | x | `LOGICAL[TIME]`9 | x | x | -| `java.time.LocalDate` | `int`11 | `INT64`18 | `DATE` | x | x | `LOGICAL[DATE]`9 | x | x | -| `org.joda.time.LocalDate` | `int`11 | `INT32` | x | x | x | x | x | x | -| `org.joda.time.DateTime` | `int`11 | `INT64` | x | x | x | x | x | x | -| `org.joda.time.LocalTime` | `int`11 | `INT32` | x | x | x | x | x | x | -| `java.util.UUID` | `string`4 | `ROW(INT64, INT64)`19 | x | ByteString (16 bytes) | x | `FIXED[16]` | x | x | -| `(Long, Long, Long)`12 | `fixed[12]` | x | x | x | x | x | x | x | +| Scala | Avro | Beam | BigQuery | Bigtable7 | Datastore | Parquet | Protobuf | TensorFlow | 
+|-----------------------------------|------------------------------|-----------------------------------------|------------------------|---------------------------------|-----------------------|-----------------------------------|-------------------------|---------------------| +| `Unit` | `null` | x | x | x | `Null` | x | x | x | +| `Boolean` | `boolean` | `BOOLEAN` | `BOOL` | `Byte` | `Boolean` | `BOOLEAN` | `Boolean` | `INT64`3 | +| `Char` | `int`3 | `BYTE` | `INT64`3 | `Char` | `Integer`3 | `INT32`3 | `Int`3 | `INT64`3 | +| `Byte` | `int`3 | `BYTE` | `INT64`3 | `Byte` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | +| `Short` | `int`3 | `INT16` | `INT64`3 | `Short` | `Integer`3 | `INT32`9 | `Int`3 | `INT64`3 | +| `Int` | `int` | `INT32` | `INT64`3 | `Int` | `Integer`3 | `INT32`9 | `Int` | `INT64`3 | +| `Long` | `long` | `INT64` | `INT64` | `Long` | `Integer` | `INT64`9 | `Long` | `INT64` | +| `Float` | `float` | `FLOAT` | `FLOAT64`3 | `Float` | `Double`3 | `FLOAT` | `Float` | `FLOAT` | +| `Double` | `double` | `DOUBLE` | `FLOAT64` | `Double` | `Double` | `DOUBLE` | `Double` | `FLOAT`3 | +| `CharSequence` | `string` | `STRING` | x | x | x | x | x | x | +| `String` | `string` | `STRING` | `STRING` | `String` | `String` | `BINARY` | `String` | `BYTES`3 | +| `Array[Byte]` | `bytes` | `BYTES` | `BYTES` | `ByteString` | `Blob` | `BINARY` | `ByteString` | `BYTES` | +| `ByteString` | x | `BYTES` | x | `ByteString` | `Blob` | x | `ByteString` | `BYTES` | +| `ByteBuffer` | `bytes` | `BYTES` | x | x | | x | x | x | +| Enum1 | `enum` | `STRING`16 | `STRING`3 | `String` | `String`3 | `BINARY`/`ENUM`9 | Enum | `BYTES`3 | +| `BigInt` | x | x | x | `BigInt` | x | x | x | x | +| `BigDecimal` | `bytes`4 | `DECIMAL` | `NUMERIC`6 | `Int` scale + unscaled `BigInt` | x | `LOGICAL[DECIMAL]`9,14 | x | x | +| `Option[T]` | `union[null, T]`5 | Empty as `null` | `NULLABLE` | Empty as `None` | Absent as `None` | `OPTIONAL` | `optional`10 | Size <= 1 | +| `Iterable[T]`2 | `array[T]` | 
`ITERABLE` | `REPEATED` | x | `Array` | `REPEATED`13 | `repeated` | Size >= 0 | +| Nested | `record` | `ROW` | `STRUCT` | Flat8 | `Entity` | Group | `Message` | Flat8 | +| `Map[K, V]` | `map[V]`15 | `MAP` | x | x | x | x | `map` | x | +| `java.time.Instant` | `long`11 | `DATETIME`, `INT64`, `ROW`17 | `TIMESTAMP` | x | `Timestamp` | `LOGICAL[TIMESTAMP]`9 | x | x | +| `java.time.LocalDateTime` | `long`11 | `ROW`, `INT64`17 | `DATETIME` | x | x | `LOGICAL[TIMESTAMP]`9 | x | x | +| `java.time.OffsetTime` | x | x | x | x | x | `LOGICAL[TIME]`9 | x | x | +| `java.time.LocalTime` | `long`11 | `INT32`, `INT64`17 | `TIME` | x | x | `LOGICAL[TIME]`9 | x | x | +| `java.time.LocalDate` | `int`11 | `INT64`17 | `DATE` | x | x | `LOGICAL[DATE]`9 | x | x | +| `org.joda.time.LocalDate` | `int`11 | `INT64`17 | x | x | x | x | x | x | +| `org.joda.time.DateTime` | `int`11 | `DATETIME`, `INT64`, `ROW`17 | x | x | x | x | x | x | +| `org.joda.time.LocalTime` | `int`11 | `INT32`, `INT64`17 | x | x | x | x | x | x | +| `java.util.UUID` | `string`4 | `ROW`18 | x | ByteString (16 bytes) | x | `FIXED[16]` | x | x | +| `(Long, Long, Long)`12 | `fixed[12]` | x | x | x | x | x | x | x | 1. Those wrapped in`UnsafeEnum` are encoded as strings, see [enums.md](https://github.com/spotify/magnolify/blob/master/docs/enums.md) for more @@ -60,6 +60,5 @@ one of: `magnolify.parquet.ParquetField.{decimal32, decimal64, decimalFixed, decimalBinary}`. 15. Map key type in avro is fixed to string. Scala Map key type must be either `String` or `CharSequence`. 16. Beam logical [Enumeration type](https://beam.apache.org/documentation/programming-guide/#enumerationtype) -17. Beam logical [DateTime](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/DateTime.html), at millisecond precision. Or `INT64` when micro or nano precision is used. -18. Beam logical [Date type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/Date.html) -19. 
Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) +17. See [beam.md](https://github.com/spotify/magnolify/blob/master/docs/beam.md) for details +18. Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) From 9c231b918906ce00d8e576d6ec00bf4c00a3a102 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 16:58:54 -0400 Subject: [PATCH 08/31] gh --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 81ef162b3..6da2ce591 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -99,11 +99,11 @@ jobs: - name: Make target directories if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') - run: mkdir -p bom/target refined/target shared/target tensorflow/target parquet/target tools/target protobuf/target jmh/target bigquery/target avro/target scalacheck/target datastore/target neo4j/target cats/target bigtable/target guava/target project/target + run: mkdir -p bom/target refined/target shared/target tensorflow/target parquet/target tools/target protobuf/target jmh/target bigquery/target avro/target scalacheck/target beam/target datastore/target neo4j/target cats/target bigtable/target guava/target project/target - name: Compress target directories if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') - run: tar cf targets.tar bom/target refined/target shared/target
tensorflow/target parquet/target tools/target protobuf/target jmh/target bigquery/target avro/target scalacheck/target beam/target datastore/target neo4j/target cats/target bigtable/target guava/target project/target - name: Upload target directories if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') From d2386c9457fda430b2cdf758febc8a878d025600 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 17:08:37 -0400 Subject: [PATCH 09/31] T_T --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 21dfddb16..ee2a7ddd6 100644 --- a/build.sbt +++ b/build.sbt @@ -505,7 +505,7 @@ lazy val beam = project description := "Magnolia add-on for Apache Beam", libraryDependencies ++= Seq( "org.apache.beam" % "beam-sdks-java-core" % beamVersion % Provided, - "com.google.protobuf" % "protobuf-java" % protobufVersion % ProtobufConfig, + "com.google.protobuf" % "protobuf-java" % protobufVersion % ProtobufConfig ) ) From 23fceb8651b19fecc1b3f8f98f204fc89095730d Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 17:23:21 -0400 Subject: [PATCH 10/31] lit --- shared/src/main/scala/magnolify/shared/Time.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shared/src/main/scala/magnolify/shared/Time.scala b/shared/src/main/scala/magnolify/shared/Time.scala index 4eaa69c59..7cad70ee5 100644 --- a/shared/src/main/scala/magnolify/shared/Time.scala +++ b/shared/src/main/scala/magnolify/shared/Time.scala @@ -59,7 +59,7 @@ object Time { // micros ///////////////////////////////////////////////////// @inline def microsToInstant(microsFromEpoch: Long): Instant = { val epochSeconds = TimeUnit.MICROSECONDS.toSeconds(microsFromEpoch) - val nanoAdjustment = TimeUnit.MICROSECONDS.toNanos(microsFromEpoch % 1_000_000L) + val nanoAdjustment = TimeUnit.MICROSECONDS.toNanos(microsFromEpoch % 1000000L) Instant.ofEpochSecond(epochSeconds, 
nanoAdjustment) } @inline def microsFromInstant(instant: Instant): Long = { @@ -114,7 +114,7 @@ object Time { // nanos ///////////////////////////////////////////////////// // Long does not technically have enough range for Instant @inline def nanosToInstant(epochNanos: Long): Instant = - Instant.ofEpochSecond(TimeUnit.NANOSECONDS.toSeconds(epochNanos), epochNanos % 1_000_000_000L) + Instant.ofEpochSecond(TimeUnit.NANOSECONDS.toSeconds(epochNanos), epochNanos % 1000000000L) @inline def nanosFromInstant(instant: Instant): Long = TimeUnit.SECONDS.toNanos(instant.getEpochSecond) + instant.getNano @inline def nanosToJodaInstant(nanosFromEpoch: Long): joda.Instant = From 14dd0b28cd8f5bf5b573861950222e71f5be5555 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 17:49:17 -0400 Subject: [PATCH 11/31] mima --- build.sbt | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/build.sbt b/build.sbt index ee2a7ddd6..bfddb05aa 100644 --- a/build.sbt +++ b/build.sbt @@ -225,7 +225,15 @@ ThisBuild / githubWorkflowAddedJobs ++= Seq( // mima ThisBuild / mimaBinaryIssueFilters ++= Seq( // genFunnelMacro should not be available to users - ProblemFilters.exclude[DirectMissingMethodProblem]("magnolify.guava.auto.package.genFunnelMacro") + ProblemFilters.exclude[DirectMissingMethodProblem]("magnolify.guava.auto.package.genFunnelMacro"), + // incorrectly named implicit + ProblemFilters.exclude[DirectMissingMethodProblem]( + "magnolify.parquet.logical.package#micros.pfTimestampMillis" + ), + // incorrectly named implicit + ProblemFilters.exclude[DirectMissingMethodProblem]( + "magnolify.parquet.logical.package#micros.pfLocalDateTimeMillis" + ) ) ThisBuild / tlVersionIntroduced := Map("3" -> "0.8.0") @@ -506,7 +514,9 @@ lazy val beam = project libraryDependencies ++= Seq( "org.apache.beam" % "beam-sdks-java-core" % beamVersion % Provided, "com.google.protobuf" % "protobuf-java" % protobufVersion % ProtobufConfig - ) + ), + // TODO remove this line 
after release + tlMimaPreviousVersions := Set.empty ) lazy val bigquery = project From 67c49b94d1ffbc9714ccf86a6945cfb8c13ce0a0 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 17:56:54 -0400 Subject: [PATCH 12/31] missing --- beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index 698e03854..129e81d66 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -166,7 +166,8 @@ case class JodaTime( i: joda.Instant, dt: joda.DateTime, lt: joda.LocalTime, - d: joda.Duration + d: joda.Duration, + ldt: joda.LocalDateTime ) case class Maps( ms: Map[String, String], From 848053ef69ae9b91efa0826e9dec8233997447c1 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 18:04:58 -0400 Subject: [PATCH 13/31] compat --- beam/src/main/scala/magnolify/beam/BeamSchemaType.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index c7c2b6ed9..05db9b5ae 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -28,6 +28,7 @@ import org.apache.beam.sdk.schemas.logicaltypes import java.nio.ByteBuffer import java.util as ju import scala.annotation.implicitNotFound +import scala.collection.compat.* import scala.collection.concurrent import scala.jdk.CollectionConverters.* From e7e50805f021003a3b5ee3f3e822d6427ba5337f Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 22 Aug 2024 18:16:25 -0400 Subject: [PATCH 14/31] site --- build.sbt | 1 + docs/index.md | 2 ++ 2 files changed, 3 insertions(+) diff --git a/build.sbt b/build.sbt index bfddb05aa..2f4a9c47a 100644 --- a/build.sbt +++ 
b/build.sbt @@ -774,6 +774,7 @@ lazy val site = project ) .dependsOn( avro % "compile->compile,provided", + beam % "compile->compile,provided", bigquery % "compile->compile,provided", bigtable % "compile->compile,provided", cats % "compile->compile,provided", diff --git a/docs/index.md b/docs/index.md index 537a022fe..c89bfb3fa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -11,6 +11,7 @@ A collection of [Magnolia](https://github.com/propensive/magnolia) add-ons for c This library includes the following modules. - @ref:[`magnolify-avro`](avro.md) - conversion between Scala types and [Apache Avro](https://github.com/apache/avro) `GenericRecord` +- @ref:[`magnolify-beam`](beam.md) - conversion between Scala types and [Apache Beam](https://beam.apache.org/) [schema types](https://beam.apache.org/documentation/programming-guide/#schemas) - @ref:[`magnolify-bigquery`](bigquery.md) - conversion between Scala types and [Google Cloud BigQuery](https://cloud.google.com/bigquery/) `TableRow` - @ref:[`magnolify-bigtable`](bigtable.md) - conversion between Scala types and [Google Cloud Bigtable](https://cloud.google.com/bigtable) to `Mutation`, from `Row` - @ref:[`magnolify-cats`](cats.md) - type class derivation for [Cats](https://github.com/typelevel/cats), specifically @@ -35,6 +36,7 @@ Complete type mapping @ref:[here](mapping.md). 
@@@ index - @ref:[Avro](avro.md) +- @ref:[Beam](beam.md) - @ref:[BigQuery](bigquery.md) - @ref:[Bigtable](bigtable.md) - @ref:[Cats](cats.md) From 141abd2f07c0c8de6f509be363d2304a369cc2eb Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 23 Aug 2024 11:07:40 -0400 Subject: [PATCH 15/31] test --- build.sbt | 5 +- .../magnolify/scalacheck/TestArbitrary.scala | 52 +------- .../magnolify/shared/TimeArbitrary.scala | 47 +++++++ .../scala/magnolify/shared/TimeSpec.scala | 125 ++++++++++++++++++ 4 files changed, 180 insertions(+), 49 deletions(-) create mode 100644 shared/src/test/scala/magnolify/shared/TimeArbitrary.scala create mode 100644 shared/src/test/scala/magnolify/shared/TimeSpec.scala diff --git a/build.sbt b/build.sbt index 2f4a9c47a..97c540a09 100644 --- a/build.sbt +++ b/build.sbt @@ -388,7 +388,8 @@ lazy val shared = project commonSettings, crossScalaVersions := Seq(scala3, scala213, scala212), moduleName := "magnolify-shared", - description := "Shared code for Magnolify" + description := "Shared code for Magnolify", + libraryDependencies += "org.scalacheck" %% "scalacheck" % scalacheckVersion % Test ) // shared code for unit tests @@ -408,7 +409,7 @@ lazy val test = project lazy val scalacheck = project .in(file("scalacheck")) .dependsOn( - shared, + shared % "test->test,compile->compile", test % "test->test" ) .settings( diff --git a/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala b/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala index e3da13639..c6452152a 100644 --- a/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala +++ b/scalacheck/src/test/scala/magnolify/scalacheck/TestArbitrary.scala @@ -17,18 +17,16 @@ package magnolify.scalacheck import magnolify.scalacheck.semiauto.ArbitraryDerivation -import magnolify.shared.UnsafeEnum -import magnolify.test.ADT._ +import magnolify.shared.{TimeArbitrary, UnsafeEnum} +import magnolify.test.ADT.* import magnolify.test.JavaEnums -import 
magnolify.test.Simple._ -import org.joda.{time => joda} -import org.scalacheck._ +import magnolify.test.Simple.* +import org.scalacheck.* import java.net.URI import java.nio.ByteBuffer -import java.time._ -object TestArbitrary { +object TestArbitrary extends TimeArbitrary { // null implicit lazy val arbNull: Arbitrary[Null] = Arbitrary(Gen.const(null)) @@ -44,46 +42,6 @@ object TestArbitrary { Arbitrary.arbitrary[Array[Byte]].map(ByteBuffer.wrap) } - // java-time - implicit lazy val arbInstant: Arbitrary[Instant] = - Arbitrary(Gen.posNum[Long].map(Instant.ofEpochMilli)) - implicit lazy val arbLocalDate: Arbitrary[LocalDate] = - Arbitrary(Gen.chooseNum(0L, 365L * 100).map(LocalDate.ofEpochDay)) - implicit lazy val arbLocalTime: Arbitrary[LocalTime] = - Arbitrary(arbInstant.arbitrary.map(_.atZone(ZoneOffset.UTC).toLocalTime)) - implicit lazy val arbLocalDateTime: Arbitrary[LocalDateTime] = - Arbitrary(arbInstant.arbitrary.map(_.atZone(ZoneOffset.UTC).toLocalDateTime)) - implicit lazy val arbOffsetTime: Arbitrary[OffsetTime] = - Arbitrary(arbInstant.arbitrary.map(_.atOffset(ZoneOffset.UTC).toOffsetTime)) - implicit lazy val arbDuration: Arbitrary[Duration] = - Arbitrary(Gen.posNum[Long].map(Duration.ofMillis)) - - // joda-time - implicit val arbJodaDate: Arbitrary[joda.LocalDate] = Arbitrary { - Arbitrary.arbitrary[LocalDate].map { ld => - new joda.LocalDate(ld.getYear, ld.getMonthValue, ld.getDayOfMonth) - } - } - implicit val arbJodaDateTime: Arbitrary[joda.DateTime] = Arbitrary { - Arbitrary.arbitrary[Instant].map { i => - new joda.DateTime(i.toEpochMilli, joda.DateTimeZone.UTC) - } - } - implicit val arbJodaLocalTime: Arbitrary[joda.LocalTime] = Arbitrary { - Arbitrary.arbitrary[LocalTime].map { lt => - joda.LocalTime.fromMillisOfDay(lt.toNanoOfDay / 1000) - } - } - implicit val arbJodaLocalDateTime: Arbitrary[joda.LocalDateTime] = Arbitrary { - Arbitrary.arbitrary[LocalDateTime].map { ldt => - joda.LocalDateTime.parse(ldt.toString) - } - } - implicit val 
arbJodaDuration: Arbitrary[joda.Duration] = - Arbitrary(Gen.posNum[Long].map(joda.Duration.millis)) - implicit val arbJodaInstant: Arbitrary[joda.Instant] = - Arbitrary(Gen.posNum[Long].map(l => new joda.Instant(l))) - // enum implicit lazy val arbJavaEnum: Arbitrary[JavaEnums.Color] = Arbitrary(Gen.oneOf(JavaEnums.Color.values.toSeq)) diff --git a/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala b/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala new file mode 100644 index 000000000..0e0a115d1 --- /dev/null +++ b/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala @@ -0,0 +1,47 @@ +package magnolify.shared + +import org.joda.time as joda +import org.scalacheck.{Arbitrary, Gen} + +import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime, OffsetTime, ZoneOffset} + +trait TimeArbitrary { + implicit lazy val arbInstant: Arbitrary[Instant] = + Arbitrary(Gen.posNum[Long].map(Instant.ofEpochMilli)) + implicit lazy val arbLocalDate: Arbitrary[LocalDate] = + Arbitrary(Gen.chooseNum(0L, 365L * 100).map(LocalDate.ofEpochDay)) + implicit lazy val arbLocalTime: Arbitrary[LocalTime] = + Arbitrary(arbInstant.arbitrary.map(_.atZone(ZoneOffset.UTC).toLocalTime)) + implicit lazy val arbLocalDateTime: Arbitrary[LocalDateTime] = + Arbitrary(arbInstant.arbitrary.map(_.atZone(ZoneOffset.UTC).toLocalDateTime)) + implicit lazy val arbOffsetTime: Arbitrary[OffsetTime] = + Arbitrary(arbInstant.arbitrary.map(_.atOffset(ZoneOffset.UTC).toOffsetTime)) + implicit lazy val arbDuration: Arbitrary[Duration] = + Arbitrary(Gen.posNum[Long].map(Duration.ofMillis)) + + implicit val arbJodaDate: Arbitrary[joda.LocalDate] = Arbitrary { + Arbitrary.arbitrary[LocalDate].map { ld => + new joda.LocalDate(ld.getYear, ld.getMonthValue, ld.getDayOfMonth) + } + } + implicit val arbJodaDateTime: Arbitrary[joda.DateTime] = Arbitrary { + Arbitrary.arbitrary[Instant].map { i => + new joda.DateTime(i.toEpochMilli, joda.DateTimeZone.UTC) + } + } + implicit val 
arbJodaLocalTime: Arbitrary[joda.LocalTime] = Arbitrary { + Arbitrary.arbitrary[LocalTime].map { lt => + joda.LocalTime.fromMillisOfDay(lt.toNanoOfDay / 1000) + } + } + implicit val arbJodaLocalDateTime: Arbitrary[joda.LocalDateTime] = Arbitrary { + Arbitrary.arbitrary[LocalDateTime].map { ldt => + joda.LocalDateTime.parse(ldt.toString) + } + } + implicit val arbJodaDuration: Arbitrary[joda.Duration] = + Arbitrary(Gen.posNum[Long].map(joda.Duration.millis)) + implicit val arbJodaInstant: Arbitrary[joda.Instant] = + Arbitrary(Gen.posNum[Long].map(l => new joda.Instant(l))) + +} diff --git a/shared/src/test/scala/magnolify/shared/TimeSpec.scala b/shared/src/test/scala/magnolify/shared/TimeSpec.scala new file mode 100644 index 000000000..8571e004a --- /dev/null +++ b/shared/src/test/scala/magnolify/shared/TimeSpec.scala @@ -0,0 +1,125 @@ +package magnolify.shared + +import org.joda.time as joda +import org.scalacheck.* +import org.scalacheck.Prop.forAll + +class TimeSpec extends Properties("Time") with TimeArbitrary { + import Time._ + + case class Convert[T, U: Arbitrary, V: Arbitrary]( + name: String, + javaTo: T => U, + javaFrom: U => T, + jodaTo: T => V, + jodaFrom: V => T + ) { + def java = + property(name) = forAll((u: U) => (javaFrom andThen javaTo)(u) == u) + def joda = + property(s"$name-joda") = forAll((v: V) => (jodaFrom andThen jodaTo)(v) == v) + def roundtrip = + property(s"$name-roundtrip") = + forAll((u: U) => (javaFrom andThen jodaTo andThen jodaFrom andThen javaTo)(u) == u) + } + + val conversions: List[Convert[?, ?, ?]] = List( + Convert( + "millis-instant", + millisToInstant, + millisFromInstant, + millisToJodaInstant, + millisFromJodaInstant + ), + Convert( + "millis-localtime", + millisToLocalTime, + millisFromLocalTime, + millisToJodaLocalTime, + millisFromJodaLocalTime + ), + Convert( + "millis-localdatetime", + millisToLocalDateTime, + millisFromLocalDateTime, + millisToJodaLocalDateTime, + millisFromJodaLocalDateTime + ), + Convert( + 
"millis-duration", + millisToDuration, + millisFromDuration, + millisToJodaDuration, + millisFromJodaDuration + ), + Convert( + "micros-instant", + microsToInstant, + microsFromInstant, + microsToJodaInstant, + microsFromJodaInstant + ), + Convert( + "micros-localtime", + microsToLocalTime, + microsFromLocalTime, + microsToJodaLocalTime, + microsFromJodaLocalTime + ), + Convert( + "micros-localdatetime", + microsToLocalDateTime, + microsFromLocalDateTime, + microsToJodaLocalDateTime, + microsFromJodaLocalDateTime + ), + Convert( + "micros-duration", + microsToDuration, + microsFromDuration, + microsToJodaDuration, + microsFromJodaDuration + ), + Convert( + "nanos-instant", + nanosToInstant, + nanosFromInstant, + nanosToJodaInstant, + nanosFromJodaInstant + ), + Convert( + "nanos-localtime", + nanosToLocalTime, + nanosFromLocalTime, + nanosToJodaLocalTime, + nanosFromJodaLocalTime + ), + Convert( + "nanos-localdatetime", + nanosToLocalDateTime, + nanosFromLocalDateTime, + nanosToJodaLocalDateTime, + nanosFromJodaLocalDateTime + ), + Convert( + "nanos-duration", + nanosToDuration, + nanosFromDuration, + nanosToJodaDuration, + nanosFromJodaDuration + ) + ) + + conversions.foreach { c => + c.java + c.joda + c.roundtrip + } + + property(s"millis-datetime-joda") = + forAll((v: joda.DateTime) => (millisFromJodaDateTime andThen millisToJodaDateTime)(v) == v) + property(s"micros-datetime-joda") = + forAll((v: joda.DateTime) => (microsFromJodaDateTime andThen microsToJodaDateTime)(v) == v) + property(s"nanos-datetime-joda") = + forAll((v: joda.DateTime) => (nanosFromJodaDateTime andThen nanosToJodaDateTime)(v) == v) +} From c8fc829c05124776f95f761bb6e775b34ab9c12b Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 23 Aug 2024 11:13:34 -0400 Subject: [PATCH 16/31] fml --- .../scala/magnolify/shared/TimeArbitrary.scala | 16 ++++++++++++++++ .../test/scala/magnolify/shared/TimeSpec.scala | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git 
a/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala b/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala index 0e0a115d1..892f7e651 100644 --- a/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala +++ b/shared/src/test/scala/magnolify/shared/TimeArbitrary.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package magnolify.shared import org.joda.time as joda diff --git a/shared/src/test/scala/magnolify/shared/TimeSpec.scala b/shared/src/test/scala/magnolify/shared/TimeSpec.scala index 8571e004a..f3fc8ec92 100644 --- a/shared/src/test/scala/magnolify/shared/TimeSpec.scala +++ b/shared/src/test/scala/magnolify/shared/TimeSpec.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2024 Spotify AB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package magnolify.shared import org.joda.time as joda From f3f2fc78cc388dd70e7b7b804b1067ba65ded4ce Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 23 Aug 2024 11:20:02 -0400 Subject: [PATCH 17/31] _ --- shared/src/test/scala/magnolify/shared/TimeSpec.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shared/src/test/scala/magnolify/shared/TimeSpec.scala b/shared/src/test/scala/magnolify/shared/TimeSpec.scala index f3fc8ec92..36c5a2fef 100644 --- a/shared/src/test/scala/magnolify/shared/TimeSpec.scala +++ b/shared/src/test/scala/magnolify/shared/TimeSpec.scala @@ -133,9 +133,9 @@ class TimeSpec extends Properties("Time") with TimeArbitrary { } property(s"millis-datetime-joda") = - forAll((v: joda.DateTime) => (millisFromJodaDateTime andThen millisToJodaDateTime)(v) == v) + forAll((v: joda.DateTime) => (millisFromJodaDateTime _ andThen millisToJodaDateTime)(v) == v) property(s"micros-datetime-joda") = - forAll((v: joda.DateTime) => (microsFromJodaDateTime andThen microsToJodaDateTime)(v) == v) + forAll((v: joda.DateTime) => (microsFromJodaDateTime _ andThen microsToJodaDateTime)(v) == v) property(s"nanos-datetime-joda") = - forAll((v: joda.DateTime) => (nanosFromJodaDateTime andThen nanosToJodaDateTime)(v) == v) + forAll((v: joda.DateTime) => (nanosFromJodaDateTime _ andThen nanosToJodaDateTime)(v) == v) } From dc5a39e108b8a2fae8be335a7acbd1ccbfd73296 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 23 Aug 2024 13:12:27 -0400 Subject: [PATCH 18/31] coverage --- .../scala/magnolify/beam/BeamSchemaType.scala | 2 +- .../magnolify/beam/BeamSchemaTypeSuite.scala | 10 +++++---- .../magnolify/parquet/logical/package.scala | 4 ++-- .../magnolify/parquet/ParquetTypeSuite.scala | 21 +++++++++++++++++++ 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 05db9b5ae..9f793a5ec 100644 --- 
a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -179,9 +179,9 @@ object BeamSchemaField { // BYTE An 8-bit signed value implicit val bsfByte: BeamSchemaField[Byte] = id[Byte](_ => FieldType.BYTE) - implicit val bsfChar: BeamSchemaField[Char] = from[Byte](_.toChar)(_.toByte) // INT16 A 16-bit signed value implicit val bsfShort: BeamSchemaField[Short] = id[Short](_ => FieldType.INT16) + implicit val bsfChar: BeamSchemaField[Char] = from[Short](_.toChar)(_.toShort) // INT32 A 32-bit signed value implicit val bsfInt: BeamSchemaField[Int] = id[Int](_ => FieldType.INT32) // INT64 A 64-bit signed value diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index 129e81d66..297a8d895 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -29,7 +29,8 @@ import org.apache.beam.sdk.schemas.Schema import org.joda.time as joda import org.scalacheck.{Arbitrary, Gen, Prop} -import java.time.{Instant, LocalDate, LocalDateTime, LocalTime} +import java.nio.ByteBuffer +import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime} import java.util.UUID import scala.reflect.ClassTag import scala.jdk.CollectionConverters.* @@ -67,7 +68,7 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[Collections] test[MoreCollections] - test[Bs] + test[Others] test[Maps] test[Logical] test[Decimal] @@ -140,7 +141,7 @@ class BeamSchemaTypeSuite extends MagnolifySuite { } } -case class Bs(bs: ByteString) +case class Others(bs: ByteString, cs: CharSequence, bb: ByteBuffer, c: Char) case class Decimal(bd: BigDecimal, bdo: Option[BigDecimal]) case class Logical( u: UUID, @@ -160,7 +161,8 @@ case class JodaDate(jd: joda.LocalDate) case class JavaTime( i: Instant, dt: LocalDateTime, - t: LocalTime + t: LocalTime, + d: Duration ) case class 
JodaTime( i: joda.Instant, diff --git a/parquet/src/main/scala/magnolify/parquet/logical/package.scala b/parquet/src/main/scala/magnolify/parquet/logical/package.scala index e0b64d84c..d2fcc5bae 100644 --- a/parquet/src/main/scala/magnolify/parquet/logical/package.scala +++ b/parquet/src/main/scala/magnolify/parquet/logical/package.scala @@ -66,9 +66,9 @@ package object logical { override protected val unit = TimeUnit.NANOS // TIMESTAMP - implicit val pfTimestampMillis: Primitive[Instant] = + implicit val pfTimestampNanos: Primitive[Instant] = ParquetField.logicalType[Long](ts(true))(nanosToInstant)(nanosFromInstant) - implicit val pfLocalDateTimeMillis: Primitive[LocalDateTime] = + implicit val pfLocalDateTimeNanos: Primitive[LocalDateTime] = ParquetField.logicalType[Long](ts(false))(nanosToLocalDateTime)(nanosFromLocalDateTime) // TIME diff --git a/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala b/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala index b7d6ef3ab..6d1b8c892 100644 --- a/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala +++ b/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala @@ -138,6 +138,27 @@ class ParquetTypeSuite extends MagnolifySuite { Arbitrary(Gen.choose(-max, max).map(BigDecimal.apply)) } + test("Decimal range") { + intercept[IllegalArgumentException] { + ParquetField.decimal32(0, 0) + } + intercept[IllegalArgumentException] { + ParquetField.decimal32(1, 10) + } + intercept[IllegalArgumentException] { + ParquetField.decimal64(0, 0) + } + intercept[IllegalArgumentException] { + ParquetField.decimal64(1, 19) + } + intercept[IllegalArgumentException] { + ParquetField.decimalFixed(0, 1) + } + intercept[IllegalArgumentException] { + ParquetField.decimalFixed(2, 5) // capacity = 4 + } + } + { implicit val arbBigDecimal: Arbitrary[BigDecimal] = decimal(9) implicit val pfBigDecimal: ParquetField[BigDecimal] = ParquetField.decimal32(9, 0) From ec120ebde8a5fe6e4d9593df5a797f8ea2a8ea0f Mon 
Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 23 Aug 2024 13:20:07 -0400 Subject: [PATCH 19/31] filter --- build.sbt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/build.sbt b/build.sbt index 97c540a09..743a71de1 100644 --- a/build.sbt +++ b/build.sbt @@ -233,6 +233,14 @@ ThisBuild / mimaBinaryIssueFilters ++= Seq( // incorrectly named implicit ProblemFilters.exclude[DirectMissingMethodProblem]( "magnolify.parquet.logical.package#micros.pfLocalDateTimeMillis" + ), + // incorrectly named implicit + ProblemFilters.exclude[DirectMissingMethodProblem]( + "magnolify.parquet.logical.package#nanos.pfTimestampMillis" + ), + // incorrectly named implicit + ProblemFilters.exclude[DirectMissingMethodProblem]( + "magnolify.parquet.logical.package#nanos.pfLocalDateTimeMillis" ) ) ThisBuild / tlVersionIntroduced := Map("3" -> "0.8.0") From 6c8c53b6f503c134d878ccebca209159cf719a99 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 23 Aug 2024 15:34:36 -0400 Subject: [PATCH 20/31] coverage --- .../magnolify/beam/logical/package.scala | 17 +++++---- .../magnolify/beam/BeamSchemaTypeSuite.scala | 36 +++++++++++-------- .../test/scala/magnolify/cats/TestEq.scala | 14 ++++---- docs/mapping.md | 2 +- .../magnolify/parquet/ParquetTypeSuite.scala | 27 ++++++++------ 5 files changed, 53 insertions(+), 43 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/logical/package.scala b/beam/src/main/scala/magnolify/beam/logical/package.scala index f7d4c472b..fcd93ff1e 100644 --- a/beam/src/main/scala/magnolify/beam/logical/package.scala +++ b/beam/src/main/scala/magnolify/beam/logical/package.scala @@ -20,6 +20,7 @@ import org.apache.beam.sdk.schemas.logicaltypes import org.apache.beam.sdk.schemas.Schema.FieldType import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes import org.joda.time as joda +import org.joda.time.chrono.ISOChronology import java.time as jt import java.time.temporal.ChronoField @@ -33,21 +34,19 @@ package object logical { private lazy val 
EpochJodaDate = new joda.LocalDate(1970, 1, 1) implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = BeamSchemaField.from[jt.LocalDate](jtld => - EpochJodaDate.plusDays(jtld.get(ChronoField.EPOCH_DAY)) + EpochJodaDate.plusDays(jtld.getLong(ChronoField.EPOCH_DAY).toInt) )(d => jt.LocalDate.ofEpochDay(joda.Days.daysBetween(EpochJodaDate, d).getDays.toLong)) } object millis { - implicit val bsfInstantMillis: BeamSchemaField[jt.Instant] = - BeamSchemaField.id[jt.Instant](_ => FieldType.DATETIME) - implicit val bsfJodaInstantMillis: BeamSchemaField[joda.Instant] = - BeamSchemaField.from[jt.Instant](i => millisToJodaInstant(millisFromInstant(i)))(i => - millisToInstant(millisFromJodaInstant(i)) + implicit lazy val bsfInstantMillis: BeamSchemaField[jt.Instant] = + BeamSchemaField.from[joda.Instant](i => millisToInstant(millisFromJodaInstant(i)))(i => + millisToJodaInstant(millisFromInstant(i)) ) + implicit val bsfJodaInstantMillis: BeamSchemaField[joda.Instant] = + BeamSchemaField.id[joda.Instant](_ => FieldType.DATETIME) implicit val bsfJodaDateTimeMillis: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[jt.Instant](i => millisToJodaDateTime(millisFromInstant(i)))(dt => - millisToInstant(millisFromJodaDateTime(dt)) - ) + BeamSchemaField.from[joda.Instant](_.toDateTime(ISOChronology.getInstanceUTC))(_.toInstant) implicit val bsfLocalTimeMillis: BeamSchemaField[jt.LocalTime] = BeamSchemaField.from[Int](millisToLocalTime)(millisFromLocalTime) diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index 297a8d895..d2764cfab 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -39,12 +39,17 @@ class BeamSchemaTypeSuite extends MagnolifySuite { private def test[T: Arbitrary: ClassTag](implicit bst: BeamSchemaType[T], eq: Eq[T] + ): Unit = testNamed[T](className[T]) + + private def 
testNamed[T: Arbitrary](name: String)(implicit + bst: BeamSchemaType[T], + eq: Eq[T] ): Unit = { // Ensure serializable even after evaluation of `schema` bst.schema: Unit ensureSerializable(bst) - property(className[T]) { + property(name) { Prop.forAll { (t: T) => val converted = bst.apply(t) val roundtripped = bst.apply(converted) @@ -68,6 +73,7 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[Collections] test[MoreCollections] + test[Empty] test[Others] test[Maps] test[Logical] @@ -80,28 +86,28 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[UnsafeEnums] } - property("Date") { + { import magnolify.beam.logical.date.* test[JavaDate] test[JodaDate] } - property("Millis") { + { import magnolify.beam.logical.millis.* - test[JavaTime] - test[JodaTime] + testNamed[JavaTime]("JavaMillis") + testNamed[JodaTime]("JodaMillis") } - property("Micros") { + { import magnolify.beam.logical.micros.* - test[JavaTime] - test[JodaTime] + testNamed[JavaTime]("JavaMicros") + testNamed[JodaTime]("JodaMicros") } - property("Nanos") { + { import magnolify.beam.logical.nanos.* - test[JavaTime] - test[JodaTime] + testNamed[JavaTime]("JavaNanos") + testNamed[JodaTime]("JodaNanos") } { @@ -109,9 +115,8 @@ class BeamSchemaTypeSuite extends MagnolifySuite { BeamSchemaType[LowerCamel](CaseMapper(_.toUpperCase)) test[LowerCamel] - test("LowerCamel mapping") { + { val schema = bst.schema - val fields = LowerCamel.fields.map(_.toUpperCase) assertEquals(schema.getFields.asScala.map(_.getName()).toSeq, fields) assertEquals( @@ -121,7 +126,7 @@ class BeamSchemaTypeSuite extends MagnolifySuite { } } - test("ValueClass") { + { // value classes should act only as fields intercept[IllegalArgumentException] { BeamSchemaType[ValueClass] @@ -135,12 +140,13 @@ class BeamSchemaTypeSuite extends MagnolifySuite { assert(record.getValue[String]("vc").equals("String")) } - property("Sql") { + { import magnolify.beam.logical.sql.* test[Sql] } } +case class Empty() case class Others(bs: 
ByteString, cs: CharSequence, bb: ByteBuffer, c: Char) case class Decimal(bd: BigDecimal, bdo: Option[BigDecimal]) case class Logical( diff --git a/cats/src/test/scala/magnolify/cats/TestEq.scala b/cats/src/test/scala/magnolify/cats/TestEq.scala index 22ecb0233..db0c76ce5 100644 --- a/cats/src/test/scala/magnolify/cats/TestEq.scala +++ b/cats/src/test/scala/magnolify/cats/TestEq.scala @@ -45,7 +45,7 @@ object TestEq { // Can only be used as a key value list m.map { case (k, v) => k.toString -> v } } - implicit val eqByteBuffer: Eq[ByteBuffer] = Eq.by(_.array()) + implicit lazy val eqByteBuffer: Eq[ByteBuffer] = Eq.by(_.array()) // java-time implicit lazy val eqInstant: Eq[Instant] = Eq.fromUniversalEquals @@ -56,12 +56,12 @@ object TestEq { implicit lazy val eqDuration: Eq[Duration] = Eq.fromUniversalEquals // joda-time - implicit val eqJodaDate: Eq[joda.LocalDate] = Eq.fromUniversalEquals - implicit val eqJodaDateTime: Eq[joda.DateTime] = Eq.fromUniversalEquals - implicit val eqJodaLocalTime: Eq[joda.LocalTime] = Eq.fromUniversalEquals - implicit val eqJodaLocalDateTime: Eq[joda.LocalDateTime] = Eq.fromUniversalEquals - implicit val eqJodaDuration: Eq[joda.Duration] = Eq.fromUniversalEquals - implicit val eqJodaInstant: Eq[joda.Instant] = Eq.fromUniversalEquals + implicit lazy val eqJodaDate: Eq[joda.LocalDate] = Eq.fromUniversalEquals + implicit lazy val eqJodaDateTime: Eq[joda.DateTime] = Eq.fromUniversalEquals + implicit lazy val eqJodaLocalTime: Eq[joda.LocalTime] = Eq.fromUniversalEquals + implicit lazy val eqJodaLocalDateTime: Eq[joda.LocalDateTime] = Eq.fromUniversalEquals + implicit lazy val eqJodaDuration: Eq[joda.Duration] = Eq.fromUniversalEquals + implicit lazy val eqJodaInstant: Eq[joda.Instant] = Eq.fromUniversalEquals // enum implicit lazy val eqJavaEnum: Eq[JavaEnums.Color] = Eq.fromUniversalEquals diff --git a/docs/mapping.md b/docs/mapping.md index c86db47e1..cb5284b94 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -60,5 +60,5 @@ one of: 
`magnolify.parquet.ParquetField.{decimal32, decimal64, decimalFixed, decimalBinary}`. 15. Map key type in avro is fixed to string. Scala Map key type must be either `String` or `CharSequence`. 16. Beam logical [Enumeration type](https://beam.apache.org/documentation/programming-guide/#enumerationtype) -17. See [beam.md][protobuf.md](https://github.com/spotify/magnolify/blob/master/docs/beam.md) for details +17. See [beam.md](https://github.com/spotify/magnolify/blob/master/docs/beam.md) for details 18. Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) diff --git a/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala b/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala index 6d1b8c892..53357fb8a 100644 --- a/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala +++ b/parquet/src/test/scala/magnolify/parquet/ParquetTypeSuite.scala @@ -43,12 +43,17 @@ class ParquetTypeSuite extends MagnolifySuite { private def test[T: Arbitrary: ClassTag](implicit t: ParquetType[T], eq: Eq[T] + ): Unit = testNamed[T](className[T]) + + private def testNamed[T: Arbitrary](name: String)(implicit + t: ParquetType[T], + eq: Eq[T] ): Unit = { // Ensure serializable even after evaluation of `schema` t.schema: Unit val tpe = ensureSerializable(t) - property(className[T]) { + property(name) { Prop.forAll { (t: T) => val out = new TestOutputFile val writer = tpe.writeBuilder(out).build() @@ -162,43 +167,43 @@ class ParquetTypeSuite extends MagnolifySuite { { implicit val arbBigDecimal: Arbitrary[BigDecimal] = decimal(9) implicit val pfBigDecimal: ParquetField[BigDecimal] = ParquetField.decimal32(9, 0) - test[Decimal] + testNamed[Decimal]("Decimal32") } { implicit val arbBigDecimal: Arbitrary[BigDecimal] = decimal(18) implicit val pfBigDecimal: ParquetField[BigDecimal] = ParquetField.decimal64(18, 0) - test[Decimal] + testNamed[Decimal]("Decimal64") } { implicit val 
arbBigDecimal: Arbitrary[BigDecimal] = decimal(18) // math.floor(math.log10(math.pow(2, 8*8-1) - 1)) = 18 digits implicit val pfBigDecimal: ParquetField[BigDecimal] = ParquetField.decimalFixed(8, 18, 0) - test[Decimal] + testNamed[Decimal]("DecimalFixed") } { implicit val arbBigDecimal: Arbitrary[BigDecimal] = decimal(20) implicit val pfBigDecimal: ParquetField[BigDecimal] = ParquetField.decimalBinary(20, 0) - test[Decimal] + testNamed[Decimal]("DecimalBinary") } test[Logical] - property("Millis") { + { import magnolify.parquet.logical.millis._ - test[Time] + testNamed[Time]("TimeMillis") } - property("Micros") { + { import magnolify.parquet.logical.micros._ - test[Time] + testNamed[Time]("TimeMicros") } - property("Nanos") { + { import magnolify.parquet.logical.nanos._ - test[Time] + testNamed[Time]("TimeNanos") } { From 641e87eac2a5cd80bd8608f78939c9f651963404 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Tue, 27 Aug 2024 10:10:56 -0400 Subject: [PATCH 21/31] comments --- .../scala/magnolify/beam/BeamSchemaType.scala | 17 +++-------------- build.sbt | 4 ++-- docs/mapping.md | 2 +- 3 files changed, 6 insertions(+), 17 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 9f793a5ec..49e3b9a07 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.* // https://beam.apache.org/documentation/programming-guide/#schema-definition sealed trait BeamSchemaType[T] extends Converter[T, Row, Row] { - val schema: Schema + def schema: Schema def apply(r: Row): T = from(r) def apply(t: T): Row = to(t) } @@ -159,19 +159,8 @@ object BeamSchemaField { caseClass.construct(p => p.typeclass.fromAny(v.getValue[Any](p.index))(cm)) override def to(v: T)(cm: CaseMapper): Row = { - val zero: Either[Row.Builder, Row.FieldValueBuilder] = Left(Row.withSchema(schema(cm))) 
- val eitherBuilder = caseClass.parameters - .foldLeft(zero) { (eitherBuilder, p) => - val value = p.typeclass.to(p.dereference(v))(cm) - eitherBuilder match { - case Left(rowBuilder) => Right(rowBuilder.withFieldValue(p.index, value)) - case Right(fieldBuilder) => Right(fieldBuilder.withFieldValue(p.index, value)) - } - } - eitherBuilder match { - case Left(rb) => rb.build() - case Right(fb) => fb.build() - } + val values = caseClass.parameters.map(p => p.typeclass.to(p.dereference(v))(cm)) + Row.withSchema(schema(cm)).addValues(values: _*).build() } } } diff --git a/build.sbt b/build.sbt index 743a71de1..3753a0c00 100644 --- a/build.sbt +++ b/build.sbt @@ -129,7 +129,7 @@ ThisBuild / crossScalaVersions := Seq(scala3, scala213, scala212) ThisBuild / githubWorkflowTargetBranches := Seq("main") ThisBuild / githubWorkflowJavaVersions := Seq(java17, java11) ThisBuild / tlJdkRelease := Some(8) -ThisBuild / tlFatalWarnings := false +ThisBuild / tlFatalWarnings := true ThisBuild / tlCiHeaderCheck := true ThisBuild / tlCiScalafmtCheck := true ThisBuild / tlCiDocCheck := true @@ -522,7 +522,7 @@ lazy val beam = project description := "Magnolia add-on for Apache Beam", libraryDependencies ++= Seq( "org.apache.beam" % "beam-sdks-java-core" % beamVersion % Provided, - "com.google.protobuf" % "protobuf-java" % protobufVersion % ProtobufConfig + "com.google.protobuf" % "protobuf-java" % protobufVersion % Provided ), // TODO remove this line after release tlMimaPreviousVersions := Set.empty diff --git a/docs/mapping.md b/docs/mapping.md index cb5284b94..7f5450ed1 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -61,4 +61,4 @@ 15. Map key type in avro is fixed to string. Scala Map key type must be either `String` or `CharSequence`. 16. Beam logical [Enumeration type](https://beam.apache.org/documentation/programming-guide/#enumerationtype) 17. See [beam.md](https://github.com/spotify/magnolify/blob/master/docs/beam.md) for details -18. 
Beam logical [UUID type](https://beam.apache.org/releases/javadoc/2.58.1/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) +18. Beam logical [UUID type](https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/schemas/logicaltypes/UuidLogicalType.html) From 337d487333ac994acef05616a00d1406f668f6fd Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Tue, 27 Aug 2024 10:32:14 -0400 Subject: [PATCH 22/31] 212 --- beam/src/main/scala/magnolify/beam/BeamSchemaType.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 49e3b9a07..c35e529c1 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -159,8 +159,10 @@ object BeamSchemaField { caseClass.construct(p => p.typeclass.fromAny(v.getValue[Any](p.index))(cm)) override def to(v: T)(cm: CaseMapper): Row = { - val values = caseClass.parameters.map(p => p.typeclass.to(p.dereference(v))(cm)) - Row.withSchema(schema(cm)).addValues(values: _*).build() + val values = caseClass.parameters.map { p => + p.typeclass.to(p.dereference(v))(cm).asInstanceOf[Object] + } + Row.withSchema(schema(cm)).addValues(values.asJava).build() } } } From 46f30ff89402169975a19d75d3df73c935f91769 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Tue, 27 Aug 2024 16:03:47 -0400 Subject: [PATCH 23/31] Add OneOf support --- .../scala/magnolify/beam/BeamSchemaType.scala | 60 ++++++++++++------- .../magnolify/beam/BeamSchemaTypeSuite.scala | 8 +++ test/src/test/scala/magnolify/test/ADT.scala | 3 + 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index c35e529c1..83d18ae64 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ 
b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -19,7 +19,7 @@ package magnolify.beam import magnolia1.* import magnolify.shared.* import org.apache.beam.sdk.schemas.Schema -import org.apache.beam.sdk.schemas.Schema.FieldType +import org.apache.beam.sdk.schemas.Schema.{Field, FieldType} import org.apache.beam.sdk.values.Row import com.google.protobuf.ByteString import magnolify.shims.FactoryCompat @@ -27,7 +27,6 @@ import org.apache.beam.sdk.schemas.logicaltypes import java.nio.ByteBuffer import java.util as ju -import scala.annotation.implicitNotFound import scala.collection.compat.* import scala.collection.concurrent import scala.jdk.CollectionConverters.* @@ -112,26 +111,43 @@ object BeamSchemaField { type Typeclass[T] = BeamSchemaField[T] implicit def gen[T]: BeamSchemaField[T] = macro Magnolia.gen[T] - // TODO beam schemas support OneOf - @implicitNotFound("Cannot derive BeamSchemaField for sealed trait") - private sealed trait Dispatchable[T] - def split[T: Dispatchable](sealedTrait: SealedTrait[Typeclass, T]): BeamSchemaField[T] = ??? - // new BeamSchemaField[T] { - // override type FromT = ??? - // override type ToT = ??? - // override def fieldType(cm: CaseMapper): FieldType = { - // FieldType.logicalType( - // logicaltypes.OneOfType.create( - // sealedTrait.subtypes.map { sub => - // Field.of(s"${sub.typeName.owner}.${sub.typeName.short}", sub.typeclass.fieldType(cm)) - // } - // .asJava - // ) - // ) - // } - // override def from(v: this.type)(cm: CaseMapper): T = ??? - // override def to(v: T)(cm: CaseMapper): this.type = ??? 
- // } + def split[T]( + sealedTrait: SealedTrait[Typeclass, T] + )(implicit r: shapeless.Refute[EnumType[T]]): BeamSchemaField[T] = + new BeamSchemaField[T] { + override type FromT = logicaltypes.OneOfType.Value + override type ToT = logicaltypes.OneOfType.Value + + private def enumName(sub: Subtype[Typeclass, T]): String = + s"${sub.typeName.owner}.${sub.typeName.short}" + + @transient private lazy val beamOneOfTypeCache + : concurrent.Map[ju.UUID, logicaltypes.OneOfType] = concurrent.TrieMap.empty + private def beamOneOfType(cm: CaseMapper): logicaltypes.OneOfType = + beamOneOfTypeCache.getOrElseUpdate( + cm.uuid, + logicaltypes.OneOfType.create( + sealedTrait.subtypes.map { sub => + Field.of(enumName(sub), sub.typeclass.fieldType(cm)) + }.asJava + ) + ) + + override def fieldType(cm: CaseMapper): FieldType = + FieldType.logicalType(beamOneOfType(cm)) + def from(v: logicaltypes.OneOfType.Value)(cm: CaseMapper): T = { + val idx = v.getCaseType.getValue + sealedTrait.subtypes.find(_.index == idx) match { + case None => throw new IllegalArgumentException(s"OneOf index not found: [$idx]") + case Some(sub) => sub.typeclass.fromAny(v.getValue)(cm) + } + } + + def to(v: T)(cm: CaseMapper): logicaltypes.OneOfType.Value = + sealedTrait.split(v)(sub => + beamOneOfType(cm).createValue(enumName(sub), sub.typeclass.to(sub.cast(v))(cm)) + ) + } def join[T](caseClass: CaseClass[Typeclass, T]): BeamSchemaField[T] = { if (caseClass.isValueClass) { diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala index d2764cfab..d1ca610e3 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala @@ -23,6 +23,7 @@ import magnolify.cats.TestEq.* import magnolify.scalacheck.auto.* import magnolify.scalacheck.TestArbitrary.* import magnolify.shared.CaseMapper +import magnolify.test.ADT import magnolify.test.MagnolifySuite import 
magnolify.test.Simple.* import org.apache.beam.sdk.schemas.Schema @@ -79,6 +80,11 @@ class BeamSchemaTypeSuite extends MagnolifySuite { test[Logical] test[Decimal] + { + import magnolify.shared.TestEnumType._ + test[SealedTest] + } + { import magnolify.beam.unsafe._ import magnolify.shared.TestEnumType._ @@ -191,3 +197,5 @@ case class Maps( mu: Map[UUID, UUID], mlo: Map[Option[UUID], Option[UUID]] ) + +case class SealedTest(shape: ADT.Shape, point: ADT.Rect, enumColor: ADT.Color) diff --git a/test/src/test/scala/magnolify/test/ADT.scala b/test/src/test/scala/magnolify/test/ADT.scala index 084b54db7..780593bf5 100644 --- a/test/src/test/scala/magnolify/test/ADT.scala +++ b/test/src/test/scala/magnolify/test/ADT.scala @@ -31,6 +31,9 @@ object ADT { case object Space extends Shape case class Point(x: Int, y: Int) extends Shape case class Circle(r: Int) extends Shape + sealed trait RectShape extends Shape + case class Square(origin: Point, sideLength: Int) extends RectShape + case class Rect(origin: Point, width: Int, length: Int) extends RectShape @ScalaAnnotation("Color") sealed trait Color From 92f84624f5e719a0216fcf9760bf54e308e23b9b Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 29 Aug 2024 16:23:44 -0400 Subject: [PATCH 24/31] yes --- beam/src/main/scala/magnolify/beam/BeamSchemaType.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala index 83d18ae64..ccd13897c 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala @@ -45,10 +45,10 @@ object BeamSchemaType { def apply[T](cm: CaseMapper)(implicit f: BeamSchemaField[T]): BeamSchemaType[T] = { f match { case r: BeamSchemaField.Record[_] => - r.schema(cm) // fail fast on bad annotations + val mappedSchema = r.schema(cm) // fail fast on bad annotations new BeamSchemaType[T] { private val caseMapper: 
CaseMapper = cm - @transient override lazy val schema: Schema = r.schema(caseMapper) + override lazy val schema: Schema = mappedSchema override def from(v: Row): T = r.from(v)(caseMapper) override def to(v: T): Row = r.to(v)(caseMapper) From 4e6c3afcb2255871693146b373c6ab3400cdf315 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 30 Aug 2024 08:21:39 -0400 Subject: [PATCH 25/31] naming --- test/src/test/scala/magnolify/test/ADT.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/src/test/scala/magnolify/test/ADT.scala b/test/src/test/scala/magnolify/test/ADT.scala index 780593bf5..37270dd7f 100644 --- a/test/src/test/scala/magnolify/test/ADT.scala +++ b/test/src/test/scala/magnolify/test/ADT.scala @@ -30,10 +30,10 @@ object ADT { sealed trait Shape case object Space extends Shape case class Point(x: Int, y: Int) extends Shape - case class Circle(r: Int) extends Shape - sealed trait RectShape extends Shape - case class Square(origin: Point, sideLength: Int) extends RectShape - case class Rect(origin: Point, width: Int, length: Int) extends RectShape + case class Circle(center: Point, r: Int) extends Shape + sealed trait Quadrilateral extends Shape + case class Square(origin: Point, sideLength: Int) extends Quadrilateral + case class Rect(origin: Point, width: Int, length: Int) extends Quadrilateral @ScalaAnnotation("Color") sealed trait Color From 84cfc8414951627e7f44d3a59550e0ca41e7304c Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 30 Aug 2024 08:49:17 -0400 Subject: [PATCH 26/31] breaks scalacheck test, reverting --- test/src/test/scala/magnolify/test/ADT.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/test/scala/magnolify/test/ADT.scala b/test/src/test/scala/magnolify/test/ADT.scala index 37270dd7f..bea9295a0 100644 --- a/test/src/test/scala/magnolify/test/ADT.scala +++ b/test/src/test/scala/magnolify/test/ADT.scala @@ -30,7 +30,7 @@ object ADT { sealed trait Shape case object Space 
extends Shape case class Point(x: Int, y: Int) extends Shape - case class Circle(center: Point, r: Int) extends Shape + case class Circle(r: Int) extends Shape sealed trait Quadrilateral extends Shape case class Square(origin: Point, sideLength: Int) extends Quadrilateral case class Rect(origin: Point, width: Int, length: Int) extends Quadrilateral From fd3896e2bac5389db1d56346fe28adff7d75d601 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 30 Aug 2024 08:51:41 -0400 Subject: [PATCH 27/31] beamschematype -> rowtype --- .../beam/{BeamSchemaType.scala => RowType.scala} | 14 +++++++------- ...mSchemaTypeSuite.scala => RowTypeSuite.scala} | 14 +++++++------- docs/beam.md | 16 ++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) rename beam/src/main/scala/magnolify/beam/{BeamSchemaType.scala => RowType.scala} (97%) rename beam/src/test/scala/magnolify/beam/{BeamSchemaTypeSuite.scala => RowTypeSuite.scala} (93%) diff --git a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala b/beam/src/main/scala/magnolify/beam/RowType.scala similarity index 97% rename from beam/src/main/scala/magnolify/beam/BeamSchemaType.scala rename to beam/src/main/scala/magnolify/beam/RowType.scala index ccd13897c..d1424cbe5 100644 --- a/beam/src/main/scala/magnolify/beam/BeamSchemaType.scala +++ b/beam/src/main/scala/magnolify/beam/RowType.scala @@ -32,21 +32,21 @@ import scala.collection.concurrent import scala.jdk.CollectionConverters.* // https://beam.apache.org/documentation/programming-guide/#schema-definition -sealed trait BeamSchemaType[T] extends Converter[T, Row, Row] { +sealed trait RowType[T] extends Converter[T, Row, Row] { def schema: Schema def apply(r: Row): T = from(r) def apply(t: T): Row = to(t) } -object BeamSchemaType { - implicit def apply[T: BeamSchemaField]: BeamSchemaType[T] = - BeamSchemaType[T](CaseMapper.identity) +object RowType { + implicit def apply[T: BeamSchemaField]: RowType[T] = + RowType[T](CaseMapper.identity) - def apply[T](cm: 
CaseMapper)(implicit f: BeamSchemaField[T]): BeamSchemaType[T] = { + def apply[T](cm: CaseMapper)(implicit f: BeamSchemaField[T]): RowType[T] = { f match { case r: BeamSchemaField.Record[_] => val mappedSchema = r.schema(cm) // fail fast on bad annotations - new BeamSchemaType[T] { + new RowType[T] { private val caseMapper: CaseMapper = cm override lazy val schema: Schema = mappedSchema @@ -55,7 +55,7 @@ object BeamSchemaType { } case _ => throw new IllegalArgumentException( - s"BeamSchemaType can only be created from Record. Got $f" + s"RowType can only be created from Record. Got $f" ) } } diff --git a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala b/beam/src/test/scala/magnolify/beam/RowTypeSuite.scala similarity index 93% rename from beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala rename to beam/src/test/scala/magnolify/beam/RowTypeSuite.scala index d1ca610e3..d7a08496b 100644 --- a/beam/src/test/scala/magnolify/beam/BeamSchemaTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/RowTypeSuite.scala @@ -36,14 +36,14 @@ import java.util.UUID import scala.reflect.ClassTag import scala.jdk.CollectionConverters.* -class BeamSchemaTypeSuite extends MagnolifySuite { +class RowTypeSuite extends MagnolifySuite { private def test[T: Arbitrary: ClassTag](implicit - bst: BeamSchemaType[T], + bst: RowType[T], eq: Eq[T] ): Unit = testNamed[T](className[T]) private def testNamed[T: Arbitrary](name: String)(implicit - bst: BeamSchemaType[T], + bst: RowType[T], eq: Eq[T] ): Unit = { // Ensure serializable even after evaluation of `schema` @@ -117,8 +117,8 @@ class BeamSchemaTypeSuite extends MagnolifySuite { } { - implicit val bst: BeamSchemaType[LowerCamel] = - BeamSchemaType[LowerCamel](CaseMapper(_.toUpperCase)) + implicit val bst: RowType[LowerCamel] = + RowType[LowerCamel](CaseMapper(_.toUpperCase)) test[LowerCamel] { @@ -135,10 +135,10 @@ class BeamSchemaTypeSuite extends MagnolifySuite { { // value classes should act only as fields 
intercept[IllegalArgumentException] { - BeamSchemaType[ValueClass] + RowType[ValueClass] } - implicit val bst: BeamSchemaType[HasValueClass] = BeamSchemaType[HasValueClass] + implicit val bst: RowType[HasValueClass] = RowType[HasValueClass] test[HasValueClass] assert(bst.schema.getField("vc").getType == Schema.FieldType.STRING) diff --git a/docs/beam.md b/docs/beam.md index 01f787570..813bc33dd 100644 --- a/docs/beam.md +++ b/docs/beam.md @@ -1,6 +1,6 @@ # Beam -`BeamSchemaType[T]` provides conversion between Scala type `T` and a [Beam Schema](https://beam.apache.org/documentation/programming-guide/#schema-definition). Custom support for type `T` can be added with an implicit intsance of `BeamSchemaField[T]`. +`RowType[T]` provides conversion between Scala type `T` and a [Beam Schema](https://beam.apache.org/documentation/programming-guide/#schema-definition). Custom support for type `T` can be added with an implicit intsance of `BeamSchemaField[T]`. ```scala mdoc:compile-only import java.net.URI @@ -13,12 +13,12 @@ import magnolify.beam.* // Encode custom type URI as String implicit val uriField: BeamSchemaField[URI] = BeamSchemaField.from[String](URI.create)(_.toString) -val beamSchemaType = BeamSchemaType[Outer] -val row = beamSchemaType.to(record) -val copy: Outer = beamSchemaType.from(row) +val rowType = RowType[Outer] +val row = rowType.to(record) +val copy: Outer = rowType.from(row) // Beam Schema -val schema = beamSchemaType.schema +val schema = rowType.schema ``` ## Enums @@ -47,7 +47,7 @@ SQL-compatible logical types are supported via `import magnolify.beam.logical.sq ## Case mapping -To use a different field case format in target records, add an optional `CaseMapper` argument to `BeamSchemaType`: +To use a different field case format in target records, add an optional `CaseMapper` argument to `RowType`: ```scala mdoc:compile-only import magnolify.beam.* @@ -57,6 +57,6 @@ import com.google.common.base.CaseFormat case class LowerCamel(firstName: String, 
lastName: String) val toSnakeCase = CaseFormat.LOWER_CAMEL.converterTo(CaseFormat.LOWER_UNDERSCORE).convert _ -val beamSchemaType = BeamSchemaType[LowerCamel](CaseMapper(toSnakeCase)) -beamSchemaType.to(LowerCamel("John", "Doe")) // Row(first_name: John, last_name: Doe) +val rowType = RowType[LowerCamel](CaseMapper(toSnakeCase)) +rowType.to(LowerCamel("John", "Doe")) // Row(first_name: John, last_name: Doe) ``` \ No newline at end of file From aa9f05b8a9fa0746dee31c116b11833cc69fb152 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Fri, 30 Aug 2024 09:01:02 -0400 Subject: [PATCH 28/31] rm --- beam/src/main/scala/magnolify/beam/RowType.scala | 2 -- beam/src/test/scala/magnolify/beam/RowTypeSuite.scala | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/RowType.scala b/beam/src/main/scala/magnolify/beam/RowType.scala index d1424cbe5..4da64d11d 100644 --- a/beam/src/main/scala/magnolify/beam/RowType.scala +++ b/beam/src/main/scala/magnolify/beam/RowType.scala @@ -199,8 +199,6 @@ object BeamSchemaField { implicit val bsfDouble: BeamSchemaField[Double] = id[Double](_ => FieldType.DOUBLE) // STRING A string implicit val bsfString: BeamSchemaField[String] = id[String](_ => FieldType.STRING) - implicit val bsfCharSeq: BeamSchemaField[CharSequence] = - from[String](_.asInstanceOf[CharSequence])(_.toString) // BOOLEAN A boolean value implicit val bsfBoolean: BeamSchemaField[Boolean] = id[Boolean](_ => FieldType.BOOLEAN) // BYTES A raw byte array diff --git a/beam/src/test/scala/magnolify/beam/RowTypeSuite.scala b/beam/src/test/scala/magnolify/beam/RowTypeSuite.scala index d7a08496b..4f7a54836 100644 --- a/beam/src/test/scala/magnolify/beam/RowTypeSuite.scala +++ b/beam/src/test/scala/magnolify/beam/RowTypeSuite.scala @@ -153,7 +153,7 @@ class RowTypeSuite extends MagnolifySuite { } case class Empty() -case class Others(bs: ByteString, cs: CharSequence, bb: ByteBuffer, c: Char) +case class Others(bs: ByteString, bb: 
ByteBuffer, c: Char) case class Decimal(bd: BigDecimal, bdo: Option[BigDecimal]) case class Logical( u: UUID, From 9d7f1f1dec82bb4ef49ecf835e07c76b9ad4bd30 Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Tue, 3 Sep 2024 14:25:48 -0400 Subject: [PATCH 29/31] fix --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 3753a0c00..ad9e1b111 100644 --- a/build.sbt +++ b/build.sbt @@ -417,7 +417,7 @@ lazy val test = project lazy val scalacheck = project .in(file("scalacheck")) .dependsOn( - shared % "test->test,compile->compile", + shared % "compile,test->test", test % "test->test" ) .settings( From dd11adb66f02b84f768638c235e2eef3de418d8e Mon Sep 17 00:00:00 2001 From: Kellen Dye Date: Thu, 5 Sep 2024 07:01:18 -0400 Subject: [PATCH 30/31] rn --- .../main/scala/magnolify/beam/RowType.scala | 96 ++++++------ .../magnolify/beam/logical/package.scala | 140 +++++++++--------- .../scala/magnolify/beam/unsafe/package.scala | 4 +- docs/beam.md | 4 +- 4 files changed, 120 insertions(+), 124 deletions(-) diff --git a/beam/src/main/scala/magnolify/beam/RowType.scala b/beam/src/main/scala/magnolify/beam/RowType.scala index 4da64d11d..94479eedc 100644 --- a/beam/src/main/scala/magnolify/beam/RowType.scala +++ b/beam/src/main/scala/magnolify/beam/RowType.scala @@ -39,12 +39,11 @@ sealed trait RowType[T] extends Converter[T, Row, Row] { } object RowType { - implicit def apply[T: BeamSchemaField]: RowType[T] = - RowType[T](CaseMapper.identity) + implicit def apply[T: RowField]: RowType[T] = RowType[T](CaseMapper.identity) - def apply[T](cm: CaseMapper)(implicit f: BeamSchemaField[T]): RowType[T] = { + def apply[T](cm: CaseMapper)(implicit f: RowField[T]): RowType[T] = { f match { - case r: BeamSchemaField.Record[_] => + case r: RowField.Record[_] => val mappedSchema = r.schema(cm) // fail fast on bad annotations new RowType[T] { private val caseMapper: CaseMapper = cm @@ -61,7 +60,7 @@ object RowType { } } -sealed trait 
BeamSchemaField[T] extends Serializable { +sealed trait RowField[T] extends Serializable { type FromT type ToT def fieldType(cm: CaseMapper): FieldType @@ -70,32 +69,32 @@ sealed trait BeamSchemaField[T] extends Serializable { def fromAny(v: Any)(cm: CaseMapper): T = from(v.asInstanceOf[FromT])(cm) } -object BeamSchemaField { - sealed trait Aux[T, From, To] extends BeamSchemaField[T] { +object RowField { + sealed trait Aux[T, From, To] extends RowField[T] { override type FromT = From override type ToT = To } private[magnolify] def aux[T, From, To]( ft: CaseMapper => FieldType - )(fromFn: From => T)(toFn: T => To): BeamSchemaField[T] = + )(fromFn: From => T)(toFn: T => To): RowField[T] = new Aux[T, From, To] { override def fieldType(cm: CaseMapper): FieldType = ft(cm) override def from(v: FromT)(cm: CaseMapper): T = fromFn(v) override def to(v: T)(cm: CaseMapper): ToT = toFn(v) } - private[magnolify] def id[T](ft: CaseMapper => FieldType): BeamSchemaField[T] = + private[magnolify] def id[T](ft: CaseMapper => FieldType): RowField[T] = aux[T, T, T](ft)(identity)(identity) def from[T]: FromWord[T] = new FromWord[T] class FromWord[T] { - def apply[U](f: T => U)(g: U => T)(implicit bsf: BeamSchemaField[T]): BeamSchemaField[U] = - new Aux[U, bsf.FromT, bsf.ToT] { - override def fieldType(cm: CaseMapper): FieldType = bsf.fieldType(cm) - override def from(v: FromT)(cm: CaseMapper): U = f(bsf.from(v)(cm)) - override def to(v: U)(cm: CaseMapper): ToT = bsf.to(g(v))(cm) + def apply[U](f: T => U)(g: U => T)(implicit rf: RowField[T]): RowField[U] = + new Aux[U, rf.FromT, rf.ToT] { + override def fieldType(cm: CaseMapper): FieldType = rf.fieldType(cm) + override def from(v: FromT)(cm: CaseMapper): U = f(rf.from(v)(cm)) + override def to(v: U)(cm: CaseMapper): ToT = rf.to(g(v))(cm) } } @@ -108,13 +107,13 @@ object BeamSchemaField { // //////////////////////////////////////////////// - type Typeclass[T] = BeamSchemaField[T] - implicit def gen[T]: BeamSchemaField[T] = macro 
Magnolia.gen[T] + type Typeclass[T] = RowField[T] + implicit def gen[T]: RowField[T] = macro Magnolia.gen[T] def split[T]( sealedTrait: SealedTrait[Typeclass, T] - )(implicit r: shapeless.Refute[EnumType[T]]): BeamSchemaField[T] = - new BeamSchemaField[T] { + )(implicit r: shapeless.Refute[EnumType[T]]): RowField[T] = + new RowField[T] { override type FromT = logicaltypes.OneOfType.Value override type ToT = logicaltypes.OneOfType.Value @@ -149,11 +148,11 @@ object BeamSchemaField { ) } - def join[T](caseClass: CaseClass[Typeclass, T]): BeamSchemaField[T] = { + def join[T](caseClass: CaseClass[Typeclass, T]): RowField[T] = { if (caseClass.isValueClass) { val p = caseClass.parameters.head val tc = p.typeclass - new BeamSchemaField[T] { + new RowField[T] { override type FromT = tc.FromT override type ToT = tc.ToT override def fieldType(cm: CaseMapper): FieldType = tc.fieldType(cm) @@ -185,39 +184,39 @@ object BeamSchemaField { } // BYTE An 8-bit signed value - implicit val bsfByte: BeamSchemaField[Byte] = id[Byte](_ => FieldType.BYTE) + implicit val rfByte: RowField[Byte] = id[Byte](_ => FieldType.BYTE) // INT16 A 16-bit signed value - implicit val bsfShort: BeamSchemaField[Short] = id[Short](_ => FieldType.INT16) - implicit val bsfChar: BeamSchemaField[Char] = from[Short](_.toChar)(_.toShort) + implicit val rfShort: RowField[Short] = id[Short](_ => FieldType.INT16) + implicit val rfChar: RowField[Char] = from[Short](_.toChar)(_.toShort) // INT32 A 32-bit signed value - implicit val bsfInt: BeamSchemaField[Int] = id[Int](_ => FieldType.INT32) + implicit val rfInt: RowField[Int] = id[Int](_ => FieldType.INT32) // INT64 A 64-bit signed value - implicit val bsfLong: BeamSchemaField[Long] = id[Long](_ => FieldType.INT64) + implicit val rfLong: RowField[Long] = id[Long](_ => FieldType.INT64) // FLOAT A 32-bit IEEE 754 floating point number - implicit val bsfFloat: BeamSchemaField[Float] = id[Float](_ => FieldType.FLOAT) + implicit val rfFloat: RowField[Float] = id[Float](_ 
=> FieldType.FLOAT) // DOUBLE A 64-bit IEEE 754 floating point number - implicit val bsfDouble: BeamSchemaField[Double] = id[Double](_ => FieldType.DOUBLE) + implicit val rfDouble: RowField[Double] = id[Double](_ => FieldType.DOUBLE) // STRING A string - implicit val bsfString: BeamSchemaField[String] = id[String](_ => FieldType.STRING) + implicit val rfString: RowField[String] = id[String](_ => FieldType.STRING) // BOOLEAN A boolean value - implicit val bsfBoolean: BeamSchemaField[Boolean] = id[Boolean](_ => FieldType.BOOLEAN) + implicit val rfBoolean: RowField[Boolean] = id[Boolean](_ => FieldType.BOOLEAN) // BYTES A raw byte array - implicit val bsfByteArray: BeamSchemaField[Array[Byte]] = id[Array[Byte]](_ => FieldType.BYTES) - implicit val bsfByteBuffer: BeamSchemaField[ByteBuffer] = + implicit val rfByteArray: RowField[Array[Byte]] = id[Array[Byte]](_ => FieldType.BYTES) + implicit val rfByteBuffer: RowField[ByteBuffer] = from[Array[Byte]](x => ByteBuffer.wrap(x))(_.array()) - implicit val bsfByteString: BeamSchemaField[ByteString] = + implicit val rfByteString: RowField[ByteString] = from[Array[Byte]](x => ByteString.copyFrom(x))(_.toByteArray) // DECIMAL An arbitrary-precision decimal type - implicit val bsfDecimal: BeamSchemaField[BigDecimal] = + implicit val rfDecimal: RowField[BigDecimal] = aux[BigDecimal, java.math.BigDecimal, java.math.BigDecimal](_ => FieldType.DECIMAL)( BigDecimal.apply )(_.bigDecimal) - implicit val bsfUUID: BeamSchemaField[ju.UUID] = + implicit val rfUUID: RowField[ju.UUID] = id[ju.UUID](_ => FieldType.logicalType(new logicaltypes.UuidLogicalType)) - implicit def bsfEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): BeamSchemaField[T] = - new BeamSchemaField[T] { + implicit def rfEnum[T](implicit et: EnumType[T], lp: shapeless.LowPriority): RowField[T] = + new RowField[T] { type FromT = logicaltypes.EnumerationType.Value type ToT = logicaltypes.EnumerationType.Value @@ -240,24 +239,21 @@ object BeamSchemaField { 
enumType(cm).from(beamEnumType(cm).toString(v)) } - implicit def bsfMap[K, V](implicit - bsfK: BeamSchemaField[K], - bsfV: BeamSchemaField[V] - ): BeamSchemaField[Map[K, V]] = - new Aux[Map[K, V], ju.Map[bsfK.FromT, bsfV.FromT], ju.Map[bsfK.ToT, bsfV.ToT]] { + implicit def rfMap[K, V](implicit rfK: RowField[K], rfV: RowField[V]): RowField[Map[K, V]] = + new Aux[Map[K, V], ju.Map[rfK.FromT, rfV.FromT], ju.Map[rfK.ToT, rfV.ToT]] { override def fieldType(cm: CaseMapper): FieldType = - FieldType.map(bsfK.fieldType(cm), bsfV.fieldType(cm)) - override def from(v: ju.Map[bsfK.FromT, bsfV.FromT])(cm: CaseMapper): Map[K, V] = - v.asScala.map { case (k, v) => bsfK.from(k)(cm) -> bsfV.from(v)(cm) }.toMap - override def to(v: Map[K, V])(cm: CaseMapper): ju.Map[bsfK.ToT, bsfV.ToT] = - v.map { case (k, v) => bsfK.to(k)(cm) -> bsfV.to(v)(cm) }.asJava + FieldType.map(rfK.fieldType(cm), rfV.fieldType(cm)) + override def from(v: ju.Map[rfK.FromT, rfV.FromT])(cm: CaseMapper): Map[K, V] = + v.asScala.map { case (k, v) => rfK.from(k)(cm) -> rfV.from(v)(cm) }.toMap + override def to(v: Map[K, V])(cm: CaseMapper): ju.Map[rfK.ToT, rfV.ToT] = + v.map { case (k, v) => rfK.to(k)(cm) -> rfV.to(v)(cm) }.asJava } - implicit def bsfIterable[T, C[_]](implicit - f: BeamSchemaField[T], + implicit def rfIterable[T, C[_]](implicit + f: RowField[T], ti: C[T] => Iterable[T], fc: FactoryCompat[T, C[T]] - ): BeamSchemaField[C[T]] = { + ): RowField[C[T]] = { new Aux[C[T], ju.List[f.FromT], ju.List[f.ToT]] { override def from(v: ju.List[f.FromT])(cm: CaseMapper): C[T] = fc.fromSpecific(v.asScala.iterator.map(p => f.from(p)(cm))) @@ -267,7 +263,7 @@ object BeamSchemaField { } } - implicit def bsfOption[T](implicit f: BeamSchemaField[T]): BeamSchemaField[Option[T]] = { + implicit def rfOption[T](implicit f: RowField[T]): RowField[Option[T]] = { new Aux[Option[T], f.FromT, f.ToT] { override def from(v: f.FromT)(cm: CaseMapper): Option[T] = if (v == null) None else Some(f.from(v)(cm)) diff --git 
a/beam/src/main/scala/magnolify/beam/logical/package.scala b/beam/src/main/scala/magnolify/beam/logical/package.scala index fcd93ff1e..6e97c3356 100644 --- a/beam/src/main/scala/magnolify/beam/logical/package.scala +++ b/beam/src/main/scala/magnolify/beam/logical/package.scala @@ -29,121 +29,121 @@ package object logical { import magnolify.shared.Time._ object date { - implicit val bsfLocalDate: BeamSchemaField[jt.LocalDate] = - BeamSchemaField.id[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) + implicit val rfLocalDate: RowField[jt.LocalDate] = + RowField.id[jt.LocalDate](_ => FieldType.logicalType(new logicaltypes.Date)) private lazy val EpochJodaDate = new joda.LocalDate(1970, 1, 1) - implicit val bsfJodaLocalDate: BeamSchemaField[joda.LocalDate] = - BeamSchemaField.from[jt.LocalDate](jtld => + implicit val rfJodaLocalDate: RowField[joda.LocalDate] = + RowField.from[jt.LocalDate](jtld => EpochJodaDate.plusDays(jtld.getLong(ChronoField.EPOCH_DAY).toInt) )(d => jt.LocalDate.ofEpochDay(joda.Days.daysBetween(EpochJodaDate, d).getDays.toLong)) } object millis { - implicit lazy val bsfInstantMillis: BeamSchemaField[jt.Instant] = - BeamSchemaField.from[joda.Instant](i => millisToInstant(millisFromJodaInstant(i)))(i => + implicit lazy val rfInstantMillis: RowField[jt.Instant] = + RowField.from[joda.Instant](i => millisToInstant(millisFromJodaInstant(i)))(i => millisToJodaInstant(millisFromInstant(i)) ) - implicit val bsfJodaInstantMillis: BeamSchemaField[joda.Instant] = - BeamSchemaField.id[joda.Instant](_ => FieldType.DATETIME) - implicit val bsfJodaDateTimeMillis: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[joda.Instant](_.toDateTime(ISOChronology.getInstanceUTC))(_.toInstant) - - implicit val bsfLocalTimeMillis: BeamSchemaField[jt.LocalTime] = - BeamSchemaField.from[Int](millisToLocalTime)(millisFromLocalTime) - implicit val bsfJodaLocalTimeMillis: BeamSchemaField[joda.LocalTime] = - 
BeamSchemaField.from[Int](millisToJodaLocalTime)(millisFromJodaLocalTime) - - implicit val bsfLocalDateTimeMillis: BeamSchemaField[jt.LocalDateTime] = - BeamSchemaField.id[jt.LocalDateTime](_ => FieldType.logicalType(new logicaltypes.DateTime())) - implicit val bsfJodaLocalDateTimeMillis: BeamSchemaField[joda.LocalDateTime] = - BeamSchemaField.from[jt.LocalDateTime](ldt => + implicit val rfJodaInstantMillis: RowField[joda.Instant] = + RowField.id[joda.Instant](_ => FieldType.DATETIME) + implicit val rfJodaDateTimeMillis: RowField[joda.DateTime] = + RowField.from[joda.Instant](_.toDateTime(ISOChronology.getInstanceUTC))(_.toInstant) + + implicit val rfLocalTimeMillis: RowField[jt.LocalTime] = + RowField.from[Int](millisToLocalTime)(millisFromLocalTime) + implicit val rfJodaLocalTimeMillis: RowField[joda.LocalTime] = + RowField.from[Int](millisToJodaLocalTime)(millisFromJodaLocalTime) + + implicit val rfLocalDateTimeMillis: RowField[jt.LocalDateTime] = + RowField.id[jt.LocalDateTime](_ => FieldType.logicalType(new logicaltypes.DateTime())) + implicit val rfJodaLocalDateTimeMillis: RowField[joda.LocalDateTime] = + RowField.from[jt.LocalDateTime](ldt => millisToJodaLocalDateTime(millisFromLocalDateTime(ldt)) )(ldt => millisToLocalDateTime(millisFromJodaLocalDateTime(ldt))) - implicit val bsfDurationMillis: BeamSchemaField[jt.Duration] = - BeamSchemaField.from[Long](millisToDuration)(millisFromDuration) - implicit val bsfJodaDurationMillis: BeamSchemaField[joda.Duration] = - BeamSchemaField.from[Long](millisToJodaDuration)(millisFromJodaDuration) + implicit val rfDurationMillis: RowField[jt.Duration] = + RowField.from[Long](millisToDuration)(millisFromDuration) + implicit val rfJodaDurationMillis: RowField[joda.Duration] = + RowField.from[Long](millisToJodaDuration)(millisFromJodaDuration) } object micros { // NOTE: logicaltypes.MicrosInstant() cannot be used as it throws assertion // errors when greater-than-microsecond precision data is used - implicit val 
bsfInstantMicros: BeamSchemaField[jt.Instant] = - BeamSchemaField.from[Long](microsToInstant)(microsFromInstant) + implicit val rfInstantMicros: RowField[jt.Instant] = + RowField.from[Long](microsToInstant)(microsFromInstant) // joda.Instant has millisecond precision, excess precision discarded - implicit val bsfJodaInstantMicros: BeamSchemaField[joda.Instant] = - BeamSchemaField.from[Long](microsToJodaInstant)(microsFromJodaInstant) + implicit val rfJodaInstantMicros: RowField[joda.Instant] = + RowField.from[Long](microsToJodaInstant)(microsFromJodaInstant) // joda.DateTime only has millisecond resolution, so excess precision is discarded - implicit val bsfJodaDateTimeMicros: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[Long](microsToJodaDateTime)(microsFromJodaDateTime) + implicit val rfJodaDateTimeMicros: RowField[joda.DateTime] = + RowField.from[Long](microsToJodaDateTime)(microsFromJodaDateTime) - implicit val bsfLocalTimeMicros: BeamSchemaField[jt.LocalTime] = - BeamSchemaField.from[Long](microsToLocalTime)(microsFromLocalTime) + implicit val rfLocalTimeMicros: RowField[jt.LocalTime] = + RowField.from[Long](microsToLocalTime)(microsFromLocalTime) // joda.LocalTime only has millisecond resolution, so excess precision is discarded - implicit val bsfJodaLocalTimeMicros: BeamSchemaField[joda.LocalTime] = - BeamSchemaField.from[Long](microsToJodaLocalTime)(microsFromJodaLocalTime) + implicit val rfJodaLocalTimeMicros: RowField[joda.LocalTime] = + RowField.from[Long](microsToJodaLocalTime)(microsFromJodaLocalTime) - implicit val bsfLocalDateTimeMicros: BeamSchemaField[jt.LocalDateTime] = - BeamSchemaField.from[Long](microsToLocalDateTime)(microsFromLocalDateTime) + implicit val rfLocalDateTimeMicros: RowField[jt.LocalDateTime] = + RowField.from[Long](microsToLocalDateTime)(microsFromLocalDateTime) // joda.LocalDateTime has millisecond precision, excess precision discarded - implicit val bsfJodaLocalDateTimeMicros: BeamSchemaField[joda.LocalDateTime] = - 
BeamSchemaField.from[Long](microsToJodaLocalDateTime)(microsFromJodaLocalDateTime) + implicit val rfJodaLocalDateTimeMicros: RowField[joda.LocalDateTime] = + RowField.from[Long](microsToJodaLocalDateTime)(microsFromJodaLocalDateTime) - implicit val bsfDurationMicros: BeamSchemaField[jt.Duration] = - BeamSchemaField.from[Long](microsToDuration)(microsFromDuration) + implicit val rfDurationMicros: RowField[jt.Duration] = + RowField.from[Long](microsToDuration)(microsFromDuration) // joda.Duration has millisecond precision, excess precision discarded - implicit val bsfJodaDurationMicros: BeamSchemaField[joda.Duration] = - BeamSchemaField.from[Long](microsToJodaDuration)(microsFromJodaDuration) + implicit val rfJodaDurationMicros: RowField[joda.Duration] = + RowField.from[Long](microsToJodaDuration)(microsFromJodaDuration) } object nanos { - implicit val bsfInstantNanos: BeamSchemaField[jt.Instant] = - BeamSchemaField.id[jt.Instant](_ => FieldType.logicalType(new logicaltypes.NanosInstant())) + implicit val rfInstantNanos: RowField[jt.Instant] = + RowField.id[jt.Instant](_ => FieldType.logicalType(new logicaltypes.NanosInstant())) // joda.Instant has millisecond precision, excess precision discarded - implicit val bsfJodaInstantNanos: BeamSchemaField[joda.Instant] = - BeamSchemaField.from[jt.Instant](i => nanosToJodaInstant(nanosFromInstant(i)))(i => + implicit val rfJodaInstantNanos: RowField[joda.Instant] = + RowField.from[jt.Instant](i => nanosToJodaInstant(nanosFromInstant(i)))(i => nanosToInstant(nanosFromJodaInstant(i)) ) // joda.DateTime only has millisecond resolution - implicit val bsfJodaDateTimeNanos: BeamSchemaField[joda.DateTime] = - BeamSchemaField.from[jt.Instant](i => nanosToJodaDateTime(nanosFromInstant(i)))(i => + implicit val rfJodaDateTimeNanos: RowField[joda.DateTime] = + RowField.from[jt.Instant](i => nanosToJodaDateTime(nanosFromInstant(i)))(i => nanosToInstant(nanosFromJodaDateTime(i)) ) - implicit val bsfLocalTimeNanos: 
BeamSchemaField[jt.LocalTime] = - BeamSchemaField.id[jt.LocalTime](_ => FieldType.logicalType(new logicaltypes.Time())) + implicit val rfLocalTimeNanos: RowField[jt.LocalTime] = + RowField.id[jt.LocalTime](_ => FieldType.logicalType(new logicaltypes.Time())) // joda.LocalTime only has millisecond resolution, so excess precision is discarded - implicit val bsfJodaLocalTimeNanos: BeamSchemaField[joda.LocalTime] = - BeamSchemaField.from[jt.LocalTime](lt => nanosToJodaLocalTime(nanosFromLocalTime(lt)))(lt => + implicit val rfJodaLocalTimeNanos: RowField[joda.LocalTime] = + RowField.from[jt.LocalTime](lt => nanosToJodaLocalTime(nanosFromLocalTime(lt)))(lt => nanosToLocalTime(nanosFromJodaLocalTime(lt)) ) - implicit val bsfLocalDateTimeNanos: BeamSchemaField[jt.LocalDateTime] = - BeamSchemaField.from[Long](nanosToLocalDateTime)(nanosFromLocalDateTime) + implicit val rfLocalDateTimeNanos: RowField[jt.LocalDateTime] = + RowField.from[Long](nanosToLocalDateTime)(nanosFromLocalDateTime) // joda.LocalDateTime has millisecond precision, excess precision discarded - implicit val bsfJodaLocalDateTimeMicros: BeamSchemaField[joda.LocalDateTime] = - BeamSchemaField.from[jt.LocalDateTime](ldt => - nanosToJodaLocalDateTime(nanosFromLocalDateTime(ldt)) - )(ldt => nanosToLocalDateTime(nanosFromJodaLocalDateTime(ldt))) + implicit val rfJodaLocalDateTimeMicros: RowField[joda.LocalDateTime] = + RowField.from[jt.LocalDateTime](ldt => nanosToJodaLocalDateTime(nanosFromLocalDateTime(ldt)))( + ldt => nanosToLocalDateTime(nanosFromJodaLocalDateTime(ldt)) + ) - implicit val bsfDurationNanos: BeamSchemaField[jt.Duration] = - BeamSchemaField.id[jt.Duration](_ => FieldType.logicalType(new logicaltypes.NanosDuration())) + implicit val rfDurationNanos: RowField[jt.Duration] = + RowField.id[jt.Duration](_ => FieldType.logicalType(new logicaltypes.NanosDuration())) // joda.Duration has millisecond precision, excess precision discarded - implicit val bsfJodaDurationNanos: BeamSchemaField[joda.Duration] 
= - BeamSchemaField.from[jt.Duration](d => nanosToJodaDuration(nanosFromDuration(d)))(d => + implicit val rfJodaDurationNanos: RowField[joda.Duration] = + RowField.from[jt.Duration](d => nanosToJodaDuration(nanosFromDuration(d)))(d => nanosToDuration(nanosFromJodaDuration(d)) ) } object sql { - implicit val bsfSqlLocalTime: BeamSchemaField[jt.LocalTime] = - BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.TIME)) - implicit val bsfSqlInstant: BeamSchemaField[jt.Instant] = - BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.TIMESTAMP)) - implicit val bsfSqlLocalDateTime: BeamSchemaField[jt.LocalDateTime] = - BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.DATETIME)) - implicit val bsfSqlLocalDate: BeamSchemaField[jt.LocalDate] = - BeamSchemaField.id(_ => FieldType.logicalType(SqlTypes.DATE)) + implicit val rfSqlLocalTime: RowField[jt.LocalTime] = + RowField.id(_ => FieldType.logicalType(SqlTypes.TIME)) + implicit val rfSqlInstant: RowField[jt.Instant] = + RowField.id(_ => FieldType.logicalType(SqlTypes.TIMESTAMP)) + implicit val rfSqlLocalDateTime: RowField[jt.LocalDateTime] = + RowField.id(_ => FieldType.logicalType(SqlTypes.DATETIME)) + implicit val rfSqlLocalDate: RowField[jt.LocalDate] = + RowField.id(_ => FieldType.logicalType(SqlTypes.DATE)) } } diff --git a/beam/src/main/scala/magnolify/beam/unsafe/package.scala b/beam/src/main/scala/magnolify/beam/unsafe/package.scala index 88ba49f39..f17a22f75 100644 --- a/beam/src/main/scala/magnolify/beam/unsafe/package.scala +++ b/beam/src/main/scala/magnolify/beam/unsafe/package.scala @@ -19,6 +19,6 @@ package magnolify.beam import magnolify.shared.* package object unsafe { - implicit def afUnsafeEnum[T: EnumType]: BeamSchemaField[UnsafeEnum[T]] = - BeamSchemaField.from[String](UnsafeEnum.from[T])(UnsafeEnum.to[T]) + implicit def afUnsafeEnum[T: EnumType]: RowField[UnsafeEnum[T]] = + RowField.from[String](UnsafeEnum.from[T])(UnsafeEnum.to[T]) } diff --git a/docs/beam.md b/docs/beam.md index 
813bc33dd..5c47a227c 100644 --- a/docs/beam.md +++ b/docs/beam.md @@ -1,6 +1,6 @@ # Beam -`RowType[T]` provides conversion between Scala type `T` and a [Beam Schema](https://beam.apache.org/documentation/programming-guide/#schema-definition). Custom support for type `T` can be added with an implicit intsance of `BeamSchemaField[T]`. +`RowType[T]` provides conversion between Scala type `T` and a Beam Row, backed by a [Beam Schema](https://beam.apache.org/documentation/programming-guide/#schema-definition). Custom support for type `T` can be added with an implicit instance of `RowField[T]`. ```scala mdoc:compile-only import java.net.URI @@ -11,7 +11,7 @@ val record = Outer(Inner(1L, "hello", URI.create("https://www.spotify.com"))) import magnolify.beam.* // Encode custom type URI as String -implicit val uriField: BeamSchemaField[URI] = BeamSchemaField.from[String](URI.create)(_.toString) +implicit val uriField: RowField[URI] = RowField.from[String](URI.create)(_.toString) val rowType = RowType[Outer] val row = rowType.to(record) From 50f67ec6968faa524cac8a3f1769800ad38e1f33 Mon Sep 17 00:00:00 2001 From: Michel Davit Date: Thu, 5 Sep 2024 16:04:35 +0200 Subject: [PATCH 31/31] rename implicit instance --- beam/src/main/scala/magnolify/beam/unsafe/package.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beam/src/main/scala/magnolify/beam/unsafe/package.scala b/beam/src/main/scala/magnolify/beam/unsafe/package.scala index f17a22f75..72ea47908 100644 --- a/beam/src/main/scala/magnolify/beam/unsafe/package.scala +++ b/beam/src/main/scala/magnolify/beam/unsafe/package.scala @@ -19,6 +19,6 @@ package magnolify.beam import magnolify.shared.* package object unsafe { - implicit def afUnsafeEnum[T: EnumType]: RowField[UnsafeEnum[T]] = + implicit def rfUnsafeEnum[T: EnumType]: RowField[UnsafeEnum[T]] = RowField.from[String](UnsafeEnum.from[T])(UnsafeEnum.to[T]) }