Skip to content

Commit

Permalink
Adds macro compiler (#697)
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt authored and tgregg committed Jun 28, 2024
1 parent b2e7713 commit c9de7b9
Show file tree
Hide file tree
Showing 7 changed files with 603 additions and 74 deletions.
42 changes: 32 additions & 10 deletions src/main/java/com/amazon/ion/impl/macro/Macro.kt
Original file line number Diff line number Diff line change
@@ -1,23 +1,45 @@
package com.amazon.ion.impl.macro

import java.math.BigDecimal

/**
* Marker interface for Macros
* A [Macro] is either a [SystemMacro] or a [TemplateMacro].
*/
sealed interface Macro
sealed interface Macro {
    /** The parameters accepted by this macro, in declaration order. */
    val signature: List<Parameter>

    /**
     * A single parameter of a macro signature.
     *
     * @property variableName the name used to refer to the parameter.
     * @property type how the argument for this parameter is encoded.
     * @property grouped whether the parameter was declared as a grouped parameter.
     */
    data class Parameter(
        val variableName: String,
        val type: ParameterEncoding,
        val grouped: Boolean
    )

    /**
     * The supported argument encodings.
     *
     * @property ionTextName the name of this encoding as it is written in Ion text.
     */
    enum class ParameterEncoding(val ionTextName: String) {
        Tagged("any"),
        // TODO: List all of the possible tagless encodings
    }
}

/**
* Represents a template macro. A template macro is defined by a name, a signature, and a list of template expressions.
* Represents a template macro. A template macro is defined by a signature, and a list of template expressions.
* A template macro only gains a name and/or ID when it is added to a macro table.
*/
data class TemplateMacro(val name: String, val f: BigDecimal, val signature: MacroSignature, val body: List<TemplateExpression>) : Macro
data class TemplateMacro(override val signature: List<Macro.Parameter>, val body: List<TemplateBodyExpression>) : Macro {
    // Computed on first use and then reused; both inputs are read-only lists captured at construction.
    private val cachedHashCode by lazy { signature.hashCode() * 31 + body.hashCode() }

    override fun hashCode(): Int = cachedHashCode

    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        val that = other as? TemplateMacro ?: return false
        // Comparing the cached hash codes first gives a cheap rejection before the element-wise list comparisons.
        return cachedHashCode == that.cachedHashCode &&
            signature == that.signature &&
            body == that.body
    }
}

/**
* Macros that are built in, rather than being defined by a template.
*/
enum class SystemMacro : Macro {
Stream, // A stream is technically not a macro, but we can implement it as a macro that is the identity function.
Annotate,
MakeString,
enum class SystemMacro(override val signature: List<Macro.Parameter>) : Macro {
    // TODO: replace these placeholders

    // A stream is technically not a macro, but we can implement it as a macro that is the identity function.
    Stream(signature = emptyList()),

    Annotate(signature = emptyList()),

    MakeString(
        signature = listOf(
            Macro.Parameter(variableName = "text", type = Macro.ParameterEncoding.Tagged, grouped = true)
        )
    ),
    // TODO: Other system macros
}
240 changes: 240 additions & 0 deletions src/main/java/com/amazon/ion/impl/macro/MacroCompiler.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl.macro

import com.amazon.ion.*
import com.amazon.ion.impl.macro.TemplateBodyExpression.*
import com.amazon.ion.util.confirm

/**
* [MacroCompiler] wraps an [IonReader]. When directed to do so, it will take over advancing and getting values from the
* reader in order to read one [TemplateMacro].
*
* This is currently implemented using [IonReader], but it could be adapted to work with
* [IonReaderContinuableCore][com.amazon.ion.impl.IonReaderContinuableCore].
*/
class MacroCompiler(private val reader: IonReader) {
// TODO: Make sure that we can throw exceptions if there's an over-sized value.

/** The name of the macro that was read. Returns `null` if no macro name is available. */
var macroName: String? = null
private set // Only mutable internally

private val signature: MutableList<Macro.Parameter> = mutableListOf()
private val expressions: MutableList<TemplateBodyExpression> = mutableListOf()

/**
 * Reads one template macro definition from the reader and returns the compiled [TemplateMacro].
 *
 * The reader must be positioned on the macro definition s-expression without having stepped into it. When this
 * function returns normally, the reader has been stepped back out of that s-expression and [macroName] holds the
 * name that was read. Any state left over from an earlier call is discarded first.
 *
 * TODO: if we switch the macro compiler to use a continuable reader, change the return type of this
 *       to a compiler state enum, and add a separate function to get the compiled macro once it is ready.
 */
fun compileMacro(): TemplateMacro {
    // Start from a clean slate so that nothing from a previous compilation leaks into this one.
    macroName = null
    signature.clear()
    expressions.clear()

    val missingKeywordMessage = "macro compilation expects a sexp starting with the keyword `macro`"
    confirm(reader.type == IonType.SEXP) { missingKeywordMessage }
    reader.confirmNoAnnotations("a macro definition sexp")

    reader.stepIn()

    // 1. The `macro` keyword.
    val startsWithKeyword = reader.next() == IonType.SYMBOL && reader.stringValue() == "macro"
    confirm(startsWithKeyword) { missingKeywordMessage }

    // 2. The macro name.
    reader.nextAndCheckType(IonType.SYMBOL, "macro name")
    reader.confirmNoAnnotations("macro name")
    // TODO: Enforce 'identifier' syntax subset of symbol
    //       Possibly add support for macro definitions without names?
    macroName = reader.symbolValue().assumeText()

    // 3. The signature.
    reader.nextAndCheckType(IonType.SEXP, "macro signature")
    reader.confirmNoAnnotations("macro signature")
    readSignature()

    // 4. Exactly one template body expression.
    confirm(reader.next() != null) { "Macro definition is missing a template body expression." }
    compileTemplateBodyExpression(isQuoted = false)
    confirm(reader.next() == null) { "Unexpected ${reader.type} after template body expression." }

    reader.stepOut()

    // Hand back copies so that the returned macro is unaffected by later compilations.
    return TemplateMacro(signature.toList(), expressions.toList())
}

/**
 * Reads every parameter declaration in the macro signature and appends it to [signature].
 *
 * A bare symbol declares an ungrouped parameter; a list wrapping a single symbol declares a grouped parameter.
 * The reader must be positioned on (but not stepped into) the signature sexp.
 */
private fun readSignature() {
    reader.forEachInContainer { declarationType ->
        when (declarationType) {
            IonType.SYMBOL -> addParameter(grouped = false)
            IonType.LIST -> readGroupedParameter()
            else -> throw IonException("parameter must be a symbol or a list; found ${reader.type}")
        }
    }
}

/**
 * Reads a grouped parameter declaration, i.e. a list that encloses exactly one parameter name.
 * The reader must be positioned on (but not stepped into) the enclosing list.
 */
private fun readGroupedParameter() {
    reader.confirmNoAnnotations(location = "grouped parameter enclosing list")
    reader.stepIn()
    reader.nextAndCheckType(IonType.SYMBOL, "parameter name")
    addParameter(grouped = true)
    confirm(reader.next() == null) { "grouped parameter list must enclose only one variable name" }
    reader.stepOut()
}

/**
 * Appends the parameter that the reader is currently positioned on to [signature].
 *
 * The only accepted encoding annotation is the text name of [Macro.ParameterEncoding.Tagged]; a parameter with no
 * annotation is treated the same way. A name that is already present in the signature is rejected.
 *
 * @param grouped whether the parameter was declared inside a grouping list.
 */
private fun addParameter(grouped: Boolean) {
    val encoding = Macro.ParameterEncoding.Tagged

    val annotations = reader.typeAnnotations
    confirm(annotations.isEmptyOr(encoding.ionTextName)) { "unsupported parameter encoding ${annotations.toList()}" }

    val parameterName = reader.symbolValue().assumeText()
    val alreadyDeclared = signature.any { declared -> declared.variableName == parameterName }
    confirm(!alreadyDeclared) { "redeclaration of parameter '$parameterName'" }

    signature += Macro.Parameter(parameterName, encoding, grouped)
}

/**
 * Compiles the value that the reader is currently positioned on into one or more [TemplateBodyExpression]s and
 * appends them to [expressions].
 *
 * Scalars become a single expression. Lists, structs, and quoted sexps are compiled recursively. An unquoted
 * symbol is a variable reference and must match a parameter in [signature]; an unquoted sexp is a macro invocation.
 *
 * @param isQuoted when `true`, symbols and sexps are treated as literal values rather than as variable references
 *   and macro invocations.
 * @throws IllegalStateException if the reader reports a type that cannot be compiled (no type, `NULL` for a
 *   non-null value, or `DATAGRAM`).
 */
private fun compileTemplateBodyExpression(isQuoted: Boolean) {
    // NOTE: `toList()` does not allocate for an empty list.
    val annotations: List<SymbolToken> = reader.typeAnnotationSymbols.toList()

    // Nulls of every type are handled uniformly, so the branches below only ever see non-null values.
    if (reader.isNullValue) {
        expressions.add(NullValue(annotations, reader.type))
        return
    }

    when (reader.type) {
        IonType.BOOL -> expressions.add(BoolValue(annotations, reader.booleanValue()))
        IonType.INT -> expressions.add(
            // Null ints were handled above, so the `!!` here asserts that the reader reports a size for them.
            when (reader.integerSize!!) {
                IntegerSize.INT,
                IntegerSize.LONG -> IntValue(annotations, reader.longValue())
                IntegerSize.BIG_INTEGER -> BigIntValue(annotations, reader.bigIntegerValue())
            }
        )
        IonType.FLOAT -> expressions.add(FloatValue(annotations, reader.doubleValue()))
        IonType.DECIMAL -> expressions.add(DecimalValue(annotations, reader.decimalValue()))
        IonType.TIMESTAMP -> expressions.add(TimestampValue(annotations, reader.timestampValue()))
        IonType.STRING -> expressions.add(StringValue(annotations, reader.stringValue()))
        IonType.BLOB -> expressions.add(BlobValue(annotations, reader.newBytes()))
        IonType.CLOB -> expressions.add(ClobValue(annotations, reader.newBytes()))
        IonType.SYMBOL -> {
            if (isQuoted) {
                expressions.add(SymbolValue(annotations, reader.symbolValue()))
            } else {
                val name = reader.stringValue()
                // `confirmNoAnnotations` already prefixes the message with "found annotations on ", so the
                // location must not start with "on" itself.
                reader.confirmNoAnnotations("variable reference '$name'")
                val index = signature.indexOfFirst { it.variableName == name }
                confirm(index >= 0) { "variable '$name' is not recognized" }
                expressions.add(Variable(index))
            }
        }
        IonType.LIST -> compileSequence(isQuoted) { start, end -> ListValue(annotations, start, end) }
        IonType.SEXP -> {
            if (isQuoted) {
                compileSequence(isQuoted = true) { start, end -> SExpValue(annotations, start, end) }
            } else {
                reader.confirmNoAnnotations(location = "a macro invocation")
                compileMacroInvocation()
            }
        }
        IonType.STRUCT -> compileStruct(annotations, isQuoted)
        // IonType.NULL, IonType.DATAGRAM, null
        else -> throw IllegalStateException("Found ${reader.type}; this should be unreachable.")
    }
}

/**
 * Compiles a struct that appears in a macro template.
 *
 * A [StructValue] is written at the position where the struct begins, followed by a [FieldName] expression and
 * the compiled value expression(s) for every field. Fields whose name has known text are also recorded in an index
 * from field name to the positions of their value expressions.
 *
 * The reader must be positioned on (but not stepped into) the struct. On normal return the reader has been stepped
 * out of the struct, and the caller must call [IonReader.next] to get the next value.
 */
private fun compileStruct(annotations: List<SymbolToken>, isQuoted: Boolean) {
    // Reserve the struct's slot now; it is filled in once the index of its last child is known.
    val structStart = expressions.size
    expressions.add(Placeholder)

    val valueIndicesByFieldName = mutableMapOf<String, ArrayList<Int>>()
    reader.forEachInContainer {
        val fieldName = fieldNameSymbol
        expressions.add(FieldName(fieldName))

        val fieldText = fieldName.text
        if (fieldText != null) {
            // The value expression will be added next, so its index is the current size.
            // Initial capacity is 1 because the most common case is that a field name occurs once.
            valueIndicesByFieldName.getOrPut(fieldText) { ArrayList(1) }.add(expressions.size)
        }

        compileTemplateBodyExpression(isQuoted)
    }

    expressions[structStart] = StructValue(annotations, structStart, expressions.lastIndex, valueIndicesByFieldName)
}

/**
 * Compiles a list or sexp that appears in a macro template.
 *
 * The reader must be positioned on (but not stepped into) the sequence. On normal return the reader has been
 * stepped out of the sequence, and the caller must call [IonReader.next] to get the next value.
 *
 * @param newTemplateBodySequence creates the container expression from the inclusive start and end indices of the
 *   sequence within [expressions].
 */
private inline fun compileSequence(isQuoted: Boolean, newTemplateBodySequence: (Int, Int) -> TemplateBodyExpression) {
    // Reserve the container's slot now; it is replaced once the index of its last child is known.
    val containerIndex = expressions.size
    expressions.add(Placeholder)

    reader.stepIn()
    while (reader.next() != null) {
        compileTemplateBodyExpression(isQuoted)
    }
    reader.stepOut()

    expressions[containerIndex] = newTemplateBodySequence(containerIndex, expressions.lastIndex)
}

/**
 * Compiles a macro invocation that appears in a macro template.
 *
 * The first value in the sexp identifies the macro, either by name (symbol) or by id (int). The name `quote` is
 * special: no [MacroInvocation] is emitted, and the remaining values are compiled as literals instead.
 *
 * The reader must be positioned on (but not stepped into) the sexp. On normal return the reader has been stepped
 * out of the sexp, and the caller must call [IonReader.next] to get the next value.
 *
 * @throws IonException if the sexp is empty or does not start with a symbol or an int.
 */
private fun compileMacroInvocation() {
    reader.stepIn()
    // `null` stands for the "quote" macro, which has no MacroRef of its own yet.
    val macroRef = when (reader.next()) {
        IonType.SYMBOL -> {
            // Deliberately not called `macroName`, which would shadow the class property of that name.
            val invokedName = reader.stringValue()
            // TODO: Once we have a macro table, validate name exists in current macro table.
            if (invokedName == "quote") null else MacroRef.ByName(invokedName)
        }
        // TODO: When we have an ID for the macro "quote", add handling for it here.
        // TODO: Once we have a macro table, validate that id exists in current macro table.
        IonType.INT -> MacroRef.ById(reader.longValue())
        else -> throw IonException("macro invocation must start with an id (int) or identifier (symbol); found ${reader.type ?: "nothing"}")
    }

    if (macroRef == null) {
        // It's the "quote" macro; skip compiling a macro invocation and just treat all contents as literals
        reader.forEachRemaining { compileTemplateBodyExpression(isQuoted = true) }
    } else {
        // Reserve the invocation's slot now; it is replaced once the index of its last argument is known.
        val macroStart = expressions.size
        expressions.add(Placeholder)
        reader.forEachRemaining { compileTemplateBodyExpression(isQuoted = false) }
        val macroEnd = expressions.lastIndex
        expressions[macroStart] = MacroInvocation(macroRef, macroStart, macroEnd)
    }
    reader.stepOut()
}

// Helper functions

/** Returns `true` if this array has no elements, or if its only element is equal to [text]. */
private fun Array<String>.isEmptyOr(text: String): Boolean = when (size) {
    0 -> true
    1 -> this[0] == text
    else -> false
}

/**
 * Confirms that the current value in this [IonReader] carries no annotations.
 *
 * @param location describes where the value is; it is appended to "found annotations on " in the error message.
 */
private fun IonReader.confirmNoAnnotations(location: String) {
    val annotationCount = typeAnnotations.size
    confirm(annotationCount == 0) { "found annotations on $location" }
}

/**
 * Advances this [IonReader] to the next value and confirms that it is of the `expected` [IonType].
 *
 * @param location describes what is being read; it is used as the subject of the error message.
 */
private fun IonReader.nextAndCheckType(expected: IonType, location: String) {
    val advancedTo = next()
    confirm(advancedTo == expected) { "$location must be a $expected; found ${type ?: "nothing"}" }
}

/**
 * Steps into the container that this [IonReader] is positioned on, runs [block] with the reader as its receiver,
 * and then steps back out. If [block] throws, the reader is left stepped in.
 */
private inline fun IonReader.readContainer(block: IonReader.() -> Unit) {
    stepIn()
    block()
    stepOut()
}

/**
 * Advances this [IonReader] through every remaining value at the current depth, calling [block] with the type of
 * each value once the reader is positioned on it.
 */
private inline fun IonReader.forEachRemaining(block: IonReader.(IonType) -> Unit) {
    var advancedTo = next()
    while (advancedTo != null) {
        block(type)
        advancedTo = next()
    }
}

/**
 * Steps into the container that this [IonReader] is positioned on, calls [block] for each value directly inside
 * it, and then steps back out.
 */
private inline fun IonReader.forEachInContainer(block: IonReader.(IonType) -> Unit) {
    stepIn()
    forEachRemaining(block)
    stepOut()
}
}
16 changes: 0 additions & 16 deletions src/main/java/com/amazon/ion/impl/macro/MacroSignature.kt

This file was deleted.

79 changes: 79 additions & 0 deletions src/main/java/com/amazon/ion/impl/macro/TemplateBodyExpression.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl.macro

import com.amazon.ion.*
import java.math.BigDecimal
import java.math.BigInteger

/**
 * One expression in the body of a template.
 *
 * [`IonValue`](com.amazon.ion.IonValue) is not used here because `IonValue` requires references to parent
 * containers and to an IonSystem, which makes it impractical for reading and writing macro definitions. There is
 * also information that must be captured which the IonValue model cannot express, such as macro invocations and
 * variable references.
 *
 * A template body is compiled into a flat list of expressions, without nesting, for ease and efficiency of
 * evaluating e-expressions. Container types therefore do not hold their children directly; instead they carry a
 * range that indicates which of the following expressions belong to that container.
 */
sealed interface TemplateBodyExpression {
    // TODO: Special Forms (if_void, for, ...)?

    /**
     * A temporary stand-in that is used only while a macro is partially compiled.
     */
    object Placeholder : TemplateBodyExpression

    // Scalars

    data class NullValue(
        val annotations: List<SymbolToken> = emptyList(),
        val type: IonType
    ) : TemplateBodyExpression

    data class BoolValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Boolean
    ) : TemplateBodyExpression

    data class IntValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Long
    ) : TemplateBodyExpression

    data class BigIntValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: BigInteger
    ) : TemplateBodyExpression

    data class FloatValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Double
    ) : TemplateBodyExpression

    data class DecimalValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: BigDecimal
    ) : TemplateBodyExpression

    data class TimestampValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Timestamp
    ) : TemplateBodyExpression

    data class StringValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: String
    ) : TemplateBodyExpression

    data class SymbolValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: SymbolToken
    ) : TemplateBodyExpression

    // The lob types must override hashCode and equals because `value` is a `byte[]`, which the generated
    // data-class implementations would compare by identity instead of by content.

    data class BlobValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: ByteArray
    ) : TemplateBodyExpression {
        override fun hashCode(): Int = annotations.hashCode() * 31 + value.contentHashCode()

        override fun equals(other: Any?): Boolean {
            if (other !is BlobValue) return false
            return annotations == other.annotations && value.contentEquals(other.value)
        }
    }

    data class ClobValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: ByteArray
    ) : TemplateBodyExpression {
        override fun hashCode(): Int = annotations.hashCode() * 31 + value.contentHashCode()

        override fun equals(other: Any?): Boolean {
            if (other !is ClobValue) return false
            return annotations == other.annotations && value.contentEquals(other.value)
        }
    }

    /**
     * An Ion List that could contain variables or macro invocations.
     *
     * @property startInclusive the index of the first expression of the list (i.e. this instance)
     * @property endInclusive the index of the last expression contained in the list
     */
    data class ListValue(
        val annotations: List<SymbolToken> = emptyList(),
        val startInclusive: Int,
        val endInclusive: Int
    ) : TemplateBodyExpression

    /**
     * An Ion SExp that could contain variables or macro invocations.
     *
     * @property startInclusive the index of the first expression of the sexp (i.e. this instance)
     * @property endInclusive the index of the last expression contained in the sexp
     */
    data class SExpValue(
        val annotations: List<SymbolToken> = emptyList(),
        val startInclusive: Int,
        val endInclusive: Int
    ) : TemplateBodyExpression

    /**
     * An Ion Struct that could contain variables or macro invocations.
     *
     * @property startInclusive the index of the first expression of the struct (i.e. this instance)
     * @property endInclusive the index of the last expression contained in the struct
     * @property templateStructIndex for each field name, the indices of the expressions holding that field's values
     */
    data class StructValue(
        val annotations: List<SymbolToken> = emptyList(),
        val startInclusive: Int,
        val endInclusive: Int,
        val templateStructIndex: Map<String, List<Int>>
    ) : TemplateBodyExpression

    /**
     * The name of a struct field.
     */
    data class FieldName(val value: SymbolToken) : TemplateBodyExpression

    /**
     * A reference to a variable that needs to be expanded.
     *
     * @property signatureIndex the position of the referenced parameter in the macro signature
     */
    data class Variable(val signatureIndex: Int) : TemplateBodyExpression

    /**
     * A macro invocation that needs to be expanded.
     *
     * @property startInclusive the index of the invocation itself (i.e. this instance)
     * @property endInclusive the index of the last expression that belongs to the invocation
     */
    data class MacroInvocation(
        val macro: MacroRef,
        val startInclusive: Int,
        val endInclusive: Int
    ) : TemplateBodyExpression
}
Loading

0 comments on commit c9de7b9

Please sign in to comment.