Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds macro compiler #697

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions src/main/java/com/amazon/ion/impl/macro/Macro.kt
Original file line number Diff line number Diff line change
@@ -1,23 +1,45 @@
package com.amazon.ion.impl.macro

import java.math.BigDecimal

/**
* Marker interface for Macros
* A [Macro] is either a [SystemMacro] or a [TemplateMacro].
*/
sealed interface Macro
sealed interface Macro {
    /** The ordered parameters that this macro accepts. */
    val signature: List<Parameter>

    /**
     * One parameter of a macro signature.
     *
     * @property variableName the name by which the parameter is referenced
     * @property type how the argument for this parameter is encoded
     * @property grouped whether the parameter was declared as a grouped parameter
     */
    data class Parameter(
        val variableName: String,
        val type: ParameterEncoding,
        val grouped: Boolean
    )

    /**
     * The encodings that a parameter may declare.
     *
     * @property ionTextName the name used for this encoding in Ion text
     */
    enum class ParameterEncoding(val ionTextName: String) {
        Tagged("any"),
        // TODO: List all of the possible tagless encodings
    }
}

/**
* Represents a template macro. A template macro is defined by a name, a signature, and a list of template expressions.
* Represents a template macro. A template macro is defined by a signature, and a list of template expressions.
* A template macro only gains a name and/or ID when it is added to a macro table.
*/
data class TemplateMacro(val name: String, val f: BigDecimal, val signature: MacroSignature, val body: List<TemplateExpression>) : Macro
data class TemplateMacro(
    override val signature: List<Macro.Parameter>,
    val body: List<TemplateBodyExpression>
) : Macro {
    // Computed on first use and then reused by both hashCode() and equals().
    private val cachedHashCode by lazy { signature.hashCode() * 31 + body.hashCode() }

    override fun hashCode(): Int = cachedHashCode

    // The cached hash codes are compared first as a cheap rejection test before the signature and body
    // lists are compared element by element.
    override fun equals(other: Any?): Boolean =
        this === other ||
            (
                other is TemplateMacro &&
                    cachedHashCode == other.cachedHashCode &&
                    signature == other.signature &&
                    body == other.body
                )
}

/**
* Macros that are built in, rather than being defined by a template.
*/
enum class SystemMacro : Macro {
Stream, // A stream is technically not a macro, but we can implement it as a macro that is the identity function.
Annotate,
MakeString,
enum class SystemMacro(override val signature: List<Macro.Parameter>) : Macro {
    // TODO: replace these placeholders

    // A stream is technically not a macro, but we can implement it as a macro that is the identity function.
    Stream(signature = emptyList()),
    Annotate(signature = emptyList()),
    MakeString(
        signature = listOf(
            Macro.Parameter(variableName = "text", type = Macro.ParameterEncoding.Tagged, grouped = true)
        )
    ),
    // TODO: Other system macros
}
240 changes: 240 additions & 0 deletions src/main/java/com/amazon/ion/impl/macro/MacroCompiler.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl.macro

import com.amazon.ion.*
import com.amazon.ion.impl.macro.TemplateBodyExpression.*
import com.amazon.ion.util.confirm

/**
* [MacroCompiler] wraps an [IonReader]. When directed to do so, it will take over advancing and getting values from the
* reader in order to read one [TemplateMacro].
*
* This is currently implemented using [IonReader], but it could be adapted to work with
* [IonReaderContinuableCore][com.amazon.ion.impl.IonReaderContinuableCore].
*/
class MacroCompiler(private val reader: IonReader) {
// TODO: Make sure that we can throw exceptions if there's an over-sized value.

/**
 * The name of the macro that was read. Returns `null` if no macro name is available.
 *
 * Reset to `null` at the start of every [compileMacro] call and assigned once the name symbol has been read.
 */
var macroName: String? = null
private set // Only mutable internally

// Parameters collected for the macro currently being compiled; cleared at the start of each compileMacro call.
private val signature: MutableList<Macro.Parameter> = mutableListOf()
// Flattened template body collected for the macro currently being compiled; cleared at the start of each
// compileMacro call and copied into the returned TemplateMacro.
private val expressions: MutableList<TemplateBodyExpression> = mutableListOf()

/**
 * Compiles a template macro definition from the reader. Caller is responsible for positioning the reader at—but not
 * stepped into—the macro template s-expression.
 *
 * The s-expression is read as: the keyword `macro`, a name symbol, a signature s-expression, and exactly one
 * template body expression. Any state left over from a previous call is discarded first. After a successful
 * call the macro's name is available from [macroName].
 *
 * TODO: if we switch the macro compiler to use a continuable reader, change the return type of this
 *       to a compiler state enum, and add a separate function to get the compiled macro once it is ready.
 *
 * @return the compiled macro, holding copies of the signature and body that were read
 * @throws IonException if the definition does not have the expected shape
 */
fun compileMacro(): TemplateMacro {
    macroName = null
    signature.clear()
    expressions.clear()

    confirm(reader.type == IonType.SEXP) { "macro compilation expects a sexp starting with the keyword `macro`" }
    reader.confirmNoAnnotations("a macro definition sexp")
    reader.readContainer {
        confirm(next() == IonType.SYMBOL && stringValue() == "macro") { "macro compilation expects a sexp starting with the keyword `macro`" }

        nextAndCheckType(IonType.SYMBOL, "macro name")
        confirmNoAnnotations("macro name")
        // `null.symbol` has type SYMBOL but carries no symbol token; reject it explicitly so that malformed
        // input is reported as an IonException rather than failing on the dereference below.
        confirm(!isNullValue) { "macro name must not be null" }
        // TODO: Enforce 'identifier' syntax subset of symbol
        //       Possibly add support for macro definitions without names?
        macroName = symbolValue().assumeText()

        nextAndCheckType(IonType.SEXP, "macro signature")
        confirmNoAnnotations("macro signature")
        readSignature()

        // Exactly one body expression must follow the signature.
        confirm(next() != null) { "Macro definition is missing a template body expression." }
        compileTemplateBodyExpression(isQuoted = false)
        confirm(next() == null) { "Unexpected $type after template body expression." }
    }
    // Copy the working lists so that the result is unaffected when they are cleared by the next compilation.
    return TemplateMacro(signature.toList(), expressions.toList())
}

/**
 * Walks the signature sexp and records each declared parameter in [signature].
 * The reader must be positioned on the signature sexp without having stepped into it; on normal return the
 * reader has been stepped back out of it.
 *
 * A bare symbol declares an ungrouped parameter; a list wrapping exactly one symbol declares a grouped one.
 */
private fun readSignature() {
    reader.forEachInContainer { parameterType ->
        when (parameterType) {
            IonType.LIST -> {
                confirmNoAnnotations(location = "grouped parameter enclosing list")
                stepIn()
                nextAndCheckType(IonType.SYMBOL, "parameter name")
                addParameter(grouped = true)
                val trailing = next()
                confirm(trailing == null) { "grouped parameter list must enclose only one variable name" }
                stepOut()
            }
            IonType.SYMBOL -> addParameter(grouped = false)
            else -> throw IonException("parameter must be a symbol or a list; found ${reader.type}")
        }
    }
}

/**
 * Adds a parameter to the macro signature.
 * Caller is responsible for making sure that the reader is positioned on a parameter name.
 *
 * The only annotation accepted on the name is the text name of [Macro.ParameterEncoding.Tagged]; the parameter
 * is always recorded with that encoding.
 *
 * @param grouped whether the parameter was declared inside a grouping list
 * @throws IonException if the name is null, carries an unsupported encoding annotation, or repeats the name of
 * a parameter that was already declared
 */
private fun addParameter(grouped: Boolean) {
    // `null.symbol` has type SYMBOL but carries no symbol token; reject it explicitly so that malformed input
    // is reported as an IonException rather than failing on the dereference below.
    confirm(!reader.isNullValue) { "parameter name must not be null" }
    val annotations = reader.typeAnnotations
    confirm(annotations.isEmptyOr(Macro.ParameterEncoding.Tagged.ionTextName)) { "unsupported parameter encoding ${annotations.toList()}" }
    val parameterName = reader.symbolValue().assumeText()
    confirm(signature.none { it.variableName == parameterName }) { "redeclaration of parameter '$parameterName'" }
    signature.add(Macro.Parameter(parameterName, Macro.ParameterEncoding.Tagged, grouped))
}

/**
 * Compiles the current value on the reader into a [TemplateBodyExpression] and adds it to [expressions].
 * Caller is responsible for ensuring that the reader is positioned on a value.
 *
 * Containers are flattened: the container's own expression is followed in [expressions] by the expressions
 * compiled from its children.
 *
 * @param isQuoted when `true`, symbols and sexps are kept as literal values; when `false`, a symbol is a
 * variable reference that must name a declared parameter, and a sexp is a macro invocation.
 * @throws IonException if an unquoted symbol or sexp is annotated, or if a variable name is not declared in
 * the signature
 * @throws IllegalStateException if the reader reports a type that is not a value type handled here
 */
private fun compileTemplateBodyExpression(isQuoted: Boolean) {
    // NOTE: `toList()` does not allocate for an empty list.
    val annotations: List<SymbolToken> = reader.typeAnnotationSymbols.toList()

    if (reader.isNullValue) {
        expressions.add(NullValue(annotations, reader.type))
    } else when (reader.type) {
        IonType.BOOL -> expressions.add(BoolValue(annotations, reader.booleanValue()))
        IonType.INT -> expressions.add(
            // NOTE(review): `!!` relies on the reader reporting a size for every non-null int — confirm.
            when (reader.integerSize!!) {
                IntegerSize.INT,
                IntegerSize.LONG -> IntValue(annotations, reader.longValue())
                IntegerSize.BIG_INTEGER -> BigIntValue(annotations, reader.bigIntegerValue())
            }
        )
        IonType.FLOAT -> expressions.add(FloatValue(annotations, reader.doubleValue()))
        IonType.DECIMAL -> expressions.add(DecimalValue(annotations, reader.decimalValue()))
        IonType.TIMESTAMP -> expressions.add(TimestampValue(annotations, reader.timestampValue()))
        IonType.STRING -> expressions.add(StringValue(annotations, reader.stringValue()))
        IonType.BLOB -> expressions.add(BlobValue(annotations, reader.newBytes()))
        IonType.CLOB -> expressions.add(ClobValue(annotations, reader.newBytes()))
        IonType.SYMBOL -> {
            if (isQuoted) {
                expressions.add(SymbolValue(annotations, reader.symbolValue()))
            } else {
                val name = reader.stringValue()
                // The helper already prefixes its message with "found annotations on", so the location must
                // not start with "on" as well.
                reader.confirmNoAnnotations("variable reference '$name'")
                val index = signature.indexOfFirst { it.variableName == name }
                confirm(index >= 0) { "variable '$name' is not recognized" }
                expressions.add(Variable(index))
            }
        }
        IonType.LIST -> compileSequence(isQuoted) { start, end -> ListValue(annotations, start, end) }
        IonType.SEXP -> {
            if (isQuoted) {
                compileSequence(isQuoted = true) { start, end -> SExpValue(annotations, start, end) }
            } else {
                reader.confirmNoAnnotations(location = "a macro invocation")
                compileMacroInvocation()
            }
        }
        IonType.STRUCT -> compileStruct(annotations, isQuoted)
        // IonType.NULL, IonType.DATAGRAM, null
        else -> throw IllegalStateException("Found ${reader.type}; this should be unreachable.")
    }
}

/**
 * Compiles a struct that appears in a macro template.
 * On entry the reader is positioned on the struct without having stepped into it; on normal return the reader
 * has been stepped back out, and the caller must call [IonReader.next] to move on.
 *
 * A [Placeholder] reserves the struct's own slot, each field contributes a [FieldName] followed by its compiled
 * value, and the placeholder is finally replaced by the finished [StructValue].
 */
private fun compileStruct(annotations: List<SymbolToken>, isQuoted: Boolean) {
    val structStart = expressions.size
    expressions.add(Placeholder)
    val indicesByFieldName = mutableMapOf<String, ArrayList<Int>>()
    reader.forEachInContainer {
        val fieldName = fieldNameSymbol
        expressions.add(FieldName(fieldName))
        val fieldText = fieldName.text
        if (fieldText != null) {
            // The field's value is compiled next, so it will occupy the slot that is currently one past the end.
            // Initial capacity is 1 because a field name most commonly occurs once.
            indicesByFieldName.getOrPut(fieldText) { ArrayList(1) }.add(expressions.size)
        }
        compileTemplateBodyExpression(isQuoted)
    }
    expressions[structStart] = StructValue(annotations, structStart, expressions.lastIndex, indicesByFieldName)
}

/**
 * Compiles a list or sexp that appears in a macro template.
 * On entry the reader is positioned on the sequence without having stepped into it; on normal return the reader
 * has been stepped back out, and the caller must call [IonReader.next] to move on.
 *
 * [newTemplateBodySequence] receives the index of the sequence's own slot and the index of the last expression
 * it contains, and returns the expression to store in that slot.
 */
private inline fun compileSequence(isQuoted: Boolean, newTemplateBodySequence: (Int, Int) -> TemplateBodyExpression) {
    // Reserve the sequence's own slot; it can only be filled in once the end index is known.
    val ownIndex = expressions.size
    expressions.add(Placeholder)

    reader.stepIn()
    while (reader.next() != null) {
        compileTemplateBodyExpression(isQuoted)
    }
    reader.stepOut()

    expressions[ownIndex] = newTemplateBodySequence(ownIndex, expressions.lastIndex)
}

/**
 * Compiles a macro invocation in a macro template.
 * When calling, the reader should be positioned at the sexp, but not stepped into it.
 * If this function returns normally, it will be stepped out of the sexp.
 * Caller will need to call [IonReader.next] to get the next value.
 *
 * The first value in the sexp selects the macro, either by name (symbol) or by id (int). The name `quote` is
 * special: no invocation is emitted, and the remaining values are compiled as quoted literals.
 *
 * @throws IonException if the sexp does not start with a non-null symbol or int
 */
private fun compileMacroInvocation() {
    reader.stepIn()
    val macroRef = when (reader.next()) {
        IonType.SYMBOL -> {
            // A typed null carries no name; reject it explicitly rather than passing null along.
            confirm(!reader.isNullValue) { "macro invocation must start with an id (int) or identifier (symbol); found null.symbol" }
            // Named so as not to shadow the `macroName` property of the enclosing compiler.
            val invokedName = reader.stringValue()
            // TODO: Once we have a macro table, validate name exists in current macro table.
            if (invokedName == "quote") null else MacroRef.ByName(invokedName)
        }
        // TODO: When we have an ID for the macro "quote", add handling for it here.
        // TODO: Once we have a macro table, validate that id exists in current macro table.
        IonType.INT -> {
            // A typed null carries no id; reject it explicitly rather than reading a value from it.
            confirm(!reader.isNullValue) { "macro invocation must start with an id (int) or identifier (symbol); found null.int" }
            MacroRef.ById(reader.longValue())
        }
        else -> throw IonException("macro invocation must start with an id (int) or identifier (symbol); found ${reader.type ?: "nothing"}")
    }

    if (macroRef == null) {
        // It's the "quote" macro; skip compiling a macro invocation and just treat all contents as literals
        reader.forEachRemaining { compileTemplateBodyExpression(isQuoted = true) }
    } else {
        // Reserve the invocation's own slot; it is filled in once the index of its last argument is known.
        val macroStart = expressions.size
        expressions.add(Placeholder)
        reader.forEachRemaining { compileTemplateBodyExpression(isQuoted = false) }
        val macroEnd = expressions.lastIndex
        expressions[macroStart] = MacroInvocation(macroRef, macroStart, macroEnd)
    }
    reader.stepOut()
}

// Helper functions

/** Returns `true` when this array is empty or holds exactly one element equal to [text]. */
private fun Array<String>.isEmptyOr(text: String): Boolean = when (size) {
    0 -> true
    1 -> this[0] == text
    else -> false
}

/**
 * Throws [IonException] when the value this [IonReader] is positioned on carries any annotations.
 * [location] is appended to "found annotations on" to build the error message.
 */
private fun IonReader.confirmNoAnnotations(location: String) {
    val found = typeAnnotations
    confirm(found.isEmpty()) { "found annotations on $location" }
}

/**
 * Advances to the next value and throws [IonException] if its type is not the [expected] [IonType].
 * [location] names what was being read and is used at the start of the error message.
 */
private fun IonReader.nextAndCheckType(expected: IonType, location: String) {
    val actual = next()
    confirm(actual == expected) { "$location must be a $expected; found ${type ?: "nothing"}" }
}

/**
 * Steps into the current container, runs [block] with this reader as its receiver, and steps back out.
 * The step out is skipped if [block] throws.
 */
private inline fun IonReader.readContainer(block: IonReader.() -> Unit) {
    stepIn()
    block()
    stepOut()
}

/**
 * Advances through every remaining value at the current reader depth, invoking [block] with the type of each
 * value as it is reached.
 */
private inline fun IonReader.forEachRemaining(block: IonReader.(IonType) -> Unit) {
    while (true) {
        if (next() == null) break
        block(type)
    }
}

/**
 * Steps into the current container, invokes [block] for every value directly inside it, and steps back out.
 */
private inline fun IonReader.forEachInContainer(block: IonReader.(IonType) -> Unit) {
    stepIn()
    forEachRemaining(block)
    stepOut()
}
}
16 changes: 0 additions & 16 deletions src/main/java/com/amazon/ion/impl/macro/MacroSignature.kt

This file was deleted.

79 changes: 79 additions & 0 deletions src/main/java/com/amazon/ion/impl/macro/TemplateBodyExpression.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl.macro

import com.amazon.ion.*
import java.math.BigDecimal
import java.math.BigInteger

/**
* Represents an expression in the body of a template.
*
* We cannot use [`IonValue`](com.amazon.ion.IonValue) for this because `IonValue` requires references to parent
* containers and to an IonSystem which makes it impractical for reading and writing macro definitions. Furthermore,
* there is information we need to capture that cannot be expressed in the IonValue model, such as macro invocations
* and variable references.
*
* A template body is compiled into a list of expressions, without nesting, for ease and efficiency of evaluating
* e-expressions. Because of this, the container types do not have other values nested in them; rather they contain a
* range that indicates which of the following expressions are part of that container.
*/
sealed interface TemplateBodyExpression {
// TODO: Special Forms (if_void, for, ...)?

/**
 * A temporary placeholder that is used only while a macro is partially compiled.
 *
 * The compiler adds it to reserve the slot of a list, sexp, struct, or macro invocation, and then overwrites
 * that slot with the real expression once the index of the container's last child is known.
 */
object Placeholder : TemplateBodyExpression

// Scalars
// Each scalar expression carries the annotations of the value together with the value itself.
// A null of any type; `type` records which Ion type the null has.
data class NullValue(val annotations: List<SymbolToken> = emptyList(), val type: IonType) : TemplateBodyExpression
data class BoolValue(val annotations: List<SymbolToken> = emptyList(), val value: Boolean) : TemplateBodyExpression
// An int that the reader reported as fitting in a Long; larger ints are represented by BigIntValue.
data class IntValue(val annotations: List<SymbolToken> = emptyList(), val value: Long) : TemplateBodyExpression
data class BigIntValue(val annotations: List<SymbolToken> = emptyList(), val value: BigInteger) : TemplateBodyExpression
data class FloatValue(val annotations: List<SymbolToken> = emptyList(), val value: Double) : TemplateBodyExpression
// NOTE(review): declared as BigDecimal for ease of construction; per review discussion, Ion's Decimal extends
// BigDecimal, so a negative-zero decimal can still be stored here — revisit if this causes problems.
data class DecimalValue(val annotations: List<SymbolToken> = emptyList(), val value: BigDecimal) : TemplateBodyExpression
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will probably have to be Decimal to support -0...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Decimal extends BigDecimal, so this can be constructed with a -0 decimal, but I can't think of any problem with changing it to Decimal here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So it ends up making the tests (or other programmatic construction of a template body) more annoying if I change this to Decimal. I'm going to leave it for now, and we can revisit it later if it causes problems.

data class TimestampValue(val annotations: List<SymbolToken> = emptyList(), val value: Timestamp) : TemplateBodyExpression
data class StringValue(val annotations: List<SymbolToken> = emptyList(), val value: String) : TemplateBodyExpression
// A literal symbol. The compiler emits this only for quoted symbols; an unquoted symbol is compiled to a Variable.
data class SymbolValue(val annotations: List<SymbolToken> = emptyList(), val value: SymbolToken) : TemplateBodyExpression
// We must override hashcode and equals in the lob types because `value` is a `byte[]`
data class BlobValue(val annotations: List<SymbolToken> = emptyList(), val value: ByteArray) : TemplateBodyExpression {
    override fun hashCode(): Int = 31 * annotations.hashCode() + value.contentHashCode()

    override fun equals(other: Any?): Boolean {
        if (other !is BlobValue) return false
        if (annotations != other.annotations) return false
        // Compare the bytes themselves rather than the array references.
        return value.contentEquals(other.value)
    }
}
// hashCode and equals are overridden so that the byte array is compared by content rather than by reference.
data class ClobValue(val annotations: List<SymbolToken> = emptyList(), val value: ByteArray) : TemplateBodyExpression {
    override fun hashCode(): Int = 31 * annotations.hashCode() + value.contentHashCode()

    override fun equals(other: Any?): Boolean {
        if (other !is ClobValue) return false
        if (annotations != other.annotations) return false
        // Compare the bytes themselves rather than the array references.
        return value.contentEquals(other.value)
    }
}

/**
 * An Ion List that could contain variables or macro invocations.
 *
 * The list's children are not nested inside this object; they are the expressions that follow it in the
 * flattened template body, up to and including [endInclusive].
 *
 * @property annotations the annotations on the list
 * @property startInclusive the index of the first expression of the list (i.e. this instance)
 * @property endInclusive the index of the last expression contained in the list
 */
data class ListValue(val annotations: List<SymbolToken> = emptyList(), val startInclusive: Int, val endInclusive: Int) : TemplateBodyExpression

/**
 * An Ion SExp that could contain variables or macro invocations.
 *
 * The compiler emits this only for a quoted sexp; an unquoted sexp is compiled as a macro invocation.
 *
 * @property annotations the annotations on the sexp
 * @property startInclusive the index of the first expression of the sexp (i.e. this instance)
 * @property endInclusive the index of the last expression contained in the sexp
 */
data class SExpValue(val annotations: List<SymbolToken> = emptyList(), val startInclusive: Int, val endInclusive: Int) : TemplateBodyExpression

/**
 * An Ion Struct that could contain variables or macro invocations.
 *
 * In the flattened template body, each field of the struct is a [FieldName] expression followed by the
 * expression(s) compiled from the field's value.
 *
 * @property annotations the annotations on the struct
 * @property startInclusive the index of the first expression of the struct (i.e. this instance)
 * @property endInclusive the index of the last expression contained in the struct
 * @property templateStructIndex for each field name with known text, the indices of the expressions at which
 * that field's values start; a name has more than one index when it occurs more than once in the struct
 */
data class StructValue(val annotations: List<SymbolToken> = emptyList(), val startInclusive: Int, val endInclusive: Int, val templateStructIndex: Map<String, List<Int>>) : TemplateBodyExpression

/**
 * The name of a struct field. The compiler emits it immediately before the expression(s) compiled from the
 * field's value.
 */
data class FieldName(val value: SymbolToken) : TemplateBodyExpression

/**
 * A reference to a variable that needs to be expanded.
 *
 * @property signatureIndex the position, within the macro's signature, of the parameter that is referenced
 */
data class Variable(val signatureIndex: Int) : TemplateBodyExpression

/**
 * A macro invocation that needs to be expanded.
 *
 * The invocation's arguments are the expressions that follow it in the flattened template body, up to and
 * including [endInclusive].
 *
 * @property macro the macro being invoked, referenced by name or by id
 * @property startInclusive the index of the first expression of the invocation (i.e. this instance)
 * @property endInclusive the index of the last expression that belongs to the invocation
 */
data class MacroInvocation(val macro: MacroRef, val startInclusive: Int, val endInclusive: Int) : TemplateBodyExpression
}
Loading
Loading