-
Notifications
You must be signed in to change notification settings - Fork 111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds macro compiler #697
Merged
Merged
Adds macro compiler #697
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,45 @@ | ||
package com.amazon.ion.impl.macro | ||
|
||
import java.math.BigDecimal | ||
|
||
/** | ||
* Marker interface for Macros | ||
* A [Macro] is either a [SystemMacro] or a [TemplateMacro]. | ||
*/ | ||
sealed interface Macro | ||
sealed interface Macro { | ||
val signature: List<Parameter> | ||
|
||
data class Parameter(val variableName: String, val type: ParameterEncoding, val grouped: Boolean) | ||
|
||
enum class ParameterEncoding(val ionTextName: String) { | ||
Tagged("any"), | ||
// TODO: List all of the possible tagless encodings | ||
} | ||
} | ||
|
||
/** | ||
* Represents a template macro. A template macro is defined by a name, a signature, and a list of template expressions. | ||
* Represents a template macro. A template macro is defined by a signature, and a list of template expressions. | ||
* A template macro only gains a name and/or ID when it is added to a macro table. | ||
*/ | ||
data class TemplateMacro(val name: String, val f: BigDecimal, val signature: MacroSignature, val body: List<TemplateExpression>) : Macro | ||
data class TemplateMacro(override val signature: List<Macro.Parameter>, val body: List<TemplateBodyExpression>) : Macro { | ||
private val cachedHashCode by lazy { signature.hashCode() * 31 + body.hashCode() } | ||
override fun hashCode(): Int = cachedHashCode | ||
|
||
override fun equals(other: Any?): Boolean { | ||
if (this === other) return true | ||
if (other !is TemplateMacro) return false | ||
// Check the hashCode as a quick check before we dive into the actual data. | ||
if (cachedHashCode != other.cachedHashCode) return false | ||
if (signature != other.signature) return false | ||
if (body != other.body) return false | ||
return true | ||
} | ||
} | ||
|
||
/** | ||
* Macros that are built in, rather than being defined by a template. | ||
*/ | ||
enum class SystemMacro : Macro { | ||
Stream, // A stream is technically not a macro, but we can implement it as a macro that is the identity function. | ||
Annotate, | ||
MakeString, | ||
enum class SystemMacro(override val signature: List<Macro.Parameter>) : Macro { | ||
// TODO: replace these placeholders | ||
Stream(emptyList()), // A stream is technically not a macro, but we can implement it as a macro that is the identity function. | ||
Annotate(emptyList()), | ||
MakeString(listOf(Macro.Parameter("text", Macro.ParameterEncoding.Tagged, grouped = true))), | ||
// TODO: Other system macros | ||
} |
240 changes: 240 additions & 0 deletions
240
src/main/java/com/amazon/ion/impl/macro/MacroCompiler.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,240 @@ | ||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package com.amazon.ion.impl.macro | ||
|
||
import com.amazon.ion.* | ||
import com.amazon.ion.impl.macro.TemplateBodyExpression.* | ||
import com.amazon.ion.util.confirm | ||
|
||
/**
 * [MacroCompiler] wraps an [IonReader]. When directed to do so, it will take over advancing and getting values from the
 * reader in order to read one [TemplateMacro].
 *
 * This is currently implemented using [IonReader], but it could be adapted to work with
 * [IonReaderContinuableCore][com.amazon.ion.impl.IonReaderContinuableCore].
 *
 * The signature and expression buffers are instance state that is cleared at the start of every [compileMacro] call.
 */
class MacroCompiler(private val reader: IonReader) {
    // TODO: Make sure that we can throw exceptions if there's an over-sized value.

    /** The name of the macro that was read. Returns `null` if no macro name is available. */
    var macroName: String? = null
        private set // Only mutable internally

    /** Parameters of the macro currently being compiled, in declaration order. */
    private val signature: MutableList<Macro.Parameter> = mutableListOf()

    /** Flattened (non-nested) template body of the macro currently being compiled. */
    private val expressions: MutableList<TemplateBodyExpression> = mutableListOf()

    /**
     * Compiles a template macro definition from the reader. Caller is responsible for positioning the reader at—but not
     * stepped into—the macro template s-expression.
     *
     * The s-expression is read as: the keyword `macro`, the macro name (a symbol), the signature (an s-expression),
     * and exactly one template body expression. After a successful call, the name is available from [macroName].
     *
     * TODO: if we switch the macro compiler to use a continuable reader, change the return type of this
     *       to a compiler state enum, and add a separate function to get the compiled macro once it is ready.
     *
     * @return a [TemplateMacro] holding copies of the compiled signature and body.
     */
    fun compileMacro(): TemplateMacro {
        // Reset all per-macro state so that nothing from a previous compilation leaks into this one.
        macroName = null
        signature.clear()
        expressions.clear()

        confirm(reader.type == IonType.SEXP) { "macro compilation expects a sexp starting with the keyword `macro`" }
        reader.confirmNoAnnotations("a macro definition sexp")
        reader.readContainer {
            confirm(reader.next() == IonType.SYMBOL && reader.stringValue() == "macro") { "macro compilation expects a sexp starting with the keyword `macro`" }

            nextAndCheckType(IonType.SYMBOL, "macro name")
            confirmNoAnnotations("macro name")
            // TODO: Enforce 'identifier' syntax subset of symbol
            //       Possibly add support for macro definitions without names?
            macroName = symbolValue().assumeText()

            nextAndCheckType(IonType.SEXP, "macro signature")
            confirmNoAnnotations("macro signature")
            readSignature()
            confirm(next() != null) { "Macro definition is missing a template body expression." }
            compileTemplateBodyExpression(isQuoted = false)
            confirm(next() == null) { "Unexpected $type after template body expression." }
        }
        // Copy the buffers so the returned macro is unaffected by later calls that clear and refill them.
        return TemplateMacro(signature.toList(), expressions.toList())
    }

    /**
     * Reads the macro signature, populating parameters in [signature].
     * Caller is responsible for making sure that the reader is positioned on (but not stepped into) the signature sexp.
     *
     * A bare symbol declares an ungrouped parameter; a list enclosing exactly one symbol declares a grouped parameter.
     */
    private fun readSignature() {
        reader.forEachInContainer {
            when (it) {
                IonType.SYMBOL -> addParameter(grouped = false)
                IonType.LIST -> {
                    confirmNoAnnotations(location = "grouped parameter enclosing list")
                    readContainer {
                        nextAndCheckType(IonType.SYMBOL, "parameter name")
                        addParameter(grouped = true)
                        confirm(next() == null) { "grouped parameter list must enclose only one variable name" }
                    }
                }
                else -> throw IonException("parameter must be a symbol or a list; found ${reader.type}")
            }
        }
    }

    /**
     * Adds a parameter to the macro signature.
     * Caller is responsible for making sure that the reader is positioned on a parameter name.
     *
     * The only accepted annotation is the text name of [Macro.ParameterEncoding.Tagged]; a parameter name that was
     * already declared in this signature is rejected.
     */
    private fun addParameter(grouped: Boolean) {
        val annotations = reader.typeAnnotations
        confirm(annotations.isEmptyOr(Macro.ParameterEncoding.Tagged.ionTextName)) { "unsupported parameter encoding ${annotations.toList()}" }
        val parameterName = reader.symbolValue().assumeText()
        confirm(signature.none { it.variableName == parameterName }) { "redeclaration of parameter '$parameterName'" }
        signature.add(Macro.Parameter(parameterName, Macro.ParameterEncoding.Tagged, grouped))
    }

    /**
     * Compiles the current value on the reader into a [TemplateBodyExpression] and adds it to [expressions].
     * Caller is responsible for ensuring that the reader is positioned on a value.
     *
     * When [isQuoted] is `true`, symbols and s-expressions are kept as literal values; otherwise a symbol is resolved
     * as a variable reference against [signature] and an s-expression is compiled as a macro invocation.
     *
     * If called when the reader is not positioned on any value, throws [IllegalStateException].
     */
    private fun compileTemplateBodyExpression(isQuoted: Boolean) {
        // NOTE: `toList()` does not allocate for an empty list.
        val annotations: List<SymbolToken> = reader.typeAnnotationSymbols.toList()

        if (reader.isNullValue) {
            expressions.add(NullValue(annotations, reader.type))
        } else when (reader.type) {
            IonType.BOOL -> expressions.add(BoolValue(annotations, reader.booleanValue()))
            IonType.INT -> expressions.add(
                // `!!` turns the platform type into a non-null enum so that this `when` is checked for exhaustiveness.
                when (reader.integerSize!!) {
                    IntegerSize.INT,
                    IntegerSize.LONG -> IntValue(annotations, reader.longValue())
                    IntegerSize.BIG_INTEGER -> BigIntValue(annotations, reader.bigIntegerValue())
                }
            )
            IonType.FLOAT -> expressions.add(FloatValue(annotations, reader.doubleValue()))
            IonType.DECIMAL -> expressions.add(DecimalValue(annotations, reader.decimalValue()))
            IonType.TIMESTAMP -> expressions.add(TimestampValue(annotations, reader.timestampValue()))
            IonType.STRING -> expressions.add(StringValue(annotations, reader.stringValue()))
            IonType.BLOB -> expressions.add(BlobValue(annotations, reader.newBytes()))
            IonType.CLOB -> expressions.add(ClobValue(annotations, reader.newBytes()))
            IonType.SYMBOL -> {
                if (isQuoted) {
                    expressions.add(SymbolValue(annotations, reader.symbolValue()))
                } else {
                    val name = reader.stringValue()
                    // `confirmNoAnnotations` already prefixes the message with "found annotations on ".
                    reader.confirmNoAnnotations("variable reference '$name'")
                    val index = signature.indexOfFirst { it.variableName == name }
                    confirm(index >= 0) { "variable '$name' is not recognized" }
                    expressions.add(Variable(index))
                }
            }
            IonType.LIST -> compileSequence(isQuoted) { start, end -> ListValue(annotations, start, end) }
            IonType.SEXP -> {
                if (isQuoted) {
                    compileSequence(isQuoted = true) { start, end -> SExpValue(annotations, start, end) }
                } else {
                    reader.confirmNoAnnotations(location = "a macro invocation")
                    compileMacroInvocation()
                }
            }
            IonType.STRUCT -> compileStruct(annotations, isQuoted)
            // IonType.NULL, IonType.DATAGRAM, null
            else -> throw IllegalStateException("Found ${reader.type}; this should be unreachable.")
        }
    }

    /**
     * Compiles a struct in a macro template.
     * When calling, the reader should be positioned at the struct, but not stepped into it.
     * If this function returns normally, it will be stepped out of the struct.
     * Caller will need to call [IonReader.next] to get the next value.
     *
     * Each field contributes a [FieldName] expression followed by its value expression(s). For field names with known
     * text, the index of the value expression is also recorded in the struct's field-name index.
     */
    private fun compileStruct(annotations: List<SymbolToken>, isQuoted: Boolean) {
        val start = expressions.size
        // Reserve the struct's own slot; it is overwritten once the end index is known.
        expressions.add(Placeholder)
        val templateStructIndex = mutableMapOf<String, ArrayList<Int>>()
        reader.forEachInContainer {
            expressions.add(FieldName(fieldNameSymbol))
            fieldNameSymbol.text?.let {
                val valueIndex = expressions.size
                // Default is an array list with capacity of 1, since the most common case is that a field name occurs once.
                templateStructIndex.getOrPut(it) { ArrayList(1) } += valueIndex
            }
            compileTemplateBodyExpression(isQuoted)
        }
        val end = expressions.lastIndex
        expressions[start] = StructValue(annotations, start, end, templateStructIndex)
    }

    /**
     * Compiles a list or sexp in a macro template.
     * When calling, the reader should be positioned at the sequence, but not stepped into it.
     * If this function returns normally, it will be stepped out of the sequence.
     * Caller will need to call [IonReader.next] to get the next value.
     *
     * [newTemplateBodySequence] receives the inclusive start and end indices of the sequence in [expressions] and
     * returns the expression that is stored at the start index.
     */
    private inline fun compileSequence(isQuoted: Boolean, newTemplateBodySequence: (Int, Int) -> TemplateBodyExpression) {
        val seqStart = expressions.size
        // Reserve the sequence's own slot; it is overwritten once the end index is known.
        expressions.add(Placeholder)
        reader.forEachInContainer { compileTemplateBodyExpression(isQuoted) }
        val seqEnd = expressions.lastIndex
        expressions[seqStart] = newTemplateBodySequence(seqStart, seqEnd)
    }

    /**
     * Compiles a macro invocation in a macro template.
     * When calling, the reader should be positioned at the sexp, but not stepped into it.
     * If this function returns normally, it will be stepped out of the sexp.
     * Caller will need to call [IonReader.next] to get the next value.
     *
     * An invocation whose first value is the symbol `quote` does not produce a [MacroInvocation]; its remaining
     * values are compiled as quoted (literal) expressions instead.
     */
    private fun compileMacroInvocation() {
        reader.stepIn()
        val macroRef = when (reader.next()) {
            IonType.SYMBOL -> {
                val macroName = reader.stringValue()
                // TODO: Once we have a macro table, validate name exists in current macro table.
                if (macroName == "quote") null else MacroRef.ByName(macroName)
            }
            // TODO: When we have an ID for the macro "quote", add handling for it here.
            // TODO: Once we have a macro table, validate that id exists in current macro table.
            IonType.INT -> MacroRef.ById(reader.longValue())
            else -> throw IonException("macro invocation must start with an id (int) or identifier (symbol); found ${reader.type ?: "nothing"}")
        }

        if (macroRef == null) {
            // It's the "quote" macro; skip compiling a macro invocation and just treat all contents as literals
            reader.forEachRemaining { compileTemplateBodyExpression(isQuoted = true) }
        } else {
            val macroStart = expressions.size
            // Reserve the invocation's own slot; it is overwritten once the end index is known.
            expressions.add(Placeholder)
            reader.forEachRemaining { compileTemplateBodyExpression(isQuoted = false) }
            val macroEnd = expressions.lastIndex
            expressions[macroStart] =
                MacroInvocation(macroRef, macroStart, macroEnd)
        }
        reader.stepOut()
    }

    // Helper functions

    /** Returns `true` if this annotation array is empty or contains exactly one annotation equal to [text]. */
    private fun Array<String>.isEmptyOr(text: String): Boolean = isEmpty() || (size == 1 && this[0] == text)

    /** Throws [IonException] if any annotations are on the current value in this [IonReader]. */
    private fun IonReader.confirmNoAnnotations(location: String) {
        confirm(typeAnnotations.isEmpty()) { "found annotations on $location" }
    }

    /** Moves to the next value and throws [IonException] if it is not the `expected` [IonType]. */
    private fun IonReader.nextAndCheckType(expected: IonType, location: String) {
        confirm(next() == expected) { "$location must be a $expected; found ${type ?: "nothing"}" }
    }

    /** Steps into a container, executes [block], and steps out. */
    private inline fun IonReader.readContainer(block: IonReader.() -> Unit) { stepIn(); block(); stepOut() }

    /** Executes [block] for each remaining value at the current reader depth. */
    private inline fun IonReader.forEachRemaining(block: IonReader.(IonType) -> Unit) { while (next() != null) { block(type) } }

    /** Steps into a container, executes [block] for each value at that reader depth, and steps out. */
    private inline fun IonReader.forEachInContainer(block: IonReader.(IonType) -> Unit) = readContainer { forEachRemaining(block) }
}
This file was deleted.
Oops, something went wrong.
79 changes: 79 additions & 0 deletions
79
src/main/java/com/amazon/ion/impl/macro/TemplateBodyExpression.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package com.amazon.ion.impl.macro | ||
|
||
import com.amazon.ion.* | ||
import java.math.BigDecimal | ||
import java.math.BigInteger | ||
|
||
/**
 * One expression in the compiled body of a template.
 *
 * [`IonValue`](com.amazon.ion.IonValue) is not suitable here: it requires references to parent containers and to an
 * IonSystem, which makes it impractical for reading and writing macro definitions, and it has no way to express
 * macro invocations or variable references.
 *
 * A template body is stored as a flat list of expressions, without nesting, for ease and efficiency of evaluating
 * e-expressions. Container-like expressions therefore do not hold their children; they hold an index range that
 * says which of the following expressions belong to them.
 */
sealed interface TemplateBodyExpression {
    // TODO: Special Forms (if_void, for, ...)?

    /**
     * Stand-in used only while a macro is partially compiled.
     */
    object Placeholder : TemplateBodyExpression

    // Scalars

    data class NullValue(
        val annotations: List<SymbolToken> = emptyList(),
        val type: IonType,
    ) : TemplateBodyExpression

    data class BoolValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Boolean,
    ) : TemplateBodyExpression

    data class IntValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Long,
    ) : TemplateBodyExpression

    data class BigIntValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: BigInteger,
    ) : TemplateBodyExpression

    data class FloatValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Double,
    ) : TemplateBodyExpression

    data class DecimalValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: BigDecimal,
    ) : TemplateBodyExpression

    data class TimestampValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: Timestamp,
    ) : TemplateBodyExpression

    data class StringValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: String,
    ) : TemplateBodyExpression

    data class SymbolValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: SymbolToken,
    ) : TemplateBodyExpression

    // The lob types carry a `byte[]`, so `hashCode` and `equals` are overridden to compare array contents.

    data class BlobValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: ByteArray,
    ) : TemplateBodyExpression {
        override fun hashCode(): Int = 31 * annotations.hashCode() + value.contentHashCode()

        override fun equals(other: Any?): Boolean {
            if (other !is BlobValue) return false
            return annotations == other.annotations && value.contentEquals(other.value)
        }
    }

    data class ClobValue(
        val annotations: List<SymbolToken> = emptyList(),
        val value: ByteArray,
    ) : TemplateBodyExpression {
        override fun hashCode(): Int = 31 * annotations.hashCode() + value.contentHashCode()

        override fun equals(other: Any?): Boolean {
            if (other !is ClobValue) return false
            return annotations == other.annotations && value.contentEquals(other.value)
        }
    }

    /**
     * An Ion List that could contain variables or macro invocations.
     *
     * @property startInclusive the index of the first expression of the list (i.e. this instance)
     * @property endInclusive the index of the last expression contained in the list
     */
    data class ListValue(
        val annotations: List<SymbolToken> = emptyList(),
        val startInclusive: Int,
        val endInclusive: Int,
    ) : TemplateBodyExpression

    /**
     * An Ion SExp that could contain variables or macro invocations.
     *
     * The index range has the same meaning as for [ListValue].
     */
    data class SExpValue(
        val annotations: List<SymbolToken> = emptyList(),
        val startInclusive: Int,
        val endInclusive: Int,
    ) : TemplateBodyExpression

    /**
     * An Ion Struct that could contain variables or macro invocations.
     *
     * The index range has the same meaning as for [ListValue].
     *
     * @property templateStructIndex expression indices keyed by field-name text
     */
    data class StructValue(
        val annotations: List<SymbolToken> = emptyList(),
        val startInclusive: Int,
        val endInclusive: Int,
        val templateStructIndex: Map<String, List<Int>>,
    ) : TemplateBodyExpression

    /**
     * The name of a struct field.
     */
    data class FieldName(val value: SymbolToken) : TemplateBodyExpression

    /**
     * A reference to a variable that needs to be expanded.
     */
    data class Variable(val signatureIndex: Int) : TemplateBodyExpression

    /**
     * A macro invocation that needs to be expanded.
     */
    data class MacroInvocation(
        val macro: MacroRef,
        val startInclusive: Int,
        val endInclusive: Int,
    ) : TemplateBodyExpression
}
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will probably have to be `Decimal` to support -0...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`Decimal` extends `BigDecimal`, so this can be constructed with a `-0` decimal, but I can't think of any problem with changing it to `Decimal` here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So it ends up making the tests (or other programmatic construction of a template body) more annoying if I change this to `Decimal`. I'm going to leave it for now, and we can revisit it later if it causes problems.