From 39f681812b973a8267d12b0e830a6dc21ab382be Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Sat, 17 Feb 2024 09:03:08 -0500 Subject: [PATCH] Adding a getter to FeatureField to allow value access (#13111) Getting access to a FeatureField's feature value is useful for deduplicating. If you have a sparse vector model and you want to handle multiple inputs from it, you want flexibility in how you deduplicate the feature dimensions. --- .../java/org/apache/lucene/document/FeatureField.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java index d70ead22d837..ad78b375f884 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java +++ b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java @@ -166,6 +166,16 @@ public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { return stream; } + /** + * This is useful if you have multiple features sharing a name and you want to take action to + * deduplicate them. + * + * @return the feature value of this field. + */ + public float getFeatureValue() { + return featureValue; + } + private static final class FeatureTokenStream extends TokenStream { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final TermFrequencyAttribute freqAttribute = addAttribute(TermFrequencyAttribute.class);