Skip to content

Commit

Permalink
PR 13757 follow-up: add missing with-discountOverlaps Similarity cons…
Browse files Browse the repository at this point in the history
…tructor variants, CHANGES.txt entries (#13845) (#13858)

(cherry picked from commit dab7311)
(cherry picked from commit cbd8b52)

Resolved Conflicts:
	lucene/CHANGES.txt
  • Loading branch information
cpoerschke authored Oct 9, 2024
1 parent a4c0f74 commit 0a57309
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 7 deletions.
5 changes: 5 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ API Changes
unifies Byte/FloatVectorValues incorporating RandomAccess* API and introduces
DocIndexIterator for iterative access in place of direct inheritance from DISI. (Michael Sokolov)

* GITHUB#13845: Add missing with-discountOverlaps Similarity constructor variants. (Pierre Salagnac, Christine Poerschke, Robert Muir)

New Features
---------------------

Expand Down Expand Up @@ -323,6 +325,9 @@ API Changes
* GITHUB#13568, GITHUB#13750: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
or drill-down. (Egor Potemkin)

* GITHUB#13757: For similarities, provide default computeNorm implementation and remove remaining discountOverlaps setters.
(Christine Poerschke, Adrien Grand, Robert Muir)

New Features
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,26 @@ public abstract class Axiomatic extends SimilarityBase {
protected final int queryLen;

/**
* Constructor setting all Axiomatic hyperparameters
* Constructor setting all Axiomatic hyperparameters and using default discountOverlaps value.
*
* @param s hyperparam for the growth function
* @param queryLen the query length
* @param k hyperparam for the primitive weighting function
*/
public Axiomatic(float s, int queryLen, float k) {
// Delegate to the full constructor, passing true as the discountOverlaps value.
this(true, s, queryLen, k);
}

/**
* Constructor setting all Axiomatic hyperparameters and the discountOverlaps value.
*
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
* @param s hyperparam for the growth function
* @param queryLen the query length
* @param k hyperparam for the primitive weighting function
*/
public Axiomatic(boolean discountOverlaps, float s, int queryLen, float k) {
super(discountOverlaps);
if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) {
throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,23 @@ public class DFISimilarity extends SimilarityBase {
private final Independence independence;

/**
* Create DFI with the specified divergence from independence measure
* Create DFI with the specified divergence from independence measure and using default
* discountOverlaps value.
*
* @param independenceMeasure measure of divergence from independence
*/
public DFISimilarity(Independence independenceMeasure) {
// Delegate to the two-argument constructor, passing true as the discountOverlaps value.
this(independenceMeasure, true);
}

/**
* Create DFI with the specified divergence from independence measure and discountOverlaps value.
*
* @param independenceMeasure measure of divergence from independence
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
*/
public DFISimilarity(Independence independenceMeasure, boolean discountOverlaps) {
// discountOverlaps is handled by the superclass constructor.
super(discountOverlaps);
this.independence = independenceMeasure;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public class DFRSimilarity extends SimilarityBase {
protected final Normalization normalization;

/**
* Creates DFRSimilarity from the three components.
* Creates DFRSimilarity from the three components and using default discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
Expand All @@ -98,7 +98,7 @@ public DFRSimilarity(
}

/**
* Creates DFRSimilarity from the three components.
* Creates DFRSimilarity from the three components and with the specified discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public class IBSimilarity extends SimilarityBase {
protected final Normalization normalization;

/**
* Creates IBSimilarity from the three components.
* Creates IBSimilarity from the three components and using default discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
Expand All @@ -86,6 +86,26 @@ public class IBSimilarity extends SimilarityBase {
* @param normalization term frequency normalization
*/
public IBSimilarity(Distribution distribution, Lambda lambda, Normalization normalization) {
// Delegate to the four-argument constructor, passing true as the discountOverlaps value.
this(distribution, lambda, normalization, true);
}

/**
* Creates IBSimilarity from the three components and with the specified discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
*
* @param distribution probabilistic distribution modeling term occurrence
* @param lambda distribution's &lambda;<sub>w</sub> parameter
* @param normalization term frequency normalization
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
*/
public IBSimilarity(
Distribution distribution,
Lambda lambda,
Normalization normalization,
boolean discountOverlaps) {
super(discountOverlaps);
this.distribution = distribution;
this.lambda = lambda;
this.normalization = normalization;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ public class IndriDirichletSimilarity extends LMSimilarity {
/** The &mu; parameter. */
private final float mu;

/**
* Instantiates the similarity with the provided parameters.
*
* @param collectionModel the collection language model, passed to the superclass constructor
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
* @param mu the &mu; parameter
*/
public IndriDirichletSimilarity(
CollectionModel collectionModel, boolean discountOverlaps, float mu) {
super(collectionModel, discountOverlaps);
// NOTE(review): mu is stored without any range check here, whereas the LMDirichletSimilarity
// constructor rejects non-finite or negative mu -- confirm this difference is intended.
this.mu = mu;
}

/** Instantiates the similarity with the provided &mu; parameter. */
public IndriDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,13 @@ public class LMDirichletSimilarity extends LMSimilarity {

/** Instantiates the similarity with the provided &mu; parameter. */
public LMDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel);
this(collectionModel, true, mu);
}

/** Instantiates the similarity with the provided parameters. */
public LMDirichletSimilarity(
CollectionModel collectionModel, boolean discountOverlaps, float mu) {
super(collectionModel, discountOverlaps);
if (Float.isFinite(mu) == false || mu < 0) {
throw new IllegalArgumentException(
"illegal mu value: " + mu + ", must be a non-negative finite value");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {

/** Instantiates with the specified collectionModel and &lambda; parameter. */
public LMJelinekMercerSimilarity(CollectionModel collectionModel, float lambda) {
super(collectionModel);
this(collectionModel, true, lambda);
}

/** Instantiates with the specified collectionModel and parameters. */
public LMJelinekMercerSimilarity(
CollectionModel collectionModel, boolean discountOverlaps, float lambda) {
super(collectionModel, discountOverlaps);
if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) {
throw new IllegalArgumentException("lambda must be in the range (0 .. 1]");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ public abstract class LMSimilarity extends SimilarityBase {

/**
* Creates a new instance with the specified collection language model, passing true as the
* discountOverlaps value.
*
* @param collectionModel the collection language model
*/
public LMSimilarity(CollectionModel collectionModel) {
this(collectionModel, true);
}

/**
* Creates a new instance with the specified collection language model and discountOverlaps.
*
* @param collectionModel the collection language model
* @param discountOverlaps true if overlap tokens should not impact document length for scoring.
*/
public LMSimilarity(CollectionModel collectionModel, boolean discountOverlaps) {
// discountOverlaps is handled by the superclass constructor.
super(discountOverlaps);
this.collectionModel = collectionModel;
}

Expand Down

0 comments on commit 0a57309

Please sign in to comment.