517 lines
15 KiB
C#
517 lines
15 KiB
C#
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
using System;
|
|
|
|
using NUnit.Framework;
|
|
|
|
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
|
|
using Document = Lucene.Net.Documents.Document;
|
|
using Field = Lucene.Net.Documents.Field;
|
|
using IndexReader = Lucene.Net.Index.IndexReader;
|
|
using IndexWriter = Lucene.Net.Index.IndexWriter;
|
|
using Term = Lucene.Net.Index.Term;
|
|
using Directory = Lucene.Net.Store.Directory;
|
|
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
|
|
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
|
|
|
|
namespace Lucene.Net.Search
|
|
{
|
|
|
|
/// <summary> Test of the DisjunctionMaxQuery.
|
|
///
|
|
/// </summary>
|
|
[TestFixture]
|
|
public class TestDisjunctionMaxQuery:LuceneTestCase
|
|
{
|
|
public TestDisjunctionMaxQuery()
|
|
{
|
|
InitBlock();
|
|
}
|
|
private void InitBlock()
|
|
{
|
|
sim = new TestSimilarity();
|
|
}
|
|
|
|
/// <summary>threshold for comparing floats </summary>
|
|
public const float SCORE_COMP_THRESH = 0.000001f;
|
|
|
|
/// <summary> Similarity to eliminate tf, idf and lengthNorm effects to
|
|
/// isolate test case.
|
|
///
|
|
/// <p/>
|
|
/// same as TestRankingSimilarity in TestRanking.zip from
|
|
/// http://issues.apache.org/jira/browse/LUCENE-323
|
|
/// </summary>
|
|
[Serializable]
|
|
private class TestSimilarity:DefaultSimilarity
|
|
{
|
|
|
|
public TestSimilarity()
|
|
{
|
|
}
|
|
public override float Tf(float freq)
|
|
{
|
|
if (freq > 0.0f)
|
|
return 1.0f;
|
|
else
|
|
return 0.0f;
|
|
}
|
|
public override float LengthNorm(System.String fieldName, int numTerms)
|
|
{
|
|
return 1.0f;
|
|
}
|
|
public override float Idf(int docFreq, int numDocs)
|
|
{
|
|
return 1.0f;
|
|
}
|
|
}
|
|
|
|
public Similarity sim;
|
|
public Directory index;
|
|
public IndexReader r;
|
|
public IndexSearcher s;
|
|
|
|
[SetUp]
|
|
public override void SetUp()
|
|
{
|
|
base.SetUp();
|
|
|
|
index = new RAMDirectory();
|
|
IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
|
writer.SetSimilarity(sim);
|
|
|
|
// hed is the most important field, dek is secondary
|
|
|
|
// d1 is an "ok" match for: albino elephant
|
|
{
|
|
Document d1 = new Document();
|
|
d1.Add(new Field("id", "d1", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d1"));
|
|
d1.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
|
|
d1.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "elephant"));
|
|
writer.AddDocument(d1);
|
|
}
|
|
|
|
// d2 is a "good" match for: albino elephant
|
|
{
|
|
Document d2 = new Document();
|
|
d2.Add(new Field("id", "d2", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d2"));
|
|
d2.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
|
|
d2.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "albino"));
|
|
d2.Add(new Field("dek", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "elephant"));
|
|
writer.AddDocument(d2);
|
|
}
|
|
|
|
// d3 is a "better" match for: albino elephant
|
|
{
|
|
Document d3 = new Document();
|
|
d3.Add(new Field("id", "d3", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d3"));
|
|
d3.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "albino"));
|
|
d3.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
|
|
writer.AddDocument(d3);
|
|
}
|
|
|
|
// d4 is the "best" match for: albino elephant
|
|
{
|
|
Document d4 = new Document();
|
|
d4.Add(new Field("id", "d4", Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id", "d4"));
|
|
d4.Add(new Field("hed", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "albino"));
|
|
d4.Add(new Field("hed", "elephant", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("hed", "elephant"));
|
|
d4.Add(new Field("dek", "albino", Field.Store.YES, Field.Index.ANALYZED)); //Field.Text("dek", "albino"));
|
|
writer.AddDocument(d4);
|
|
}
|
|
|
|
writer.Close();
|
|
|
|
r = IndexReader.Open(index, true);
|
|
s = new IndexSearcher(r);
|
|
s.Similarity = sim;
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestSkipToFirsttimeMiss()
|
|
{
|
|
DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
|
|
dq.Add(Tq("id", "d1"));
|
|
dq.Add(Tq("dek", "DOES_NOT_EXIST"));
|
|
|
|
QueryUtils.Check(dq, s);
|
|
|
|
Weight dw = dq.Weight(s);
|
|
Scorer ds = dw.Scorer(r, true, false);
|
|
bool skipOk = ds.Advance(3) != DocIdSetIterator.NO_MORE_DOCS;
|
|
if (skipOk)
|
|
{
|
|
Assert.Fail("firsttime skipTo found a match? ... " + r.Document(ds.DocID()).Get("id"));
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestSkipToFirsttimeHit()
|
|
{
|
|
DisjunctionMaxQuery dq = new DisjunctionMaxQuery(0.0f);
|
|
dq.Add(Tq("dek", "albino"));
|
|
dq.Add(Tq("dek", "DOES_NOT_EXIST"));
|
|
|
|
QueryUtils.Check(dq, s);
|
|
|
|
Weight dw = dq.Weight(s);
|
|
Scorer ds = dw.Scorer(r, true, false);
|
|
Assert.IsTrue(ds.Advance(3) != DocIdSetIterator.NO_MORE_DOCS, "firsttime skipTo found no match");
|
|
Assert.AreEqual("d4", r.Document(ds.DocID()).Get("id"), "found wrong docid");
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestSimpleEqualScores1()
|
|
{
|
|
|
|
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
|
|
q.Add(Tq("hed", "albino"));
|
|
q.Add(Tq("hed", "elephant"));
|
|
QueryUtils.Check(q, s);
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
Assert.AreEqual(4, h.Length, "all docs should match " + q.ToString());
|
|
|
|
float score = h[0].Score;
|
|
for (int i = 1; i < h.Length; i++)
|
|
{
|
|
Assert.AreEqual(score, h[i].Score, SCORE_COMP_THRESH, "score #" + i + " is not the same");
|
|
}
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testSimpleEqualScores1", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestSimpleEqualScores2()
|
|
{
|
|
|
|
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
|
|
q.Add(Tq("dek", "albino"));
|
|
q.Add(Tq("dek", "elephant"));
|
|
QueryUtils.Check(q, s);
|
|
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
Assert.AreEqual(3, h.Length, "3 docs should match " + q.ToString());
|
|
float score = h[0].Score;
|
|
for (int i = 1; i < h.Length; i++)
|
|
{
|
|
Assert.AreEqual(score, h[i].Score, SCORE_COMP_THRESH, "score #" + i + " is not the same");
|
|
}
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testSimpleEqualScores2", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestSimpleEqualScores3()
|
|
{
|
|
|
|
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.0f);
|
|
q.Add(Tq("hed", "albino"));
|
|
q.Add(Tq("hed", "elephant"));
|
|
q.Add(Tq("dek", "albino"));
|
|
q.Add(Tq("dek", "elephant"));
|
|
QueryUtils.Check(q, s);
|
|
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
Assert.AreEqual(4, h.Length, "all docs should match " + q.ToString());
|
|
float score = h[0].Score;
|
|
for (int i = 1; i < h.Length; i++)
|
|
{
|
|
Assert.AreEqual(score, h[i].Score, SCORE_COMP_THRESH, "score #" + i + " is not the same");
|
|
}
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testSimpleEqualScores3", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestSimpleTiebreaker()
|
|
{
|
|
|
|
DisjunctionMaxQuery q = new DisjunctionMaxQuery(0.01f);
|
|
q.Add(Tq("dek", "albino"));
|
|
q.Add(Tq("dek", "elephant"));
|
|
QueryUtils.Check(q, s);
|
|
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
Assert.AreEqual(3, h.Length, "3 docs should match " + q.ToString());
|
|
Assert.AreEqual(s.Doc(h[0].Doc).Get("id"), "d2", "wrong first");
|
|
float score0 = h[0].Score;
|
|
float score1 = h[1].Score;
|
|
float score2 = h[2].Score;
|
|
Assert.IsTrue(score0 > score1, "d2 does not have better score then others: " + score0 + " >? " + score1);
|
|
Assert.AreEqual(score1, score2, SCORE_COMP_THRESH, "d4 and d1 don't have equal scores");
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testSimpleTiebreaker", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
[Test]
|
|
public virtual void TestBooleanRequiredEqualScores()
|
|
{
|
|
|
|
BooleanQuery q = new BooleanQuery();
|
|
{
|
|
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
|
|
q1.Add(Tq("hed", "albino"));
|
|
q1.Add(Tq("dek", "albino"));
|
|
q.Add(q1, Occur.MUST); //true,false);
|
|
QueryUtils.Check(q1, s);
|
|
}
|
|
{
|
|
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
|
|
q2.Add(Tq("hed", "elephant"));
|
|
q2.Add(Tq("dek", "elephant"));
|
|
q.Add(q2, Occur.MUST); //true,false);
|
|
QueryUtils.Check(q2, s);
|
|
}
|
|
|
|
QueryUtils.Check(q, s);
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
Assert.AreEqual(3, h.Length, "3 docs should match " + q.ToString());
|
|
float score = h[0].Score;
|
|
for (int i = 1; i < h.Length; i++)
|
|
{
|
|
Assert.AreEqual(score, h[i].Score, SCORE_COMP_THRESH, "score #" + i + " is not the same");
|
|
}
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testBooleanRequiredEqualScores1", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
|
|
[Test]
|
|
public virtual void TestBooleanOptionalNoTiebreaker()
|
|
{
|
|
|
|
BooleanQuery q = new BooleanQuery();
|
|
{
|
|
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.0f);
|
|
q1.Add(Tq("hed", "albino"));
|
|
q1.Add(Tq("dek", "albino"));
|
|
q.Add(q1, Occur.SHOULD); //false,false);
|
|
}
|
|
{
|
|
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.0f);
|
|
q2.Add(Tq("hed", "elephant"));
|
|
q2.Add(Tq("dek", "elephant"));
|
|
q.Add(q2, Occur.SHOULD); //false,false);
|
|
}
|
|
QueryUtils.Check(q, s);
|
|
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
Assert.AreEqual(4, h.Length, "4 docs should match " + q.ToString());
|
|
float score = h[0].Score;
|
|
for (int i = 1; i < h.Length - 1; i++)
|
|
{
|
|
/* note: -1 */
|
|
Assert.AreEqual(score, h[i].Score, SCORE_COMP_THRESH, "score #" + i + " is not the same");
|
|
}
|
|
Assert.AreEqual("d1", s.Doc(h[h.Length - 1].Doc).Get("id"), "wrong last");
|
|
float score1 = h[h.Length - 1].Score;
|
|
Assert.IsTrue(score > score1, "d1 does not have worse score then others: " + score + " >? " + score1);
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testBooleanOptionalNoTiebreaker", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
|
|
[Test]
|
|
public virtual void TestBooleanOptionalWithTiebreaker()
|
|
{
|
|
|
|
BooleanQuery q = new BooleanQuery();
|
|
{
|
|
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
|
|
q1.Add(Tq("hed", "albino"));
|
|
q1.Add(Tq("dek", "albino"));
|
|
q.Add(q1, Occur.SHOULD); //false,false);
|
|
}
|
|
{
|
|
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
|
|
q2.Add(Tq("hed", "elephant"));
|
|
q2.Add(Tq("dek", "elephant"));
|
|
q.Add(q2, Occur.SHOULD); //false,false);
|
|
}
|
|
QueryUtils.Check(q, s);
|
|
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
|
|
Assert.AreEqual(4, h.Length, "4 docs should match " + q.ToString());
|
|
|
|
float score0 = h[0].Score;
|
|
float score1 = h[1].Score;
|
|
float score2 = h[2].Score;
|
|
float score3 = h[3].Score;
|
|
|
|
System.String doc0 = s.Doc(h[0].Doc).Get("id");
|
|
System.String doc1 = s.Doc(h[1].Doc).Get("id");
|
|
System.String doc2 = s.Doc(h[2].Doc).Get("id");
|
|
System.String doc3 = s.Doc(h[3].Doc).Get("id");
|
|
|
|
Assert.IsTrue(doc0.Equals("d2") || doc0.Equals("d4"), "doc0 should be d2 or d4: " + doc0);
|
|
Assert.IsTrue(doc1.Equals("d2") || doc1.Equals("d4"), "doc1 should be d2 or d4: " + doc0);
|
|
Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "score0 and score1 should match");
|
|
Assert.AreEqual("d3", doc2, "wrong third");
|
|
Assert.IsTrue(score1 > score2, "d3 does not have worse score then d2 and d4: " + score1 + " >? " + score2);
|
|
|
|
Assert.AreEqual("d1", doc3, "wrong fourth");
|
|
Assert.IsTrue(score2 > score3, "d1 does not have worse score then d3: " + score2 + " >? " + score3);
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testBooleanOptionalWithTiebreaker", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
|
|
[Test]
|
|
public virtual void TestBooleanOptionalWithTiebreakerAndBoost()
|
|
{
|
|
|
|
BooleanQuery q = new BooleanQuery();
|
|
{
|
|
DisjunctionMaxQuery q1 = new DisjunctionMaxQuery(0.01f);
|
|
q1.Add(Tq("hed", "albino", 1.5f));
|
|
q1.Add(Tq("dek", "albino"));
|
|
q.Add(q1, Occur.SHOULD); //false,false);
|
|
}
|
|
{
|
|
DisjunctionMaxQuery q2 = new DisjunctionMaxQuery(0.01f);
|
|
q2.Add(Tq("hed", "elephant", 1.5f));
|
|
q2.Add(Tq("dek", "elephant"));
|
|
q.Add(q2, Occur.SHOULD); //false,false);
|
|
}
|
|
QueryUtils.Check(q, s);
|
|
|
|
|
|
ScoreDoc[] h = s.Search(q, null, 1000).ScoreDocs;
|
|
|
|
try
|
|
{
|
|
|
|
Assert.AreEqual(4, h.Length, "4 docs should match " + q.ToString());
|
|
|
|
float score0 = h[0].Score;
|
|
float score1 = h[1].Score;
|
|
float score2 = h[2].Score;
|
|
float score3 = h[3].Score;
|
|
|
|
System.String doc0 = s.Doc(h[0].Doc).Get("id");
|
|
System.String doc1 = s.Doc(h[1].Doc).Get("id");
|
|
System.String doc2 = s.Doc(h[2].Doc).Get("id");
|
|
System.String doc3 = s.Doc(h[3].Doc).Get("id");
|
|
|
|
Assert.AreEqual("d4", doc0, "doc0 should be d4: ");
|
|
Assert.AreEqual("d3", doc1, "doc1 should be d3: ");
|
|
Assert.AreEqual("d2", doc2, "doc2 should be d2: ");
|
|
Assert.AreEqual("d1", doc3, "doc3 should be d1: ");
|
|
|
|
Assert.IsTrue(score0 > score1, "d4 does not have a better score then d3: " + score0 + " >? " + score1);
|
|
Assert.IsTrue(score1 > score2, "d3 does not have a better score then d2: " + score1 + " >? " + score2);
|
|
Assert.IsTrue(score2 > score3, "d3 does not have a better score then d1: " + score2 + " >? " + score3);
|
|
}
|
|
catch (System.ApplicationException e)
|
|
{
|
|
PrintHits("testBooleanOptionalWithTiebreakerAndBoost", h, s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>macro </summary>
|
|
protected internal virtual Query Tq(System.String f, System.String t)
|
|
{
|
|
return new TermQuery(new Term(f, t));
|
|
}
|
|
/// <summary>macro </summary>
|
|
protected internal virtual Query Tq(System.String f, System.String t, float b)
|
|
{
|
|
Query q = Tq(f, t);
|
|
q.Boost = b;
|
|
return q;
|
|
}
|
|
|
|
|
|
protected internal virtual void PrintHits(System.String test, ScoreDoc[] h, Searcher searcher)
|
|
{
|
|
|
|
System.Console.Error.WriteLine("------- " + test + " -------");
|
|
|
|
for (int i = 0; i < h.Length; i++)
|
|
{
|
|
Document d = searcher.Doc(h[i].Doc);
|
|
float score = h[i].Score;
|
|
System.Console.Error.WriteLine("#" + i + ": {0.000000000}" + score + " - " + d.Get("id"));
|
|
}
|
|
}
|
|
}
|
|
} |