395 lines
16 KiB
C#
395 lines
16 KiB
C#
|
/*
|
|||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|||
|
* contributor license agreements. See the NOTICE file distributed with
|
|||
|
* this work for additional information regarding copyright ownership.
|
|||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|||
|
* (the "License"); you may not use this file except in compliance with
|
|||
|
* the License. You may obtain a copy of the License at
|
|||
|
*
|
|||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|||
|
*
|
|||
|
* Unless required by applicable law or agreed to in writing, software
|
|||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
|
* See the License for the specific language governing permissions and
|
|||
|
* limitations under the License.
|
|||
|
*/
|
|||
|
|
|||
|
using System;
|
|||
|
using Lucene.Net.Analysis.Tokenattributes;
|
|||
|
using NUnit.Framework;
|
|||
|
|
|||
|
using Analyzer = Lucene.Net.Analysis.Analyzer;
|
|||
|
using TokenStream = Lucene.Net.Analysis.TokenStream;
|
|||
|
using Tokenizer = Lucene.Net.Analysis.Tokenizer;
|
|||
|
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
|
|||
|
using Document = Lucene.Net.Documents.Document;
|
|||
|
using Field = Lucene.Net.Documents.Field;
|
|||
|
using IndexWriter = Lucene.Net.Index.IndexWriter;
|
|||
|
using Term = Lucene.Net.Index.Term;
|
|||
|
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
|
|||
|
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
|
|||
|
|
|||
|
namespace Lucene.Net.Search
|
|||
|
{
|
|||
|
[TestFixture]
|
|||
|
public class TestTermRangeQuery:LuceneTestCase
|
|||
|
{
|
|||
|
// Number of documents inserted so far; used by InsertDoc to build unique ids.
private int docCount = 0;
// In-memory index shared by all tests; recreated fresh in SetUp.
private RAMDirectory dir;

[SetUp]
public override void SetUp()
{
	base.SetUp();
	dir = new RAMDirectory();
}
|
|||
|
|
|||
|
[Test]
public virtual void TestExclusive()
{
	// With both endpoints exclusive, only terms strictly between "A" and "C"
	// (i.e. just "B") may match.
	Query rangeQuery = new TermRangeQuery("content", "A", "C", false, false);

	InitializeIndex(new System.String[] { "A", "B", "C", "D" });
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	ScoreDoc[] results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "A,B,C,D, only B in range");
	indexSearcher.Close();

	// Rebuild without "C"; the exclusive upper bound means the count is unchanged.
	InitializeIndex(new System.String[] { "A", "B", "D" });
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "A,B,D, only B in range");
	indexSearcher.Close();

	// Adding "C" back must not change the result, since "C" is excluded.
	AddDoc("C");
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "C added, still only B in range");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
[Test]
public virtual void TestInclusive()
{
	// With both endpoints inclusive, "A", "B" and "C" all fall inside [A, C].
	Query rangeQuery = new TermRangeQuery("content", "A", "C", true, true);

	InitializeIndex(new System.String[] { "A", "B", "C", "D" });
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	ScoreDoc[] results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(3, results.Length, "A,B,C,D - A,B,C in range");
	indexSearcher.Close();

	// Without "C" indexed, only "A" and "B" remain in range.
	InitializeIndex(new System.String[] { "A", "B", "D" });
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(2, results.Length, "A,B,D - A and B in range");
	indexSearcher.Close();

	// Re-adding "C" brings the inclusive upper endpoint back into the hits.
	AddDoc("C");
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(3, results.Length, "C added - A, B, C in range");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
[Test]
public virtual void TestEqualsHashcode()
{
	// Queries built with identical arguments must be Equals() and agree on GetHashCode().
	Query a = new TermRangeQuery("content", "A", "C", true, true);
	a.Boost = 1.0f;
	Query b = new TermRangeQuery("content", "A", "C", true, true);
	b.Boost = 1.0f;

	Assert.AreEqual(a, a, "query equals itself is true");
	Assert.AreEqual(a, b, "equivalent queries are equal");
	Assert.AreEqual(a.GetHashCode(), b.GetHashCode(), "hashcode must return same value when equals is true");

	// Every differing component - boost, field, lower term, upper term - breaks equality.
	b.Boost = 2.0f;
	Assert.IsFalse(a.Equals(b), "Different boost queries are not equal");

	b = new TermRangeQuery("notcontent", "A", "C", true, true);
	Assert.IsFalse(a.Equals(b), "Different fields are not equal");

	b = new TermRangeQuery("content", "X", "C", true, true);
	Assert.IsFalse(a.Equals(b), "Different lower terms are not equal");

	b = new TermRangeQuery("content", "A", "Z", true, true);
	Assert.IsFalse(a.Equals(b), "Different upper terms are not equal");

	// Open-ended ranges (null endpoints) must also satisfy the equals/hashcode contract.
	a = new TermRangeQuery("content", null, "C", true, true);
	b = new TermRangeQuery("content", null, "C", true, true);
	Assert.AreEqual(a, b, "equivalent queries with null lowerterms are equal()");
	Assert.AreEqual(a.GetHashCode(), b.GetHashCode(), "hashcode must return same value when equals is true");

	a = new TermRangeQuery("content", "C", null, true, true);
	b = new TermRangeQuery("content", "C", null, true, true);
	Assert.AreEqual(a, b, "equivalent queries with null upperterms are equal()");
	Assert.AreEqual(a.GetHashCode(), b.GetHashCode(), "hashcode returns same value");

	// Swapping which endpoint is null must not compare equal.
	a = new TermRangeQuery("content", null, "C", true, true);
	b = new TermRangeQuery("content", "C", null, true, true);
	Assert.IsFalse(a.Equals(b), "queries with different upper and lower terms are not equal");

	a = new TermRangeQuery("content", "A", "C", false, false);
	b = new TermRangeQuery("content", "A", "C", true, true);
	Assert.IsFalse(a.Equals(b), "queries with different inclusive are not equal");

	// Presence/absence of a collator also distinguishes queries.
	a = new TermRangeQuery("content", "A", "C", false, false);
	b = new TermRangeQuery("content", "A", "C", false, false, System.Globalization.CultureInfo.CurrentCulture.CompareInfo);
	Assert.IsFalse(a.Equals(b), "a query with a collator is not equal to one without");
}
|
|||
|
|
|||
|
[Test]
public virtual void TestExclusiveCollating()
{
	// Same scenario as TestExclusive, but term comparison goes through an
	// English-culture collator instead of raw code-point order.
	Query rangeQuery = new TermRangeQuery("content", "A", "C", false, false, new System.Globalization.CultureInfo("en").CompareInfo);

	InitializeIndex(new System.String[] { "A", "B", "C", "D" });
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	ScoreDoc[] results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "A,B,C,D, only B in range");
	indexSearcher.Close();

	InitializeIndex(new System.String[] { "A", "B", "D" });
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "A,B,D, only B in range");
	indexSearcher.Close();

	// "C" equals the exclusive upper bound, so adding it must not add a hit.
	AddDoc("C");
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "C added, still only B in range");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
[Test]
public virtual void TestInclusiveCollating()
{
	// Same scenario as TestInclusive, but using an English-culture collator
	// for the range comparison.
	Query rangeQuery = new TermRangeQuery("content", "A", "C", true, true, new System.Globalization.CultureInfo("en").CompareInfo);

	InitializeIndex(new System.String[] { "A", "B", "C", "D" });
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	ScoreDoc[] results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(3, results.Length, "A,B,C,D - A,B,C in range");
	indexSearcher.Close();

	InitializeIndex(new System.String[] { "A", "B", "D" });
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(2, results.Length, "A,B,D - A and B in range");
	indexSearcher.Close();

	// The inclusive upper endpoint "C" rejoins the hits once it is indexed again.
	AddDoc("C");
	indexSearcher = new IndexSearcher(dir, true);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(3, results.Length, "C added - A, B, C in range");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
[Test]
public virtual void TestFarsi()
{
	// No Farsi-specific collation is available, but the Arabic locale happens
	// to order the Farsi characters correctly, so it serves as a stand-in.
	System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;
	Query rangeQuery = new TermRangeQuery("content", "\u062F", "\u0698", true, true, collator);

	// In raw Unicode order U+0633 would fall inside [U+062F, U+0698], but the
	// collator sorts U+0698 before U+0633, so the single indexed term below
	// must NOT be returned by the collating TermRangeQuery.
	InitializeIndex(new System.String[] { "\u0633\u0627\u0628" });
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	ScoreDoc[] results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(0, results.Length, "The index Term should not be included.");

	// A range that does contain the term under the collator must match it.
	rangeQuery = new TermRangeQuery("content", "\u0633", "\u0638", true, true, collator);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "The index Term should be included.");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
[Test]
public virtual void TestDanish()
{
	System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;
	// Danish collation orders these words exactly as listed (example borrowed
	// from TestSort.testInternationalSort()).
	System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
	Query rangeQuery = new TermRangeQuery("content", "H\u00D8T", "MAND", false, false, collator);

	// Code-point order would leave "H\u00C5T" outside ("H\u00D8T", "MAND"),
	// but Danish collation places it inside that range.
	InitializeIndex(words);
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	ScoreDoc[] results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(1, results.Length, "The index Term should be included.");

	// Starting the range at "H\u00C5T" itself (exclusive) leaves nothing in range.
	rangeQuery = new TermRangeQuery("content", "H\u00C5T", "MAND", false, false, collator);
	results = indexSearcher.Search(rangeQuery, null, 1000).ScoreDocs;
	Assert.AreEqual(0, results.Length, "The index Term should not be included.");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
/// <summary>
/// Analyzer producing exactly one token per field: at most the first character
/// of the input. An empty input yields a zero-length token, which the
/// LUCENE-38 tests below rely on so the empty string gets indexed as a term.
/// </summary>
private class SingleCharAnalyzer : Analyzer
{
	private class SingleCharTokenizer : Tokenizer
	{
		internal char[] buffer = new char[1];
		internal bool done;            // true once the single token has been emitted
		internal ITermAttribute termAtt;

		public SingleCharTokenizer(System.IO.TextReader r) : base(r)
		{
			termAtt = AddAttribute<ITermAttribute>();
		}

		/// <summary>
		/// Emits exactly one token per reader, then signals end-of-stream.
		/// </summary>
		public override bool IncrementToken()
		{
			// Check 'done' BEFORE reading: the original read unconditionally,
			// so every call after exhaustion consumed another character from
			// the reader for no purpose (and would throw on a disposed reader).
			if (done)
				return false;

			int count = input.Read(buffer, 0, buffer.Length);
			ClearAttributes();
			done = true;
			if (count == 1)
			{
				termAtt.TermBuffer()[0] = buffer[0];
				termAtt.SetTermLength(1);
			}
			else
			{
				// Read returned 0 (empty input): deliberately emit a
				// zero-length token so "" is indexed as a term.
				termAtt.SetTermLength(0);
			}
			return true;
		}

		public override void Reset(System.IO.TextReader reader)
		{
			base.Reset(reader);
			done = false;    // re-arm so the next field emits its token
		}
	}

	public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
	{
		// Reuse the cached tokenizer when available, otherwise create and cache one.
		Tokenizer tokenizer = (Tokenizer) PreviousTokenStream;
		if (tokenizer == null)
		{
			tokenizer = new SingleCharTokenizer(reader);
			PreviousTokenStream = tokenizer;
		}
		else
			tokenizer.Reset(reader);
		return tokenizer;
	}

	public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
	{
		return new SingleCharTokenizer(reader);
	}
}
|
|||
|
|
|||
|
/// <summary>
/// Rebuilds the index from scratch with one document per value, analyzed
/// with a WhitespaceAnalyzer.
/// </summary>
private void InitializeIndex(System.String[] values)
{
	InitializeIndex(values, new WhitespaceAnalyzer());
}
|
|||
|
|
|||
|
/// <summary>
/// Recreates the index (create == true wipes any prior content) and inserts
/// one document per value using the given analyzer.
/// </summary>
private void InitializeIndex(System.String[] values, Analyzer analyzer)
{
	IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
	foreach (System.String value_Renamed in values)
	{
		InsertDoc(writer, value_Renamed);
	}
	writer.Close();
}
|
|||
|
|
|||
|
/// <summary>
/// Appends a single document to the existing index (create == false preserves
/// previously indexed documents).
/// </summary>
private void AddDoc(System.String content)
{
	IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
	InsertDoc(writer, content);
	writer.Close();
}
|
|||
|
|
|||
|
/// <summary>
/// Adds one document carrying a unique stored "id" field and the analyzed
/// "content" field the range queries search against.
/// </summary>
private void InsertDoc(IndexWriter writer, System.String content)
{
	Document document = new Document();

	// docCount makes each id unique; it is bumped after every insert.
	document.Add(new Field("id", "id" + docCount, Field.Store.YES, Field.Index.NOT_ANALYZED));
	document.Add(new Field("content", content, Field.Store.NO, Field.Index.ANALYZED));

	writer.AddDocument(document);
	docCount++;
}
|
|||
|
|
|||
|
// LUCENE-38
[Test]
public virtual void TestExclusiveLowerNull()
{
	// http://issues.apache.org/jira/browse/LUCENE-38
	// A null lower term leaves the range open at the bottom, so every term
	// below the exclusive upper bound "C" matches - including the empty
	// string, which SingleCharAnalyzer indexes as a zero-length term.
	Analyzer analyzer = new SingleCharAnalyzer();
	Query rangeQuery = new TermRangeQuery("content", null, "C", false, false);
	InitializeIndex(new System.String[] { "A", "B", "", "C", "D" }, analyzer);
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	int hitCount = indexSearcher.Search(rangeQuery, null, 1000).TotalHits;
	// When Lucene-38 is fixed, use the assert on the next line:
	Assert.AreEqual(3, hitCount, "A,B,<empty string>,C,D => A, B & <empty string> are in range");
	// until Lucene-38 is fixed, use this assert:
	//Assert.AreEqual(2, hits.length(),"A,B,<empty string>,C,D => A, B & <empty string> are in range");

	indexSearcher.Close();
	InitializeIndex(new System.String[] { "A", "B", "", "D" }, analyzer);
	indexSearcher = new IndexSearcher(dir, true);
	hitCount = indexSearcher.Search(rangeQuery, null, 1000).TotalHits;
	// When Lucene-38 is fixed, use the assert on the next line:
	Assert.AreEqual(3, hitCount, "A,B,<empty string>,D => A, B & <empty string> are in range");
	// until Lucene-38 is fixed, use this assert:
	//Assert.AreEqual(2, hits.length(), "A,B,<empty string>,D => A, B & <empty string> are in range");
	indexSearcher.Close();

	// "C" equals the exclusive upper bound, so adding it changes nothing.
	AddDoc("C");
	indexSearcher = new IndexSearcher(dir, true);
	hitCount = indexSearcher.Search(rangeQuery, null, 1000).TotalHits;
	// When Lucene-38 is fixed, use the assert on the next line:
	Assert.AreEqual(3, hitCount, "C added, still A, B & <empty string> are in range");
	// until Lucene-38 is fixed, use this assert
	//Assert.AreEqual(2, hits.length(), "C added, still A, B & <empty string> are in range");
	indexSearcher.Close();
}
|
|||
|
|
|||
|
// LUCENE-38
[Test]
public virtual void TestInclusiveLowerNull()
{
	// http://issues.apache.org/jira/browse/LUCENE-38
	// With a null lower term and an inclusive upper bound "C", everything up
	// to and including "C" matches - the empty string (indexed by
	// SingleCharAnalyzer as a zero-length term) counts as well.
	Analyzer analyzer = new SingleCharAnalyzer();
	Query rangeQuery = new TermRangeQuery("content", null, "C", true, true);
	InitializeIndex(new System.String[] { "A", "B", "", "C", "D" }, analyzer);
	IndexSearcher indexSearcher = new IndexSearcher(dir, true);
	int hitCount = indexSearcher.Search(rangeQuery, null, 1000).TotalHits;
	// When Lucene-38 is fixed, use the assert on the next line:
	Assert.AreEqual(4, hitCount, "A,B,<empty string>,C,D => A,B,<empty string>,C in range");
	// until Lucene-38 is fixed, use this assert
	//Assert.AreEqual(3, hits.length(), "A,B,<empty string>,C,D => A,B,<empty string>,C in range");
	indexSearcher.Close();

	InitializeIndex(new System.String[] { "A", "B", "", "D" }, analyzer);
	indexSearcher = new IndexSearcher(dir, true);
	hitCount = indexSearcher.Search(rangeQuery, null, 1000).TotalHits;
	// When Lucene-38 is fixed, use the assert on the next line:
	Assert.AreEqual(3, hitCount, "A,B,<empty string>,D - A, B and <empty string> in range");
	// until Lucene-38 is fixed, use this assert
	//Assert.AreEqual(2, hits.length(), "A,B,<empty string>,D => A, B and <empty string> in range");
	indexSearcher.Close();

	// The inclusive upper endpoint "C" joins the hits once indexed.
	AddDoc("C");
	indexSearcher = new IndexSearcher(dir, true);
	hitCount = indexSearcher.Search(rangeQuery, null, 1000).TotalHits;
	// When Lucene-38 is fixed, use the assert on the next line:
	Assert.AreEqual(4, hitCount, "C added => A,B,<empty string>,C in range");
	// until Lucene-38 is fixed, use this assert
	//Assert.AreEqual(3, hits.length(), "C added => A,B,<empty string>,C in range");
	indexSearcher.Close();
}
|
|||
|
}
|
|||
|
}
|