/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using Lucene.Net.Analysis;
|
|
using Lucene.Net.Analysis.Tokenattributes;
|
|
using Lucene.Net.Util;
|
|
using NUnit.Framework;
|
|
|
|
using Analyzer = Lucene.Net.Analysis.Analyzer;
|
|
using LowerCaseTokenizer = Lucene.Net.Analysis.LowerCaseTokenizer;
|
|
using StopFilter = Lucene.Net.Analysis.StopFilter;
|
|
using TokenFilter = Lucene.Net.Analysis.TokenFilter;
|
|
using TokenStream = Lucene.Net.Analysis.TokenStream;
|
|
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
|
|
using Document = Lucene.Net.Documents.Document;
|
|
using Field = Lucene.Net.Documents.Field;
|
|
using IndexReader = Lucene.Net.Index.IndexReader;
|
|
using IndexWriter = Lucene.Net.Index.IndexWriter;
|
|
using Payload = Lucene.Net.Index.Payload;
|
|
using Term = Lucene.Net.Index.Term;
|
|
using TermPositions = Lucene.Net.Index.TermPositions;
|
|
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
|
|
using Directory = Lucene.Net.Store.Directory;
|
|
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
|
|
using BaseTokenStreamTestCase = Lucene.Net.Test.Analysis.BaseTokenStreamTestCase;
|
|
using PayloadSpanUtil = Lucene.Net.Search.Payloads.PayloadSpanUtil;
|
|
using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
|
|
using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
|
|
using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
|
|
|
|
namespace Lucene.Net.Search
|
|
{
|
|
|
|
/// <summary>Term position unit test.</summary>
/// <remarks>
/// Exercises custom position increments supplied by an analyzer: the anonymous
/// analyzer below ignores its input text and always emits the tokens "1".."5"
/// with position increments {0, 2, 1, 0, 1}, and the tests verify how
/// TermPositions, PhraseQuery, MultiPhraseQuery, QueryParser and StopFilter
/// react to the resulting position gaps and stacked (same-position) tokens.
/// </remarks>
public class TestPositionIncrement : LuceneTestCase
{
    /// <summary>
    /// Analyzer whose token stream ignores the input reader and always emits
    /// the fixed token/increment sequence defined in AnonymousClassTokenStream.
    /// </summary>
    private class AnonymousClassAnalyzer:Analyzer
    {
        public AnonymousClassAnalyzer(TestPositionIncrement enclosingInstance)
        {
            InitBlock(enclosingInstance);
        }

        /// <summary>
        /// Emits tokens "1","2","3","4","5" with position increments
        /// 0, 2, 1, 0, 1 respectively — i.e. "4" shares a position with "3",
        /// and there is a gap of one position between "1" and "2".
        /// </summary>
        private class AnonymousClassTokenStream:TokenStream
        {
            public AnonymousClassTokenStream(AnonymousClassAnalyzer enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
                // Register the attributes this stream populates for each token.
                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
                termAtt = AddAttribute<ITermAttribute>();
                offsetAtt = AddAttribute<IOffsetAttribute>();
            }
            private AnonymousClassAnalyzer enclosingInstance;
            public AnonymousClassAnalyzer Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }

            }
            // Fixed token text and the position increment applied to each token.
            private System.String[] TOKENS = new System.String[]{"1", "2", "3", "4", "5"};
            private int[] INCREMENTS = new int[]{0, 2, 1, 0, 1};
            // Index of the next token to emit.
            private int i = 0;

            internal IPositionIncrementAttribute posIncrAtt;
            internal ITermAttribute termAtt;
            internal IOffsetAttribute offsetAtt;

            protected override void Dispose(bool disposing)
            {
                // do nothing - there are no underlying resources to release
            }

            /// <summary>
            /// Emits one synthetic token per call until TOKENS is exhausted.
            /// Offsets are synthetic too: start == end == token index.
            /// </summary>
            public override bool IncrementToken()
            {
                if (i == TOKENS.Length)
                    return false;
                ClearAttributes();
                termAtt.SetTermBuffer(TOKENS[i]);
                offsetAtt.SetOffset(i, i);
                posIncrAtt.PositionIncrement = INCREMENTS[i];
                i++;
                return true;
            }
        }
        private void InitBlock(TestPositionIncrement enclosingInstance)
        {
            this.enclosingInstance = enclosingInstance;
        }
        private TestPositionIncrement enclosingInstance;
        public TestPositionIncrement Enclosing_Instance
        {
            get
            {
                return enclosingInstance;
            }

        }
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            // The reader is deliberately ignored; every field gets the fixed stream.
            return new AnonymousClassTokenStream(this);
        }
    }

    /// <summary>
    /// Indexes one document through the fixed-increment analyzer and verifies:
    /// raw term positions, phrase matching with default and explicit positions,
    /// multi-phrase matching across stacked terms, and the interaction between
    /// QueryParser.EnablePositionIncrements and StopFilter's increment setting.
    /// </summary>
    [Test]
    public virtual void TestSetPosition()
    {
        Analyzer analyzer = new AnonymousClassAnalyzer(this);
        Directory store = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        Document d = new Document();
        // Field text is irrelevant: the analyzer ignores it and emits "1".."5".
        d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(d);
        writer.Optimize();
        writer.Close();

        IndexSearcher searcher = new IndexSearcher(store, true);

        TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));
        pos.Next();
        // first token should be at position 0
        Assert.AreEqual(0, pos.NextPosition());

        pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
        pos.Next();
        // second token should be at position 2 (increment of 2 leaves a gap)
        Assert.AreEqual(2, pos.NextPosition());

        PhraseQuery q;
        ScoreDoc[] hits;

        // "1 2" as adjacent terms does not match: they are 2 positions apart.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"));
        q.Add(new Term("field", "2"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // same as previous, just specify positions explicitly.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 1);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // specifying correct positions should find the phrase.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 2);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "2" and "3" are adjacent (positions 2 and 3), so the phrase matches.
        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "3"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "3" and "4" share position 3, so they are not adjacent.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // phrase query would find it when correct positions are specified.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "4"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // phrase query should fail for a non existing searched term
        // even if other searched terms exist in the same searched position.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "9"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // multi-phrase query should succeed for a non existing searched term
        // because another searched term exists in the same searched position.
        MultiPhraseQuery mq = new MultiPhraseQuery();
        mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
        hits = searcher.Search(mq, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "2" (pos 2) then "4" (pos 3): adjacent via the stacked token.
        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "3" (pos 3) then "5" (pos 4): adjacent.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "4" (pos 3) then "5" (pos 4): adjacent.
        q = new PhraseQuery();
        q.Add(new Term("field", "4"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "2" (pos 2) then "5" (pos 4): gap of one, no match.
        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // should not find "1 2" because there is a gap of 1 in the index
        QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));
        q = (PhraseQuery) qp.Parse("\"1 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // omitted stop word cannot help because stop filter swallows the increments.
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // query parser alone won't help, because stop filter swallows the increments.
        qp.EnablePositionIncrements = true;
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // stop filter alone won't help, because query parser swallows the increments.
        qp.EnablePositionIncrements = false;
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // when both qp and stopFilter propagate increments, we should find the doc.
        qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
        qp.EnablePositionIncrements = true;
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);
    }

    /// <summary>
    /// Whitespace analyzer wrapped in a StopFilter that removes the word
    /// "stop"; whether the removed token's position increment is preserved
    /// is controlled by the constructor flag.
    /// </summary>
    private class StopWhitespaceAnalyzer:Analyzer
    {
        // When true, StopFilter preserves position gaps left by removed words.
        internal bool enablePositionIncrements;
        internal WhitespaceAnalyzer a = new WhitespaceAnalyzer();
        public StopWhitespaceAnalyzer(bool enablePositionIncrements)
        {
            this.enablePositionIncrements = enablePositionIncrements;
        }
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            TokenStream ts = a.TokenStream(fieldName, reader);
            return new StopFilter(enablePositionIncrements, ts, new CharArraySet(new List<string> {"stop"}, true));
        }
    }

    /// <summary>
    /// Verifies payloads and spans for tokens indexed at position 0 and at
    /// stacked positions: the content is analyzed by TestPayloadAnalyzer,
    /// whose PayloadFilter attaches a "pos: N" payload to every token and
    /// gives every other token a position increment of 0.
    /// </summary>
    [Test]
    public virtual void TestPayloadsPos0()
    {
        Directory dir = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true,
            IndexWriter.MaxFieldLength.LIMITED);
        Document doc = new Document();
        System.IO.MemoryStream ms = new System.IO.MemoryStream();
        System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
        sw.Write("a a b c d e a f g h i j a b k k");
        // flush to stream & reset its position so it can be read
        sw.Flush();
        ms.Position = 0;
        doc.Add(new Field("content", new System.IO.StreamReader(ms)));
        writer.AddDocument(doc);

        IndexReader r = writer.GetReader();

        TermPositions tp = r.TermPositions(new Term("content", "a"));
        int count = 0;
        Assert.IsTrue(tp.Next());
        // "a" occurs 4 times
        Assert.AreEqual(4, tp.Freq);
        int expected = 0;
        Assert.AreEqual(expected, tp.NextPosition());
        Assert.AreEqual(1, tp.NextPosition());
        Assert.AreEqual(3, tp.NextPosition());
        Assert.AreEqual(6, tp.NextPosition());

        // only one doc has "a"
        Assert.IsFalse(tp.Next());

        IndexSearcher is_Renamed = new IndexSearcher(r);

        SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
        SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
        SpanQuery[] sqs = new SpanQuery[] {stq1, stq2};
        SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

        count = 0;
        bool sawZero = false;
        // Count payloads over all span matches and confirm at least one span
        // starts at position 0.
        Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.IndexReader);
        while (pspans.Next())
        {
            System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
            sawZero |= pspans.Start() == 0;
            for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
            {
                count++;
                // Value is read but unused; the loop only counts payloads
                // (artifact of the Java-to-C# port).
                System.Object generatedAux2 = it.Current;
            }
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);

        // Same query via plain span enumeration: 4 span matches, one at pos 0.
        Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.IndexReader);
        count = 0;
        sawZero = false;
        while (spans.Next())
        {
            count++;
            sawZero |= spans.Start() == 0;
        }
        Assert.AreEqual(4, count);
        Assert.IsTrue(sawZero);

        // PayloadSpanUtil must return the same 5 payloads, including "pos: 0".
        sawZero = false;
        PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.IndexReader);
        System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
        count = pls.Count;
        for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext();)
        {
            System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current));
            sawZero |= s.Equals("pos: 0");
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);
        writer.Close();
        is_Renamed.IndexReader.Close();
        dir.Close();
    }
}
|
|
|
|
/// <summary>
/// Analyzer used by TestPayloadsPos0: lower-case tokenization wrapped in a
/// PayloadFilter, which attaches position payloads and halves the position
/// increments (every other token stacks on the previous position).
/// </summary>
class TestPayloadAnalyzer : Analyzer
{
    public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        // Tokenize, then decorate each token with a "pos: N" payload.
        return new PayloadFilter(new LowerCaseTokenizer(reader), fieldName);
    }
}
|
|
|
|
/// <summary>
/// TokenFilter that attaches a UTF-8 "pos: N" payload to every token and
/// rewrites position increments so tokens alternate between increment 0
/// (even-indexed tokens) and increment 1 (odd-indexed tokens).
/// </summary>
class PayloadFilter : TokenFilter
{
    internal System.String fieldName;

    // Current position counter used for the payload text.
    internal int pos;

    // Zero-based index of the token being processed.
    internal int i;

    internal IPositionIncrementAttribute posIncrAttr;
    internal IPayloadAttribute payloadAttr;
    internal ITermAttribute termAttr;

    public PayloadFilter(TokenStream input, System.String fieldName) : base(input)
    {
        this.fieldName = fieldName;
        pos = 0;
        i = 0;
        posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
        payloadAttr = input.AddAttribute<IPayloadAttribute>();
        termAttr = input.AddAttribute<ITermAttribute>();
    }

    public override bool IncrementToken()
    {
        // Guard clause: done as soon as the wrapped stream is exhausted.
        if (!input.IncrementToken())
        {
            return false;
        }

        // NOTE(review): the payload records `pos` before this token's
        // increment is applied — matches the original port's behavior.
        payloadAttr.Payload = new Payload(System.Text.Encoding.UTF8.GetBytes("pos: " + pos));

        // Even-indexed tokens stack on the previous position (increment 0);
        // odd-indexed tokens advance by one.
        int delta = (i % 2 == 1) ? 1 : 0;
        posIncrAttr.PositionIncrement = delta;
        pos += delta;
        i++;
        return true;
    }
}
|
|
} |