/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using Lucene.Net.Analysis;
|
|
using Lucene.Net.Analysis.Tokenattributes;
|
|
using Lucene.Net.Util;
|
|
using NUnit.Framework;
|
|
|
|
using Analyzer = Lucene.Net.Analysis.Analyzer;
|
|
using LowerCaseTokenizer = Lucene.Net.Analysis.LowerCaseTokenizer;
|
|
using StopFilter = Lucene.Net.Analysis.StopFilter;
|
|
using TokenFilter = Lucene.Net.Analysis.TokenFilter;
|
|
using TokenStream = Lucene.Net.Analysis.TokenStream;
|
|
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
|
|
using Document = Lucene.Net.Documents.Document;
|
|
using Field = Lucene.Net.Documents.Field;
|
|
using IndexReader = Lucene.Net.Index.IndexReader;
|
|
using IndexWriter = Lucene.Net.Index.IndexWriter;
|
|
using Payload = Lucene.Net.Index.Payload;
|
|
using Term = Lucene.Net.Index.Term;
|
|
using TermPositions = Lucene.Net.Index.TermPositions;
|
|
using QueryParser = Lucene.Net.QueryParsers.QueryParser;
|
|
using Directory = Lucene.Net.Store.Directory;
|
|
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
|
|
using BaseTokenStreamTestCase = Lucene.Net.Test.Analysis.BaseTokenStreamTestCase;
|
|
using PayloadSpanUtil = Lucene.Net.Search.Payloads.PayloadSpanUtil;
|
|
using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
|
|
using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
|
|
using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
|
|
|
|
namespace Lucene.Net.Search
|
|
{
|
|
|
|
/// <summary>Term position unit test.</summary>
/// <remarks>
/// Exercises custom position increments supplied by an analyzer: the anonymous
/// analyzer below ignores its input text and always emits the tokens "1".."5"
/// with position increments {0, 2, 1, 0, 1}, and the tests verify how
/// TermPositions, PhraseQuery, MultiPhraseQuery, QueryParser and StopFilter
/// react to the resulting position gaps and stacked (same-position) tokens.
/// </remarks>
public class TestPositionIncrement : LuceneTestCase
{
    /// <summary>
    /// Analyzer whose token stream ignores the input reader and always emits
    /// the fixed token/increment sequence defined in AnonymousClassTokenStream.
    /// </summary>
    private class AnonymousClassAnalyzer:Analyzer
    {
        public AnonymousClassAnalyzer(TestPositionIncrement enclosingInstance)
        {
            InitBlock(enclosingInstance);
        }

        /// <summary>
        /// Emits tokens "1","2","3","4","5" with position increments
        /// 0, 2, 1, 0, 1 respectively — i.e. "4" shares a position with "3",
        /// and there is a gap of one position between "1" and "2".
        /// </summary>
        private class AnonymousClassTokenStream:TokenStream
        {
            public AnonymousClassTokenStream(AnonymousClassAnalyzer enclosingInstance)
            {
                InitBlock(enclosingInstance);
            }
            private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
            {
                this.enclosingInstance = enclosingInstance;
                // Register the attributes this stream populates for each token.
                posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
                termAtt = AddAttribute<ITermAttribute>();
                offsetAtt = AddAttribute<IOffsetAttribute>();
            }
            private AnonymousClassAnalyzer enclosingInstance;
            public AnonymousClassAnalyzer Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }

            }
            // Fixed token text and the position increment applied to each token.
            private System.String[] TOKENS = new System.String[]{"1", "2", "3", "4", "5"};
            private int[] INCREMENTS = new int[]{0, 2, 1, 0, 1};
            // Index of the next token to emit.
            private int i = 0;

            internal IPositionIncrementAttribute posIncrAtt;
            internal ITermAttribute termAtt;
            internal IOffsetAttribute offsetAtt;

            protected override void Dispose(bool disposing)
            {
                // do nothing - there are no underlying resources to release
            }

            /// <summary>
            /// Emits one synthetic token per call until TOKENS is exhausted.
            /// Offsets are synthetic too: start == end == token index.
            /// </summary>
            public override bool IncrementToken()
            {
                if (i == TOKENS.Length)
                    return false;
                ClearAttributes();
                termAtt.SetTermBuffer(TOKENS[i]);
                offsetAtt.SetOffset(i, i);
                posIncrAtt.PositionIncrement = INCREMENTS[i];
                i++;
                return true;
            }
        }
        private void InitBlock(TestPositionIncrement enclosingInstance)
        {
            this.enclosingInstance = enclosingInstance;
        }
        private TestPositionIncrement enclosingInstance;
        public TestPositionIncrement Enclosing_Instance
        {
            get
            {
                return enclosingInstance;
            }

        }
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            // The reader is deliberately ignored; every field gets the fixed stream.
            return new AnonymousClassTokenStream(this);
        }
    }

    /// <summary>
    /// Indexes one document through the fixed-increment analyzer and verifies:
    /// raw term positions, phrase matching with default and explicit positions,
    /// multi-phrase matching across stacked terms, and the interaction between
    /// QueryParser.EnablePositionIncrements and StopFilter's increment setting.
    /// </summary>
    [Test]
    public virtual void TestSetPosition()
    {
        Analyzer analyzer = new AnonymousClassAnalyzer(this);
        Directory store = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        Document d = new Document();
        // Field text is irrelevant: the analyzer ignores it and emits "1".."5".
        d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(d);
        writer.Optimize();
        writer.Close();

        IndexSearcher searcher = new IndexSearcher(store, true);

        TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));
        pos.Next();
        // first token should be at position 0
        Assert.AreEqual(0, pos.NextPosition());

        pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
        pos.Next();
        // second token should be at position 2 (increment of 2 leaves a gap)
        Assert.AreEqual(2, pos.NextPosition());

        PhraseQuery q;
        ScoreDoc[] hits;

        // "1 2" as adjacent terms does not match: they are 2 positions apart.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"));
        q.Add(new Term("field", "2"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // same as previous, just specify positions explicitly.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 1);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // specifying correct positions should find the phrase.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 2);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "2" and "3" are adjacent (positions 2 and 3), so the phrase matches.
        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "3"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "3" and "4" share position 3, so they are not adjacent.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // phrase query would find it when correct positions are specified.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "4"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // phrase query should fail for a non existing searched term
        // even if other searched terms exist in the same searched position.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "9"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // multi-phrase query should succeed for a non existing searched term
        // because another searched term exists in the same searched position.
        MultiPhraseQuery mq = new MultiPhraseQuery();
        mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
        hits = searcher.Search(mq, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "2" (pos 2) then "4" (pos 3): adjacent via the stacked token.
        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "3" (pos 3) then "5" (pos 4): adjacent.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "4" (pos 3) then "5" (pos 4): adjacent.
        q = new PhraseQuery();
        q.Add(new Term("field", "4"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // "2" (pos 2) then "5" (pos 4): gap of one, no match.
        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // should not find "1 2" because there is a gap of 1 in the index
        QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));
        q = (PhraseQuery) qp.Parse("\"1 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // omitted stop word cannot help because stop filter swallows the increments.
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // query parser alone won't help, because stop filter swallows the increments.
        qp.EnablePositionIncrements = true;
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // stop filter alone won't help, because query parser swallows the increments.
        qp.EnablePositionIncrements = false;
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // when both qp and stopFilter propagate increments, we should find the doc.
        qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
        qp.EnablePositionIncrements = true;
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);
    }

    /// <summary>
    /// Whitespace analyzer wrapped in a StopFilter that removes the word
    /// "stop"; whether the removed token's position increment is preserved
    /// is controlled by the constructor flag.
    /// </summary>
    private class StopWhitespaceAnalyzer:Analyzer
    {
        // When true, StopFilter preserves position gaps left by removed words.
        internal bool enablePositionIncrements;
        internal WhitespaceAnalyzer a = new WhitespaceAnalyzer();
        public StopWhitespaceAnalyzer(bool enablePositionIncrements)
        {
            this.enablePositionIncrements = enablePositionIncrements;
        }
        public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
        {
            TokenStream ts = a.TokenStream(fieldName, reader);
            return new StopFilter(enablePositionIncrements, ts, new CharArraySet(new List<string> {"stop"}, true));
        }
    }

    /// <summary>
    /// Verifies payloads and spans for tokens indexed at position 0 and at
    /// stacked positions: the content is analyzed by TestPayloadAnalyzer,
    /// whose PayloadFilter attaches a "pos: N" payload to every token and
    /// gives every other token a position increment of 0.
    /// </summary>
    [Test]
    public virtual void TestPayloadsPos0()
    {
        Directory dir = new MockRAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new TestPayloadAnalyzer(), true,
            IndexWriter.MaxFieldLength.LIMITED);
        Document doc = new Document();
        System.IO.MemoryStream ms = new System.IO.MemoryStream();
        System.IO.StreamWriter sw = new System.IO.StreamWriter(ms);
        sw.Write("a a b c d e a f g h i j a b k k");
        // flush to stream & reset its position so it can be read
        sw.Flush();
        ms.Position = 0;
        doc.Add(new Field("content", new System.IO.StreamReader(ms)));
        writer.AddDocument(doc);

        IndexReader r = writer.GetReader();

        TermPositions tp = r.TermPositions(new Term("content", "a"));
        int count = 0;
        Assert.IsTrue(tp.Next());
        // "a" occurs 4 times
        Assert.AreEqual(4, tp.Freq);
        int expected = 0;
        Assert.AreEqual(expected, tp.NextPosition());
        Assert.AreEqual(1, tp.NextPosition());
        Assert.AreEqual(3, tp.NextPosition());
        Assert.AreEqual(6, tp.NextPosition());

        // only one doc has "a"
        Assert.IsFalse(tp.Next());

        IndexSearcher is_Renamed = new IndexSearcher(r);

        SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
        SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));
        SpanQuery[] sqs = new SpanQuery[] {stq1, stq2};
        SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

        count = 0;
        bool sawZero = false;
        // Count payloads over all span matches and confirm at least one span
        // starts at position 0.
        Lucene.Net.Search.Spans.Spans pspans = snq.GetSpans(is_Renamed.IndexReader);
        while (pspans.Next())
        {
            System.Collections.Generic.ICollection<byte[]> payloads = pspans.GetPayload();
            sawZero |= pspans.Start() == 0;
            for (System.Collections.IEnumerator it = payloads.GetEnumerator(); it.MoveNext();)
            {
                count++;
                // Value is read but unused; the loop only counts payloads
                // (artifact of the Java-to-C# port).
                System.Object generatedAux2 = it.Current;
            }
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);

        // Same query via plain span enumeration: 4 span matches, one at pos 0.
        Lucene.Net.Search.Spans.Spans spans = snq.GetSpans(is_Renamed.IndexReader);
        count = 0;
        sawZero = false;
        while (spans.Next())
        {
            count++;
            sawZero |= spans.Start() == 0;
        }
        Assert.AreEqual(4, count);
        Assert.IsTrue(sawZero);

        // PayloadSpanUtil must return the same 5 payloads, including "pos: 0".
        sawZero = false;
        PayloadSpanUtil psu = new PayloadSpanUtil(is_Renamed.IndexReader);
        System.Collections.Generic.ICollection<byte[]> pls = psu.GetPayloadsForQuery(snq);
        count = pls.Count;
        for (System.Collections.IEnumerator it = pls.GetEnumerator(); it.MoveNext();)
        {
            System.String s = new System.String(System.Text.UTF8Encoding.UTF8.GetChars((byte[]) it.Current));
            sawZero |= s.Equals("pos: 0");
        }
        Assert.AreEqual(5, count);
        Assert.IsTrue(sawZero);
        writer.Close();
        is_Renamed.IndexReader.Close();
        dir.Close();
    }
}
|
|
|
|
/// <summary>
/// Analyzer used by TestPayloadsPos0: lower-case tokenization wrapped in a
/// PayloadFilter, which attaches position payloads and halves the position
/// increments (every other token stacks on the previous position).
/// </summary>
class TestPayloadAnalyzer : Analyzer
{
    public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        // Tokenize, then decorate each token with a "pos: N" payload.
        return new PayloadFilter(new LowerCaseTokenizer(reader), fieldName);
    }
}
|
|
|
|
/// <summary>
/// TokenFilter that attaches a UTF-8 "pos: N" payload to every token and
/// rewrites position increments so tokens alternate between increment 0
/// (even-indexed tokens) and increment 1 (odd-indexed tokens).
/// </summary>
class PayloadFilter : TokenFilter
{
    internal System.String fieldName;

    // Current position counter used for the payload text.
    internal int pos;

    // Zero-based index of the token being processed.
    internal int i;

    internal IPositionIncrementAttribute posIncrAttr;
    internal IPayloadAttribute payloadAttr;
    internal ITermAttribute termAttr;

    public PayloadFilter(TokenStream input, System.String fieldName) : base(input)
    {
        this.fieldName = fieldName;
        pos = 0;
        i = 0;
        posIncrAttr = input.AddAttribute<IPositionIncrementAttribute>();
        payloadAttr = input.AddAttribute<IPayloadAttribute>();
        termAttr = input.AddAttribute<ITermAttribute>();
    }

    public override bool IncrementToken()
    {
        // Guard clause: done as soon as the wrapped stream is exhausted.
        if (!input.IncrementToken())
        {
            return false;
        }

        // NOTE(review): the payload records `pos` before this token's
        // increment is applied — matches the original port's behavior.
        payloadAttr.Payload = new Payload(System.Text.Encoding.UTF8.GetBytes("pos: " + pos));

        // Even-indexed tokens stack on the previous position (increment 0);
        // odd-indexed tokens advance by one.
        int delta = (i % 2 == 1) ? 1 : 0;
        posIncrAttr.PositionIncrement = delta;
        pos += delta;
        i++;
        return true;
    }
}
|
|
} |