/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;

using NUnit.Framework;

using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory;
using RAMDirectory = Lucene.Net.Store.RAMDirectory;
using PhraseQuery = Lucene.Net.Search.PhraseQuery;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

namespace Lucene.Net.Index
{
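    // Tests for IndexWriter.AddIndexesNoOptimize: verifies doc and term
    // counts, handling of pending deletes, rejection of adding an index to
    // itself, merge-policy invariants when copying tail segments, and the
    // compound-file setting of the destination writer.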
    [TestFixture]
    public class TestAddIndexesNoOptimize : LuceneTestCase
    {
        [Test]
        public virtual void TestSimpleCase()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // two auxiliary directories
            Directory aux = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.MaxDoc());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.UseCompoundFile = false; // use one without a compound file
            // add 40 documents in separate files
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.MaxDoc());
            writer.Close();

            writer = NewWriter(aux2, true);
            // add 50 documents in compound files
            AddDocs2(writer, 50);
            Assert.AreEqual(50, writer.MaxDoc());
            writer.Close();

            // test doc count before segments are merged
            writer = NewWriter(dir, false);
            Assert.AreEqual(100, writer.MaxDoc());
            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(190, writer.MaxDoc());
            writer.Close();

            // make sure the old index is correct
            VerifyNumDocs(aux, 40);

            // make sure the new index is correct
            VerifyNumDocs(dir, 190);

            // now add another set in.
            Directory aux3 = new RAMDirectory();
            writer = NewWriter(aux3, true);
            // add 40 documents
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.MaxDoc());
            writer.Close();

            // test doc count before segments are merged/index is optimized
            writer = NewWriter(dir, false);
            Assert.AreEqual(190, writer.MaxDoc());
            writer.AddIndexesNoOptimize(new Directory[] { aux3 });
            Assert.AreEqual(230, writer.MaxDoc());
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now optimize it.
            writer = NewWriter(dir, false);
            writer.Optimize();
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now add a single document
            Directory aux4 = new RAMDirectory();
            writer = NewWriter(aux4, true);
            AddDocs2(writer, 1);
            writer.Close();

            writer = NewWriter(dir, false);
            Assert.AreEqual(230, writer.MaxDoc());
            writer.AddIndexesNoOptimize(new Directory[] { aux4 });
            Assert.AreEqual(231, writer.MaxDoc());
            writer.Close();

            VerifyNumDocs(dir, 231);

            VerifyTermDocs(dir, new Term("content", "bbb"), 51);
        }
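        // In this variant, AddIndexesNoOptimize runs before the buffered
        // updates; TestWithPendingDeletes2/3 move the call after the
        // updates and after the deletes, respectively.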
        [Test]
        public virtual void TestWithPendingDeletes()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);
            IndexWriter writer = NewWriter(dir, false);
            writer.AddIndexesNoOptimize(new Directory[] { aux });

            // Adds 10 docs, then replaces them with another 10
            // docs, so 10 pending deletes:
            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
                writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);
            }
            // Deletes one of the 10 added docs, leaving 9:
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("content", "bbb"));
            q.Add(new Term("content", "14"));
            writer.DeleteDocuments(q);

            writer.Optimize();
            writer.Commit();

            VerifyNumDocs(dir, 1039);
            VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
            VerifyTermDocs(dir, new Term("content", "bbb"), 9);

            writer.Close();
            dir.Close();
            aux.Close();
        }
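        // Same scenario, but AddIndexesNoOptimize runs after the buffered
        // updates, while the 10 deletes are still pending.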
        [Test]
        public virtual void TestWithPendingDeletes2()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);
            IndexWriter writer = NewWriter(dir, false);

            // Adds 10 docs, then replaces them with another 10
            // docs, so 10 pending deletes:
            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
                writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);
            }

            writer.AddIndexesNoOptimize(new Directory[] { aux });

            // Deletes one of the 10 added docs, leaving 9:
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("content", "bbb"));
            q.Add(new Term("content", "14"));
            writer.DeleteDocuments(q);

            writer.Optimize();
            writer.Commit();

            VerifyNumDocs(dir, 1039);
            VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
            VerifyTermDocs(dir, new Term("content", "bbb"), 9);

            writer.Close();
            dir.Close();
            aux.Close();
        }
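        // Same scenario again, with AddIndexesNoOptimize running after both
        // the buffered updates and the delete-by-query.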
        [Test]
        public virtual void TestWithPendingDeletes3()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);
            IndexWriter writer = NewWriter(dir, false);

            // Adds 10 docs, then replaces them with another 10
            // docs, so 10 pending deletes:
            for (int i = 0; i < 20; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
                writer.UpdateDocument(new Term("id", "" + (i % 10)), doc);
            }

            // Deletes one of the 10 added docs, leaving 9:
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("content", "bbb"));
            q.Add(new Term("content", "14"));
            writer.DeleteDocuments(q);

            writer.AddIndexesNoOptimize(new Directory[] { aux });

            writer.Optimize();
            writer.Commit();

            VerifyNumDocs(dir, 1039);
            VerifyTermDocs(dir, new Term("content", "aaa"), 1030);
            VerifyTermDocs(dir, new Term("content", "bbb"), 9);

            writer.Close();
            dir.Close();
            aux.Close();
        }
        // case 0: add self or exceed maxMergeDocs, expect exception
        [Test]
        public virtual void TestAddSelf()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.MaxDoc());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.UseCompoundFile = false; // use one without a compound file
            writer.SetMaxBufferedDocs(1000);
            // add 40 documents in separate files
            AddDocs(writer, 40);
            writer.Close();
            // recreate the auxiliary index and add 100 documents
            writer = NewWriter(aux, true);
            writer.UseCompoundFile = false; // use one without a compound file
            writer.SetMaxBufferedDocs(1000);
            AddDocs(writer, 100);
            writer.Close();

            writer = NewWriter(dir, false);
            try
            {
                // cannot add self
                writer.AddIndexesNoOptimize(new Directory[] { aux, dir });
                Assert.Fail("did not hit expected exception");
            }
            catch (System.ArgumentException)
            {
                Assert.AreEqual(100, writer.MaxDoc());
            }
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 100);
        }
        // in all the remaining tests, make the doc count of the oldest segment
        // in dir large so that it is never merged in AddIndexesNoOptimize()

        // case 1: no tail segments
        [Test]
        public virtual void TestNoTailSegments()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 4;
            AddDocs(writer, 10);

            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(1040, writer.MaxDoc());
            Assert.AreEqual(2, writer.GetSegmentCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1040);
        }
        // case 2: tail segments, invariants hold, no copy
        [Test]
        public virtual void TestNoCopySegments()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(9);
            writer.MergeFactor = 4;
            AddDocs(writer, 2);

            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(1032, writer.MaxDoc());
            Assert.AreEqual(2, writer.GetSegmentCount());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1032);
        }
        // case 3: tail segments, invariants hold, copy, invariants hold
        [Test]
        public virtual void TestNoMergeAfterCopy()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 4;

            writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
            Assert.AreEqual(1060, writer.MaxDoc());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1060);
        }
        // case 4: tail segments, invariants hold, copy, invariants do not hold
        [Test]
        public virtual void TestMergeAfterCopy()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexReader reader = IndexReader.Open(aux, false);
            for (int i = 0; i < 20; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(10, reader.NumDocs());
            reader.Close();

            IndexWriter writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(4);
            writer.MergeFactor = 4;

            writer.AddIndexesNoOptimize(new Directory[] { aux, new RAMDirectory(aux) });
            Assert.AreEqual(1020, writer.MaxDoc());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1020);
        }
        // case 5: tail segments, invariants do not hold
        [Test]
        public virtual void TestMoreMerges()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directories
            Directory aux = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(aux2, true);
            writer.SetMaxBufferedDocs(100);
            writer.MergeFactor = 10;
            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(30, writer.MaxDoc());
            Assert.AreEqual(3, writer.GetSegmentCount());
            writer.Close();

            IndexReader reader = IndexReader.Open(aux, false);
            for (int i = 0; i < 27; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(3, reader.NumDocs());
            reader.Close();

            reader = IndexReader.Open(aux2, false);
            for (int i = 0; i < 8; i++)
            {
                reader.DeleteDocument(i);
            }
            Assert.AreEqual(22, reader.NumDocs());
            reader.Close();

            writer = NewWriter(dir, false);
            writer.SetMaxBufferedDocs(6);
            writer.MergeFactor = 4;

            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(1025, writer.MaxDoc());
            Assert.AreEqual(1000, writer.GetDocCount(0));
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 1025);
        }
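        // Creates an IndexWriter on dir using a WhitespaceAnalyzer and a
        // LogDocMergePolicy, so merge decisions depend on doc counts rather
        // than byte sizes.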
        private IndexWriter NewWriter(Directory dir, bool create)
        {
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            return writer;
        }
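        // Adds numDocs documents whose "content" field holds the single term "aaa".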
        private void AddDocs(IndexWriter writer, int numDocs)
        {
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
        }
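        // Adds numDocs documents whose "content" field holds the single term "bbb".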
        private void AddDocs2(IndexWriter writer, int numDocs)
        {
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("content", "bbb", Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
        }
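        // Opens a fresh reader and checks the index holds exactly numDocs
        // documents with none deleted (MaxDoc == NumDocs).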
        private void VerifyNumDocs(Directory dir, int numDocs)
        {
            IndexReader reader = IndexReader.Open(dir, true);
            Assert.AreEqual(numDocs, reader.MaxDoc);
            Assert.AreEqual(numDocs, reader.NumDocs());
            reader.Close();
        }
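        // Opens a fresh reader and checks that exactly numDocs documents
        // contain the given term.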
        private void VerifyTermDocs(Directory dir, Term term, int numDocs)
        {
            IndexReader reader = IndexReader.Open(dir, true);
            TermDocs termDocs = reader.TermDocs(term);
            int count = 0;
            while (termDocs.Next())
                count++;
            Assert.AreEqual(numDocs, count);
            reader.Close();
        }
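        // Fills dir with one 1000-doc segment and aux with three 10-doc
        // segments, each stored without compound files.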
        private void SetUpDirs(Directory dir, Directory aux)
        {
            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            writer.SetMaxBufferedDocs(1000);
            // add 1000 documents in 1 segment
            AddDocs(writer, 1000);
            Assert.AreEqual(1000, writer.MaxDoc());
            Assert.AreEqual(1, writer.GetSegmentCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.UseCompoundFile = false; // use one without a compound file
            writer.SetMaxBufferedDocs(100);
            writer.MergeFactor = 10;
            // add 30 documents in 3 segments
            for (int i = 0; i < 3; i++)
            {
                AddDocs(writer, 10);
                writer.Close();
                writer = NewWriter(aux, false);
                writer.UseCompoundFile = false; // use one without a compound file
                writer.SetMaxBufferedDocs(100);
                writer.MergeFactor = 10;
            }
            Assert.AreEqual(30, writer.MaxDoc());
            Assert.AreEqual(3, writer.GetSegmentCount());
            writer.Close();
        }
        // LUCENE-1270
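        // builds a source index of many small segments, then adds it to a
        // second writer with aggressive merge settings and makes sure
        // Close() completes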
        [Test]
        public virtual void TestHangOnClose()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
            writer.SetMaxBufferedDocs(5);
            writer.UseCompoundFile = false;
            writer.MergeFactor = 100;

            Document doc = new Document();
            doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            for (int i = 0; i < 60; i++)
                writer.AddDocument(doc);
            writer.SetMaxBufferedDocs(200);
            Document doc2 = new Document();
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            for (int i = 0; i < 10; i++)
                writer.AddDocument(doc2);
            writer.Close();

            Directory dir2 = new MockRAMDirectory();
            writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);
            lmp.MinMergeMB = 0.0001;
            writer.SetMergePolicy(lmp);
            writer.MergeFactor = 4;
            writer.UseCompoundFile = false;
            writer.SetMergeScheduler(new SerialMergeScheduler());
            writer.AddIndexesNoOptimize(new Directory[] { dir });
            writer.Close();
            dir.Close();
            dir2.Close();
        }
        // LUCENE-1642: make sure CFS of destination indexwriter
        // is respected when copying tail segments
        [Test]
        public virtual void TestTargetCFS()
        {
            Directory dir = new RAMDirectory();
            IndexWriter writer = NewWriter(dir, true);
            writer.UseCompoundFile = false;
            AddDocs(writer, 1);
            writer.Close();

            Directory other = new RAMDirectory();
            writer = NewWriter(other, true);
            writer.UseCompoundFile = true;
            writer.AddIndexesNoOptimize(new Directory[] { dir });
            Assert.IsTrue(writer.NewestSegment().GetUseCompoundFile());
            writer.Close();
        }
    }
}