Imported Upstream version 3.6.0

Former-commit-id: da6be194a6b1221998fc28233f2503bd61dd9d14
This commit is contained in:
Jo Shields
2014-08-13 10:39:27 +01:00
commit a575963da9
50588 changed files with 8155799 additions and 0 deletions

View File

@@ -0,0 +1,142 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
namespace Lucene.Net.Analyzers.NGram
{
/// <summary>
/// Tests <see cref="EdgeNGramTokenFilter"/> for correctness.
/// </summary>
[TestFixture]
public class EdgeNGramTokenFilterTest : BaseTokenStreamTestCase
{
    // Default input stream for most tests: a single token "abcde".
    private TokenStream input;

    // [SetUp] attribute and base.SetUp() call added for consistency with the
    // other NGram fixtures in this assembly (e.g. TestNGramTokenFilter);
    // previously the base-class initialization was silently skipped.
    [SetUp]
    public override void SetUp()
    {
        base.SetUp();
        input = new WhitespaceTokenizer(new StringReader("abcde"));
    }

    [Test]
    public void TestInvalidInput()
    {
        // minGram and maxGram of 0 are invalid; the constructor must throw.
        bool gotException = false;
        try
        {
            new EdgeNGramTokenFilter(input, Side.FRONT, 0, 0);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.True(gotException);
    }

    [Test]
    public void TestInvalidInput2()
    {
        // minGram greater than maxGram is invalid.
        bool gotException = false;
        try
        {
            new EdgeNGramTokenFilter(input, Side.FRONT, 2, 1);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.True(gotException);
    }

    [Test]
    public void TestInvalidInput3()
    {
        // Negative minGram is invalid.
        bool gotException = false;
        try
        {
            new EdgeNGramTokenFilter(input, Side.FRONT, -1, 2);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.True(gotException);
    }

    [Test]
    public void TestFrontUnigram()
    {
        // A 1..1 front filter emits only the first character.
        EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, Side.FRONT, 1, 1);
        AssertTokenStreamContents(tokenizer, new String[] { "a" }, new int[] { 0 }, new int[] { 1 });
    }

    [Test]
    public void TestBackUnigram()
    {
        // A 1..1 back filter emits only the last character.
        EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, Side.BACK, 1, 1);
        AssertTokenStreamContents(tokenizer, new String[] { "e" }, new int[] { 4 }, new int[] { 5 });
    }

    [Test]
    public void TestOversizedNgrams()
    {
        // Grams longer than the token produce no output.
        EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, Side.FRONT, 6, 6);
        AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0]);
    }

    [Test]
    public void TestFrontRangeOfNgrams()
    {
        // 1..3 from the front yields "a", "ab", "abc", all anchored at offset 0.
        EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, Side.FRONT, 1, 3);
        AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 });
    }

    [Test]
    public void TestBackRangeOfNgrams()
    {
        // 1..3 from the back yields "e", "de", "cde", all ending at offset 5.
        EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, Side.BACK, 1, 3);
        AssertTokenStreamContents(tokenizer, new String[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 });
    }

    [Test]
    public void TestSmallTokenInStream()
    {
        // Tokens shorter than minGram ("de") are dropped; offsets of the
        // surviving grams still reflect their original positions.
        input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
        EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, Side.FRONT, 3, 3);
        AssertTokenStreamContents(tokenizer, new String[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
    }

    [Test]
    public void TestReset()
    {
        // After resetting the underlying tokenizer, the filter must produce
        // the same output a second time.
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
        EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(tokenizer, Side.FRONT, 1, 3);
        AssertTokenStreamContents(filter, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 });
        tokenizer.Reset(new StringReader("abcde"));
        AssertTokenStreamContents(filter, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 });
    }
}
}

View File

@@ -0,0 +1,131 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.IO;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
namespace Lucene.Net.Analyzers.NGram
{
/// <summary>
/// Tests <see cref="EdgeNGramTokenizer"/> for correctness.
/// </summary>
[TestFixture]
public class EdgeNGramTokenizerTest : BaseTokenStreamTestCase
{
    // Default input for most tests: the five-character string "abcde".
    private StringReader input;

    // [SetUp] attribute and base.SetUp() call added for consistency with the
    // other NGram fixtures in this assembly (e.g. TestNGramTokenizer);
    // previously the base-class initialization was silently skipped.
    [SetUp]
    public override void SetUp()
    {
        base.SetUp();
        input = new StringReader("abcde");
    }

    [Test]
    public void TestInvalidInput()
    {
        // minGram and maxGram of 0 are invalid; the constructor must throw.
        bool gotException = false;
        try
        {
            new EdgeNGramTokenizer(input, Side.FRONT, 0, 0);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.True(gotException);
    }

    [Test]
    public void TestInvalidInput2()
    {
        // minGram greater than maxGram is invalid.
        bool gotException = false;
        try
        {
            new EdgeNGramTokenizer(input, Side.FRONT, 2, 1);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.True(gotException);
    }

    [Test]
    public void TestInvalidInput3()
    {
        // Negative minGram is invalid.
        bool gotException = false;
        try
        {
            new EdgeNGramTokenizer(input, Side.FRONT, -1, 2);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.True(gotException);
    }

    [Test]
    public void TestFrontUnigram()
    {
        // A 1..1 front tokenizer emits only the first character.
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, Side.FRONT, 1, 1);
        AssertTokenStreamContents(tokenizer, new String[] { "a" }, new int[] { 0 }, new int[] { 1 }, 5 /* abcde */);
    }

    [Test]
    public void TestBackUnigram()
    {
        // A 1..1 back tokenizer emits only the last character.
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, Side.BACK, 1, 1);
        AssertTokenStreamContents(tokenizer, new String[] { "e" }, new int[] { 4 }, new int[] { 5 }, 5 /* abcde */);
    }

    [Test]
    public void TestOversizedNgrams()
    {
        // Grams longer than the input produce no output.
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, Side.FRONT, 6, 6);
        AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
    }

    [Test]
    public void TestFrontRangeOfNgrams()
    {
        // 1..3 from the front yields "a", "ab", "abc", all anchored at offset 0.
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, Side.FRONT, 1, 3);
        AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5 /* abcde */);
    }

    [Test]
    public void TestBackRangeOfNgrams()
    {
        // 1..3 from the back yields "e", "de", "cde", all ending at offset 5.
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, Side.BACK, 1, 3);
        AssertTokenStreamContents(tokenizer, new String[] { "e", "de", "cde" }, new int[] { 4, 3, 2 }, new int[] { 5, 5, 5 }, 5 /* abcde */);
    }

    [Test]
    public void TestReset()
    {
        // Resetting the tokenizer with a fresh reader must reproduce the
        // same token stream.
        EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(input, Side.FRONT, 1, 3);
        AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5 /* abcde */);
        tokenizer.Reset(new StringReader("abcde"));
        AssertTokenStreamContents(tokenizer, new String[] { "a", "ab", "abc" }, new int[] { 0, 0, 0 }, new int[] { 1, 2, 3 }, 5 /* abcde */);
    }
}
}

View File

@@ -0,0 +1,123 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
namespace Lucene.Net.Analyzers.NGram
{
/// <summary>
/// Tests <see cref="NGramTokenFilter"/> for correctness.
/// </summary>
[TestFixture]
public class TestNGramTokenFilter : BaseTokenStreamTestCase
{
    // Default input stream for most tests: a single token "abcde".
    private TokenStream input;

    [SetUp]
    public override void SetUp()
    {
        base.SetUp();
        input = new WhitespaceTokenizer(new StringReader("abcde"));
    }

    [Test]
    public void TestInvalidInput()
    {
        // minGram greater than maxGram is invalid; the constructor must throw.
        bool gotException = false;
        try
        {
            new NGramTokenFilter(input, 2, 1);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.IsTrue(gotException);
    }

    [Test]
    public void TestInvalidInput2()
    {
        // minGram of 0 is invalid.
        bool gotException = false;
        try
        {
            new NGramTokenFilter(input, 0, 1);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.IsTrue(gotException);
    }

    [Test]
    public void TestUnigrams()
    {
        // 1..1 yields each character as its own gram.
        NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);
        AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
    }

    [Test]
    public void TestBigrams()
    {
        // 2..2 yields all adjacent character pairs.
        NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);
        AssertTokenStreamContents(filter, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 });
    }

    [Test]
    public void TestNgrams()
    {
        // 1..3 yields all grams of each size, grouped by size.
        NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);
        AssertTokenStreamContents(filter,
          new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
          new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
          new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 }
        );
    }

    [Test]
    public void TestOversizedNgrams()
    {
        // Grams longer than the token produce no output.
        NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);
        AssertTokenStreamContents(filter, new String[0], new int[0], new int[0]);
    }

    [Test]
    public void TestSmallTokenInStream()
    {
        // Tokens shorter than minGram ("de") are dropped; offsets of the
        // surviving grams still reflect their original positions.
        input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
        NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
        AssertTokenStreamContents(filter, new String[] { "abc", "fgh" }, new int[] { 0, 7 }, new int[] { 3, 10 });
    }

    [Test]
    public void TestReset()
    {
        // After resetting the underlying tokenizer, the filter must produce
        // the same output a second time.
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(new StringReader("abcde"));
        NGramTokenFilter filter = new NGramTokenFilter(tokenizer, 1, 1);
        AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
        tokenizer.Reset(new StringReader("abcde"));
        AssertTokenStreamContents(filter, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 });
    }
}
}

View File

@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.IO;
using Lucene.Net.Analysis.NGram;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
namespace Lucene.Net.Analyzers.NGram
{
/*
* Tests {@link NGramTokenizer} for correctness.
*/
/// <summary>
/// Tests <see cref="NGramTokenizer"/> for correctness.
/// </summary>
[TestFixture]
public class TestNGramTokenizer : BaseTokenStreamTestCase
{
    // Default input for most tests: the five-character string "abcde".
    private StringReader input;

    [SetUp]
    public override void SetUp()
    {
        base.SetUp();
        input = new StringReader("abcde");
    }

    [Test]
    public void TestInvalidInput()
    {
        // minGram greater than maxGram is invalid; the constructor must throw.
        bool gotException = false;
        try
        {
            new NGramTokenizer(input, 2, 1);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.IsTrue(gotException);
    }

    [Test]
    public void TestInvalidInput2()
    {
        // minGram of 0 is invalid.
        bool gotException = false;
        try
        {
            new NGramTokenizer(input, 0, 1);
        }
        catch (ArgumentException)
        {
            gotException = true;
        }
        Assert.IsTrue(gotException);
    }

    [Test]
    public void TestUnigrams()
    {
        // 1..1 yields each character as its own gram.
        NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
        AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
    }

    [Test]
    public void TestBigrams()
    {
        // 2..2 yields all adjacent character pairs.
        NGramTokenizer tokenizer = new NGramTokenizer(input, 2, 2);
        AssertTokenStreamContents(tokenizer, new String[] { "ab", "bc", "cd", "de" }, new int[] { 0, 1, 2, 3 }, new int[] { 2, 3, 4, 5 }, 5 /* abcde */);
    }

    [Test]
    public void TestNgrams()
    {
        // 1..3 yields all grams of each size, grouped by size.
        NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 3);
        AssertTokenStreamContents(tokenizer,
          new String[] { "a", "b", "c", "d", "e", "ab", "bc", "cd", "de", "abc", "bcd", "cde" },
          new int[] { 0, 1, 2, 3, 4, 0, 1, 2, 3, 0, 1, 2 },
          new int[] { 1, 2, 3, 4, 5, 2, 3, 4, 5, 3, 4, 5 },
          5 /* abcde */
        );
    }

    [Test]
    public void TestOversizedNgrams()
    {
        // Grams longer than the input produce no output.
        NGramTokenizer tokenizer = new NGramTokenizer(input, 6, 7);
        AssertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
    }

    [Test]
    public void TestReset()
    {
        // Resetting the tokenizer with a fresh reader must reproduce the
        // same token stream.
        NGramTokenizer tokenizer = new NGramTokenizer(input, 1, 1);
        AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
        tokenizer.Reset(new StringReader("abcde"));
        AssertTokenStreamContents(tokenizer, new String[] { "a", "b", "c", "d", "e" }, new int[] { 0, 1, 2, 3, 4 }, new int[] { 1, 2, 3, 4, 5 }, 5 /* abcde */);
    }
}
}