Jo Shields a575963da9 Imported Upstream version 3.6.0
Former-commit-id: da6be194a6b1221998fc28233f2503bd61dd9d14
2014-08-13 10:39:27 +01:00

188 lines
6.2 KiB
C#

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
using StandardTokenizer = Lucene.Net.Analysis.Standard.StandardTokenizer;
using Payload = Lucene.Net.Index.Payload;
using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis
{
[TestFixture]
public class TestAnalyzers : BaseTokenStreamTestCase
{
    /// <summary>
    /// Checks SimpleAnalyzer against a set of inputs. The expected tokens are
    /// lower-cased runs of letters; digits and punctuation never show up in them.
    /// </summary>
    [Test]
    public virtual void TestSimple()
    {
        Analyzer analyzer = new SimpleAnalyzer();

        AssertAnalyzesTo(analyzer, "foo bar FOO BAR", new[] { "foo", "bar", "foo", "bar" });
        AssertAnalyzesTo(analyzer, "foo bar . FOO <> BAR", new[] { "foo", "bar", "foo", "bar" });
        AssertAnalyzesTo(analyzer, "foo.bar.FOO.BAR", new[] { "foo", "bar", "foo", "bar" });
        AssertAnalyzesTo(analyzer, "U.S.A.", new[] { "u", "s", "a" });
        AssertAnalyzesTo(analyzer, "C++", new[] { "c" });
        AssertAnalyzesTo(analyzer, "B2B", new[] { "b", "b" });
        AssertAnalyzesTo(analyzer, "2B", new[] { "b" });
        AssertAnalyzesTo(analyzer, "\"QUOTED\" word", new[] { "quoted", "word" });
    }

    /// <summary>
    /// Checks WhitespaceAnalyzer against the same inputs as TestSimple. The
    /// expected tokens are the whitespace-separated chunks of the input, with
    /// case, digits and punctuation left exactly as written.
    /// </summary>
    [Test]
    public virtual void TestNull()
    {
        Analyzer analyzer = new WhitespaceAnalyzer();

        AssertAnalyzesTo(analyzer, "foo bar FOO BAR", new[] { "foo", "bar", "FOO", "BAR" });
        AssertAnalyzesTo(analyzer, "foo bar . FOO <> BAR", new[] { "foo", "bar", ".", "FOO", "<>", "BAR" });
        AssertAnalyzesTo(analyzer, "foo.bar.FOO.BAR", new[] { "foo.bar.FOO.BAR" });
        AssertAnalyzesTo(analyzer, "U.S.A.", new[] { "U.S.A." });
        AssertAnalyzesTo(analyzer, "C++", new[] { "C++" });
        AssertAnalyzesTo(analyzer, "B2B", new[] { "B2B" });
        AssertAnalyzesTo(analyzer, "2B", new[] { "2B" });
        AssertAnalyzesTo(analyzer, "\"QUOTED\" word", new[] { "\"QUOTED\"", "word" });
    }

    /// <summary>
    /// Checks StopAnalyzer (built with Version.LUCENE_CURRENT). The expected
    /// tokens are lower-cased, and "a", "such" and "THESE" from the second
    /// input are absent from the expected output.
    /// </summary>
    [Test]
    public virtual void TestStop()
    {
        Analyzer analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);

        AssertAnalyzesTo(analyzer, "foo bar FOO BAR", new[] { "foo", "bar", "foo", "bar" });
        AssertAnalyzesTo(analyzer, "foo a bar such FOO THESE BAR", new[] { "foo", "bar", "foo", "bar" });
    }

    /// <summary>
    /// Drains <paramref name="ts"/> and asserts that the first payload byte of
    /// the n-th token equals n, counting from 1.
    /// </summary>
    /// <param name="ts">Token stream to consume; it must expose an IPayloadAttribute.</param>
    internal virtual void VerifyPayload(TokenStream ts)
    {
        IPayloadAttribute payloadAttribute = ts.GetAttribute<IPayloadAttribute>();

        // The counter is a byte, like the payload value it is compared against.
        byte expected = 1;
        while (ts.IncrementToken())
        {
            Assert.AreEqual(expected, payloadAttribute.Payload.ToByteArray()[0]);
            expected++;
        }
    }

    /// <summary>
    /// Runs the payload check twice, each time over a freshly built
    /// WhitespaceTokenizer + PayloadSetter chain reading the same text.
    /// </summary>
    // NOTE(review): the previous comment here said this guards "old style next()
    // calls"; both passes in this code go through the same IncrementToken-based
    // check. Presumably the passes differed before the port - confirm.
    [Test]
    public virtual void TestPayloadCopy()
    {
        string text = "how now brown cow";

        for (int pass = 0; pass < 2; pass++)
        {
            TokenStream stream = new PayloadSetter(new WhitespaceTokenizer(new System.IO.StringReader(text)));
            VerifyPayload(stream);
        }
    }

    // LUCENE-1150: compile-time check only (no [Test] attribute). It exists so
    // that the build breaks if these StandardTokenizer constants stop being
    // publicly accessible.
    public virtual void _testStandardConstants()
    {
        int tokenType = StandardTokenizer.ALPHANUM;
        tokenType = StandardTokenizer.APOSTROPHE;
        tokenType = StandardTokenizer.ACRONYM;
        tokenType = StandardTokenizer.COMPANY;
        tokenType = StandardTokenizer.EMAIL;
        tokenType = StandardTokenizer.HOST;
        tokenType = StandardTokenizer.NUM;
        tokenType = StandardTokenizer.CJ;
        string[] tokenTypeNames = StandardTokenizer.TOKEN_TYPES;
    }

    /// <summary>
    /// StandardAnalyzer subclass that overrides only TokenStream, delegating to
    /// a new WhitespaceAnalyzer on every call.
    /// </summary>
    private class MyStandardAnalyzer : StandardAnalyzer
    {
        public MyStandardAnalyzer()
            : base(Version.LUCENE_CURRENT)
        {
        }

        public override TokenStream TokenStream(string field, System.IO.TextReader reader)
        {
            var whitespace = new WhitespaceAnalyzer();
            return whitespace.TokenStream(field, reader);
        }
    }

    /// <summary>
    /// Asks MyStandardAnalyzer for a reusable stream over "the" and expects
    /// exactly one token to come out.
    /// </summary>
    [Test]
    public virtual void TestSubclassOverridingOnlyTokenStream()
    {
        Analyzer analyzer = new MyStandardAnalyzer();
        TokenStream stream = analyzer.ReusableTokenStream("field", new System.IO.StringReader("the"));

        // StandardAnalyzer will discard "the" (it's a stopword),
        // but my subclass will not:
        Assert.IsTrue(stream.IncrementToken());
        Assert.IsFalse(stream.IncrementToken());
    }

    /// <summary>
    /// Test named after LUCENE-3042 / LUCENENET-433. It fully consumes a reusable
    /// StandardAnalyzer stream (Reset, IncrementToken until exhausted, End,
    /// Close) and then calls AssertAnalyzesToReuse on the same analyzer and
    /// text, expecting the single token "t".
    /// </summary>
    [Test]
    public void Test_LUCENE_3042_LUCENENET_433()
    {
        var text = "t";
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

        var stream = analyzer.ReusableTokenStream("dummy", new System.IO.StringReader(text));
        stream.Reset();
        while (stream.IncrementToken())
        {
            // Nothing to inspect; the stream only needs to be drained.
        }
        stream.End();
        stream.Close();

        AssertAnalyzesToReuse(analyzer, text, new[] { "t" });
    }
}
/// <summary>
/// Token filter used by the payload tests: it assigns one shared Payload
/// instance to every token that passes through and bumps the first byte of
/// the backing array after each assignment.
/// </summary>
class PayloadSetter : TokenFilter
{
    // One-byte buffer handed to the payload; its single byte is incremented per token.
    internal byte[] data = new byte[1];

    // Payload created once over `data` (offset 0, length 1) and assigned to every token.
    internal Payload p;

    // Attribute through which the payload is attached to the current token.
    internal IPayloadAttribute payloadAtt;

    /// <summary>Wraps <paramref name="input"/> and registers the payload attribute.</summary>
    /// <param name="input">Upstream token stream to decorate.</param>
    public PayloadSetter(TokenStream input)
        : base(input)
    {
        // `data` is already allocated by its field initializer at this point.
        p = new Payload(data, 0, 1);
        payloadAtt = AddAttribute<IPayloadAttribute>();
    }

    /// <summary>
    /// Advances the wrapped stream. When a token is available, attaches the
    /// shared payload and then increments the buffer byte.
    /// </summary>
    /// <returns>true if a token was produced; false once the input is exhausted.</returns>
    public override bool IncrementToken()
    {
        if (input.IncrementToken())
        {
            // Deliberately reuse the same payload / byte[] for every token.
            payloadAtt.Payload = p;
            data[0]++;
            return true;
        }

        return false;
    }
}
}