Jo Shields a575963da9 Imported Upstream version 3.6.0
Former-commit-id: da6be194a6b1221998fc28233f2503bd61dd9d14
2014-08-13 10:39:27 +01:00

103 lines
3.5 KiB
C#

/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Cz;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analyzers.Cz
{
/*
* Test the CzechAnalyzer
*
* CzechAnalyzer is like a StandardAnalyzer with a custom stopword list.
*
*/
[TestFixture]
public class TestCzechAnalyzer : BaseTokenStreamTestCase
{
string customStopFile = @"Cz\customStopWordFile.txt";
[Test]
public void TestStopWord()
{
AssertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
}
[Test]
public void TestReusableTokenStream()
{
Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
AssertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
AssertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" });
}
/*
* An input stream that always throws IOException for testing.
*/
private class UnreliableInputStream : MemoryStream
{
public override int Read(byte[] buffer, int offset, int count)
{
throw new IOException();
}
}
/*
* The loadStopWords method does not throw IOException on error,
* instead previously it set the stoptable to null (versus empty)
* this would cause a NPE when it is time to create the StopFilter.
*/
[Test]
public void TestInvalidStopWordFile()
{
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
cz.LoadStopWords(new UnreliableInputStream(), Encoding.UTF8);
AssertAnalyzesTo(cz, "Pokud mluvime o volnem",
new String[] { "pokud", "mluvime", "o", "volnem" });
}
/*
* Test that changes to the stop table via loadStopWords are applied immediately
* when using reusable token streams.
*/
[Test]
public void TestStopWordFileReuse()
{
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
AssertAnalyzesToReuse(cz, "Česká Republika",
new String[] { "česká", "republika" });
Stream stopwords = new FileStream(customStopFile, FileMode.Open, FileAccess.Read);
cz.LoadStopWords(stopwords, Encoding.UTF8);
AssertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" });
}
}
}