linux-packaging-mono/external/Lucene.Net/test/contrib/Analyzers/El/GreekAnalyzerTest.cs

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.El;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using Version=Lucene.Net.Util.Version;

namespace Lucene.Net.Analyzers.El
{
/*
 * A unit test class for verifying the correct operation of the GreekAnalyzer.
 *
 */
    [TestFixture]
public class GreekAnalyzerTest : BaseTokenStreamTestCase {

	/*
	 * Test the analysis of various greek strings.
	 *
	 * @throws Exception in case an error occurs
	 */
    [Test]
	public void testAnalyzer(){
		Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
		// Verify the correct analysis of capitals and small accented letters
        AssertAnalyzesTo(a,
                         "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
                         new String[]
                             {
                                 "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
                                 "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
                                 "\u03c3\u03b5\u03b9\u03c1\u03b1",
                                 "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
                                 "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
                                 "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3"
                             });
		// Verify the correct analysis of small letters with diaeresis and the elimination
		// of punctuation marks
        AssertAnalyzesTo(a,
                         "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9)     [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2]	-	\u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
                         new String[]
                             {
                                 "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
                                 "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
                                 "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3"
                             });
		// Verify the correct analysis of capital accented letters and capitalletters with diaeresis,
		// as well as the elimination of stop words
        AssertAnalyzesTo(a,
                         "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
                         new String[]
                             {
                                 "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
                                 "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
                                 "\u03b1\u03bb\u03bb\u03bf\u03b9"
                             });
	}

    [Test]
	public void testReusableTokenStream(){
	    Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
	    // Verify the correct analysis of capitals and small accented letters
        AssertAnalyzesToReuse(a,
                              "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
                              new String[]
                                  {
                                      "\u03bc\u03b9\u03b1",
                                      "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1",
                                      "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1",
                                      "\u03c3\u03b5\u03b9\u03c1\u03b1",
                                      "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
                                      "\u03b5\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03b7\u03c3",
                                      "\u03b3\u03bb\u03c9\u03c3\u03c3\u03b1\u03c3"
                                  });
	    // Verify the correct analysis of small letters with diaeresis and the elimination
	    // of punctuation marks
        AssertAnalyzesToReuse(a,
                              "\u03a0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1 (\u03ba\u03b1\u03b9)     [\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03ad\u03c2] -   \u0391\u039d\u0391\u0393\u039a\u0395\u03a3",
                              new String[]
                                  {
                                      "\u03c0\u03c1\u03bf\u03b9\u03bf\u03bd\u03c4\u03b1",
                                      "\u03c0\u03bf\u03bb\u03bb\u03b1\u03c0\u03bb\u03b5\u03c3",
                                      "\u03b1\u03bd\u03b1\u03b3\u03ba\u03b5\u03c3"
                                  });
	    // Verify the correct analysis of capital accented letters and capitalletters with diaeresis,
	    // as well as the elimination of stop words
        AssertAnalyzesToReuse(a,
                              "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3  \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
                              new String[]
                                  {
                                      "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3",
                                      "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3",
                                      "\u03b1\u03bb\u03bb\u03bf\u03b9"
                                  });
	}
}

}