linux-packaging-mono/external/Lucene.Net/test/contrib/Analyzers/Fr/TestFrenchAnalyzer.cs

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Fr;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using Version = Lucene.Net.Util.Version;

namespace Lucene.Net.Analyzers.Fr
{
    /*
     * Test case for FrenchAnalyzer.
     *
     * @version   $version$
     */
    [TestFixture]
    public class TestFrenchAnalyzer : BaseTokenStreamTestCase
    {
        [Test]
        public void TestAnalyzer()
        {
            FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);

            AssertAnalyzesTo(fa, "", new String[0]);

            AssertAnalyzesTo(
                fa,
                "chien chat cheval",
                new String[] {"chien", "chat", "cheval"});

            AssertAnalyzesTo(
                fa,
                "chien CHAT CHEVAL",
                new String[] {"chien", "chat", "cheval"});

            AssertAnalyzesTo(
                fa,
                "  chien  ,? + = -  CHAT /: > CHEVAL",
                new String[] {"chien", "chat", "cheval"});

            AssertAnalyzesTo(fa, "chien++", new String[] {"chien"});

            AssertAnalyzesTo(
                fa,
                "mot \"entreguillemet\"",
                new String[] {"mot", "entreguillemet"});

            // let's do some french specific tests now	

            /* 1. couldn't resist
             I would expect this to stay one term as in French the minus 
            sign is often used for composing words */
            AssertAnalyzesTo(
                fa,
                "Jean-François",
                new String[] {"jean", "françois"});

            // 2. stopwords
            AssertAnalyzesTo(
                fa,
                "le la chien les aux chat du des à cheval",
                new String[] {"chien", "chat", "cheval"});

            // some nouns and adjectives
            AssertAnalyzesTo(
                fa,
                "lances chismes habitable chiste éléments captifs",
                new String[]
                    {
                        "lanc",
                        "chism",
                        "habit",
                        "chist",
                        "élément",
                        "captif"
                    });

            // some verbs
            AssertAnalyzesTo(
                fa,
                "finissions souffrirent rugissante",
                new String[] {"fin", "souffr", "rug"});

            // some everything else
            // aujourd'hui stays one term which is OK
            AssertAnalyzesTo(
                fa,
                "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
                new String[]
                    {
                        "c3po",
                        "aujourd'hui",
                        "oeuf",
                        "ïâöûàä",
                        "anticonstitutionnel",
                        "jav"
                    });

            // some more everything else
            // here 1940-1945 stays as one term, 1940:1945 not ?
            AssertAnalyzesTo(
                fa,
                "33Bis 1940-1945 1940:1945 (---i+++)*",
                new String[] {"33bis", "1940-1945", "1940", "1945", "i"});


            AssertAnalyzesTo(fa, "abbeaux abdication abdications abondamment marieuses pageaux", new[]
                                                                                                     {
                                                                                                         "abbeau",
                                                                                                         "abdiqu",
                                                                                                         "abdiqu",
                                                                                                         "abond",
                                                                                                         "marieux",
                                                                                                         "pageau"
                                                                                                     });
        }

        [Test]
        public void TestReusableTokenStream()
        {
            FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
            // stopwords
            AssertAnalyzesToReuse(
                fa,
                "le la chien les aux chat du des à cheval",
                new String[] {"chien", "chat", "cheval"});

            // some nouns and adjectives
            AssertAnalyzesToReuse(
                fa,
                "lances chismes habitable chiste éléments captifs",
                new String[]
                    {
                        "lanc",
                        "chism",
                        "habit",
                        "chist",
                        "élément",
                        "captif"
                    });
        }

        /* 
         * Test that changes to the exclusion table are applied immediately
         * when using reusable token streams.
         */
        [Test]
        public void TestExclusionTableReuse()
        {
            FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
            AssertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
            fa.SetStemExclusionTable(new String[] { "habitable" });
            AssertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
        }
    }
}
Imported Upstream version 3.6.0 Former-commit-id: da6be194a6b1221998fc28233f2503bd61dd9d14 2014-08-13 10:39:27 +01:00			`/*`
			`*`
			`* Licensed to the Apache Software Foundation (ASF) under one`
			`* or more contributor license agreements. See the NOTICE file`
			`* distributed with this work for additional information`
			`* regarding copyright ownership. The ASF licenses this file`
			`* to you under the Apache License, Version 2.0 (the`
			`* "License"); you may not use this file except in compliance`
			`* with the License. You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing,`
			`* software distributed under the License is distributed on an`
			`* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY`
			`* KIND, either express or implied. See the License for the`
			`* specific language governing permissions and limitations`
			`* under the License.`
			`*`
			`*/`

			`using System;`
			`using System.Collections.Generic;`
			`using System.Linq;`
			`using System.Text;`
			`using Lucene.Net.Analysis;`
			`using Lucene.Net.Analysis.Fr;`
			`using Lucene.Net.Test.Analysis;`
			`using NUnit.Framework;`
			`using Version = Lucene.Net.Util.Version;`

			`namespace Lucene.Net.Analyzers.Fr`
			`{`
			`/*`
			`* Test case for FrenchAnalyzer.`
			`*`
			`* @version $version$`
			`*/`
			`[TestFixture]`
			`public class TestFrenchAnalyzer : BaseTokenStreamTestCase`
			`{`
			`[Test]`
			`public void TestAnalyzer()`
			`{`
			`FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);`

			`AssertAnalyzesTo(fa, "", new String[0]);`

			`AssertAnalyzesTo(`
			`fa,`
			`"chien chat cheval",`
			`new String[] {"chien", "chat", "cheval"});`

			`AssertAnalyzesTo(`
			`fa,`
			`"chien CHAT CHEVAL",`
			`new String[] {"chien", "chat", "cheval"});`

			`AssertAnalyzesTo(`
			`fa,`
			`" chien ,? + = - CHAT /: > CHEVAL",`
			`new String[] {"chien", "chat", "cheval"});`

			`AssertAnalyzesTo(fa, "chien++", new String[] {"chien"});`

			`AssertAnalyzesTo(`
			`fa,`
			`"mot \"entreguillemet\"",`
			`new String[] {"mot", "entreguillemet"});`

			`// let's do some french specific tests now`

			`/* 1. couldn't resist`
			`I would expect this to stay one term as in French the minus`
			`sign is often used for composing words */`
			`AssertAnalyzesTo(`
			`fa,`
			`"Jean-François",`
			`new String[] {"jean", "françois"});`

			`// 2. stopwords`
			`AssertAnalyzesTo(`
			`fa,`
			`"le la chien les aux chat du des à cheval",`
			`new String[] {"chien", "chat", "cheval"});`

			`// some nouns and adjectives`
			`AssertAnalyzesTo(`
			`fa,`
			`"lances chismes habitable chiste éléments captifs",`
			`new String[]`
			`{`
			`"lanc",`
			`"chism",`
			`"habit",`
			`"chist",`
			`"élément",`
			`"captif"`
			`});`

			`// some verbs`
			`AssertAnalyzesTo(`
			`fa,`
			`"finissions souffrirent rugissante",`
			`new String[] {"fin", "souffr", "rug"});`

			`// some everything else`
			`// aujourd'hui stays one term which is OK`
			`AssertAnalyzesTo(`
			`fa,`
			`"C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",`
			`new String[]`
			`{`
			`"c3po",`
			`"aujourd'hui",`
			`"oeuf",`
			`"ïâöûàä",`
			`"anticonstitutionnel",`
			`"jav"`
			`});`

			`// some more everything else`
			`// here 1940-1945 stays as one term, 1940:1945 not ?`
			`AssertAnalyzesTo(`
			`fa,`
			`"33Bis 1940-1945 1940:1945 (---i+++)*",`
			`new String[] {"33bis", "1940-1945", "1940", "1945", "i"});`


			`AssertAnalyzesTo(fa, "abbeaux abdication abdications abondamment marieuses pageaux", new[]`
			`{`
			`"abbeau",`
			`"abdiqu",`
			`"abdiqu",`
			`"abond",`
			`"marieux",`
			`"pageau"`
			`});`
			`}`

			`[Test]`
			`public void TestReusableTokenStream()`
			`{`
			`FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);`
			`// stopwords`
			`AssertAnalyzesToReuse(`
			`fa,`
			`"le la chien les aux chat du des à cheval",`
			`new String[] {"chien", "chat", "cheval"});`

			`// some nouns and adjectives`
			`AssertAnalyzesToReuse(`
			`fa,`
			`"lances chismes habitable chiste éléments captifs",`
			`new String[]`
			`{`
			`"lanc",`
			`"chism",`
			`"habit",`
			`"chist",`
			`"élément",`
			`"captif"`
			`});`
			`}`

			`/*`
			`* Test that changes to the exclusion table are applied immediately`
			`* when using reusable token streams.`
			`*/`
			`[Test]`
			`public void TestExclusionTableReuse()`
			`{`
			`FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);`
			`AssertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });`
			`fa.SetStemExclusionTable(new String[] { "habitable" });`
			`AssertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });`
			`}`
			`}`
			`}`