261 lines
10 KiB
C#
261 lines
10 KiB
C#
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed with
|
|
* this work for additional information regarding copyright ownership.
|
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
* (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
using System;
|
|
using Lucene.Net.Documents;
|
|
using Analyzer = Lucene.Net.Analysis.Analyzer;
|
|
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
|
|
using Document = Lucene.Net.Documents.Document;
|
|
using Field = Lucene.Net.Documents.Field;
|
|
using Directory = Lucene.Net.Store.Directory;
|
|
using Similarity = Lucene.Net.Search.Similarity;
|
|
|
|
namespace Lucene.Net.Index
|
|
{
|
|
|
|
class DocHelper
|
|
{
|
|
public const System.String FIELD_1_TEXT = "field one text";
|
|
public const System.String TEXT_FIELD_1_KEY = "textField1";
|
|
public static Field textField1;
|
|
|
|
public const System.String FIELD_2_TEXT = "field field field two text";
|
|
//Fields will be lexicographically sorted. So, the order is: field, text, two
|
|
public static readonly int[] FIELD_2_FREQS = new int[]{3, 1, 1};
|
|
public const System.String TEXT_FIELD_2_KEY = "textField2";
|
|
public static Field textField2;
|
|
|
|
|
|
public const System.String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
|
|
public const System.String TEXT_FIELD_3_KEY = "textField3";
|
|
public static Field textField3;
|
|
|
|
public const System.String KEYWORD_TEXT = "Keyword";
|
|
public const System.String KEYWORD_FIELD_KEY = "keyField";
|
|
public static Field keyField;
|
|
|
|
public const System.String NO_NORMS_TEXT = "omitNormsText";
|
|
public const System.String NO_NORMS_KEY = "omitNorms";
|
|
public static Field noNormsField;
|
|
|
|
public const System.String NO_TF_TEXT = "analyzed with no tf and positions";
|
|
public const System.String NO_TF_KEY = "omitTermFreqAndPositions";
|
|
public static Field noTFField;
|
|
|
|
public const System.String UNINDEXED_FIELD_TEXT = "unindexed field text";
|
|
public const System.String UNINDEXED_FIELD_KEY = "unIndField";
|
|
public static Field unIndField;
|
|
|
|
|
|
public const System.String UNSTORED_1_FIELD_TEXT = "unstored field text";
|
|
public const System.String UNSTORED_FIELD_1_KEY = "unStoredField1";
|
|
public static Field unStoredField1;
|
|
|
|
public const System.String UNSTORED_2_FIELD_TEXT = "unstored field text";
|
|
public const System.String UNSTORED_FIELD_2_KEY = "unStoredField2";
|
|
public static Field unStoredField2;
|
|
|
|
public const System.String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
|
|
public static byte[] LAZY_FIELD_BINARY_BYTES;
|
|
public static Field lazyFieldBinary;
|
|
|
|
public const System.String LAZY_FIELD_KEY = "lazyField";
|
|
public const System.String LAZY_FIELD_TEXT = "These are some field bytes";
|
|
public static Field lazyField;
|
|
|
|
public const System.String LARGE_LAZY_FIELD_KEY = "largeLazyField";
|
|
public static System.String LARGE_LAZY_FIELD_TEXT;
|
|
public static Field largeLazyField;
|
|
|
|
//From Issue 509
|
|
public const System.String FIELD_UTF1_TEXT = "field one \u4e00text";
|
|
public const System.String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
|
|
public static Field textUtfField1;
|
|
|
|
public const System.String FIELD_UTF2_TEXT = "field field field \u4e00two text";
|
|
//Fields will be lexicographically sorted. So, the order is: field, text, two
|
|
public static readonly int[] FIELD_UTF2_FREQS = new int[]{3, 1, 1};
|
|
public const System.String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
|
|
public static Field textUtfField2;
|
|
|
|
|
|
|
|
|
|
public static System.Collections.IDictionary nameValues = null;
|
|
|
|
// ordered list of all the fields...
|
|
// could use LinkedHashMap for this purpose if Java1.4 is OK
|
|
public static Field[] fields = null;
|
|
|
|
// Map<String fieldName, Fieldable field>
|
|
public static System.Collections.IDictionary all = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary indexed = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary stored = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary unstored = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary unindexed = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary termvector = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary notermvector = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary lazy = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary noNorms = new System.Collections.Hashtable();
|
|
public static System.Collections.IDictionary noTf = new System.Collections.Hashtable();
|
|
|
|
|
|
private static void Add(System.Collections.IDictionary map, IFieldable field)
|
|
{
|
|
map[field.Name] = field;
|
|
}
|
|
|
|
/// <summary> Adds the fields above to a document </summary>
|
|
/// <param name="doc">The document to write
|
|
/// </param>
|
|
public static void SetupDoc(Document doc)
|
|
{
|
|
for (int i = 0; i < fields.Length; i++)
|
|
{
|
|
doc.Add(fields[i]);
|
|
}
|
|
}
|
|
|
|
/// <summary> Writes the document to the directory using a segment
|
|
/// named "test"; returns the SegmentInfo describing the new
|
|
/// segment
|
|
/// </summary>
|
|
/// <param name="dir">
|
|
/// </param>
|
|
/// <param name="doc">
|
|
/// </param>
|
|
/// <throws> IOException </throws>
|
|
public static SegmentInfo WriteDoc(Directory dir, Document doc)
|
|
{
|
|
return WriteDoc(dir, new WhitespaceAnalyzer(), Similarity.Default, doc);
|
|
}
|
|
|
|
/// <summary> Writes the document to the directory using the analyzer
|
|
/// and the similarity score; returns the SegmentInfo
|
|
/// describing the new segment
|
|
/// </summary>
|
|
/// <param name="dir">
|
|
/// </param>
|
|
/// <param name="analyzer">
|
|
/// </param>
|
|
/// <param name="similarity">
|
|
/// </param>
|
|
/// <param name="doc">
|
|
/// </param>
|
|
/// <throws> IOException </throws>
|
|
public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
|
|
{
|
|
IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
|
|
writer.SetSimilarity(similarity);
|
|
//writer.setUseCompoundFile(false);
|
|
writer.AddDocument(doc);
|
|
writer.Commit();
|
|
SegmentInfo info = writer.NewestSegment();
|
|
writer.Close();
|
|
return info;
|
|
}
|
|
|
|
public static int NumFields(Document doc)
|
|
{
|
|
return doc.GetFields().Count;
|
|
}
|
|
static DocHelper()
|
|
{
|
|
textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
|
|
textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
|
textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
|
|
{
|
|
textField3.OmitNorms = true;
|
|
}
|
|
keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
|
noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
|
noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
|
|
{
|
|
noTFField.OmitTermFreqAndPositions = true;
|
|
}
|
|
unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
|
|
unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
|
|
unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
|
|
lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
|
|
textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
|
|
textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
|
fields = new Field[] { textField1, textField2, textField3, keyField, noNormsField, noTFField, unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField };
|
|
{
|
|
//Initialize the large Lazy Field
|
|
System.Text.StringBuilder buffer = new System.Text.StringBuilder();
|
|
for (int i = 0; i < 10000; i++)
|
|
{
|
|
buffer.Append("Lazily loading lengths of language in lieu of laughing ");
|
|
}
|
|
|
|
try
|
|
{
|
|
LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
|
|
}
|
|
catch (System.IO.IOException)
|
|
{
|
|
}
|
|
lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
|
|
fields[fields.Length - 2] = lazyFieldBinary;
|
|
LARGE_LAZY_FIELD_TEXT = buffer.ToString();
|
|
largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
|
|
|
|
fields[fields.Length - 1] = largeLazyField;
|
|
for (int i = 0; i < fields.Length; i++)
|
|
{
|
|
IFieldable f = fields[i];
|
|
Add(all, f);
|
|
if (f.IsIndexed)
|
|
Add(indexed, f);
|
|
else
|
|
Add(unindexed, f);
|
|
if (f.IsTermVectorStored)
|
|
Add(termvector, f);
|
|
if (f.IsIndexed && !f.IsTermVectorStored)
|
|
Add(notermvector, f);
|
|
if (f.IsStored)
|
|
Add(stored, f);
|
|
else
|
|
Add(unstored, f);
|
|
if (f.OmitNorms)
|
|
Add(noNorms, f);
|
|
if (f.OmitTermFreqAndPositions)
|
|
Add(noTf, f);
|
|
if (f.IsLazy)
|
|
Add(lazy, f);
|
|
}
|
|
}
|
|
{
|
|
nameValues = new System.Collections.Hashtable();
|
|
nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
|
|
nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
|
|
nameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT;
|
|
nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
|
|
nameValues[NO_NORMS_KEY] = NO_NORMS_TEXT;
|
|
nameValues[NO_TF_KEY] = NO_TF_TEXT;
|
|
nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
|
|
nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
|
|
nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
|
|
nameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT;
|
|
nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
|
|
nameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT;
|
|
nameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT;
|
|
nameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT;
|
|
}
|
|
}
|
|
}
|
|
} |