diff --git a/Makefile.in b/Makefile.in
index f0873e25ff..a80823be98 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -85,7 +85,7 @@ DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(srcdir)/config.h.in mkinstalldirs \
$(srcdir)/mono-core.spec.in $(srcdir)/mono-uninstalled.pc.in \
AUTHORS COPYING.LIB ChangeLog NEWS compile config.guess \
- config.rpath config.sub install-sh missing ltmain.sh
+ config.rpath config.sub depcomp install-sh missing ltmain.sh
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/iconv.m4 \
$(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
diff --git a/configure.REMOVED.git-id b/configure.REMOVED.git-id
index 61434bb1ea..53ebe19d89 100644
--- a/configure.REMOVED.git-id
+++ b/configure.REMOVED.git-id
@@ -1 +1 @@
-0153a4d763e8fdec1bfc6ad8a10db68ae9a8b3ad
\ No newline at end of file
+94ef65996a186293ae651e688580672f8de4b880
\ No newline at end of file
diff --git a/configure.ac.REMOVED.git-id b/configure.ac.REMOVED.git-id
index f3f1ee7eb9..8106806460 100644
--- a/configure.ac.REMOVED.git-id
+++ b/configure.ac.REMOVED.git-id
@@ -1 +1 @@
-4d5ca3baf19b55971eba3a1e4c430304031594ed
\ No newline at end of file
+6e1f9bd8a55b889798c88191650b5752d2b58e3b
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/README.md b/external/Lucene.Net.Light/README.md
new file mode 100644
index 0000000000..250cada378
--- /dev/null
+++ b/external/Lucene.Net.Light/README.md
@@ -0,0 +1,6 @@
+This is a subset of Lucene.Net as used by Mono's Monodoc tool.
+
+This module is a checkout of:
+
+ git://github.com/mono/lucene.net.git
+ 88fb67b07621dfed054d8d75fd50672fb26349df
diff --git a/external/Lucene.Net.Light/src/core/Analysis/ASCIIFoldingFilter.cs.REMOVED.git-id b/external/Lucene.Net.Light/src/core/Analysis/ASCIIFoldingFilter.cs.REMOVED.git-id
new file mode 100644
index 0000000000..b821485546
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/ASCIIFoldingFilter.cs.REMOVED.git-id
@@ -0,0 +1 @@
+61338700516ffa26e2a36fef4a0843a5fbf01c62
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Analyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/Analyzer.cs
new file mode 100644
index 0000000000..cea0ee30cc
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Analyzer.cs
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+ /// An Analyzer builds TokenStreams, which analyze text. It thus represents a
+ /// policy for extracting index terms from text.
+ ///
+ /// Typical implementations first build a Tokenizer, which breaks the stream of
+ /// characters from the Reader into raw Tokens. One or more TokenFilters may
+ /// then be applied to the output of the Tokenizer.
+ ///
+ public abstract class Analyzer : IDisposable
+ {
+ /// Creates a TokenStream which tokenizes all the text in the provided
+ /// Reader. Must be able to handle null field name for
+ /// backward compatibility.
+ ///
+ public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);
+
+ /// Creates a TokenStream that is allowed to be re-used
+ /// from the previous time that the same thread called
+ /// this method. Callers that do not need to use more
+ /// than one TokenStream at the same time from this
+ /// analyzer should use this method for better
+ /// performance.
+ ///
+ public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
+ {
+ return TokenStream(fieldName, reader);
+ }
+
+ private CloseableThreadLocal<object> tokenStreams = new CloseableThreadLocal<object>();
+ private bool isDisposed;
+
+ /// Used by Analyzers that implement reusableTokenStream
+ /// to retrieve previously saved TokenStreams for re-use
+ /// by the same thread.
+ ///
+ protected internal virtual object PreviousTokenStream
+ {
+ get
+ {
+ if (tokenStreams == null)
+ {
+ throw new AlreadyClosedException("this Analyzer is closed");
+ }
+ return tokenStreams.Get();
+ }
+ set
+ {
+ if (tokenStreams == null)
+ {
+ throw new AlreadyClosedException("this Analyzer is closed");
+ }
+ tokenStreams.Set(value);
+ }
+ }
+
+ [Obsolete()]
+ protected internal bool overridesTokenStreamMethod = false;
+
+ /// This is only present to preserve
+ /// back-compat of classes that subclass a core analyzer
+ /// and override tokenStream but not reusableTokenStream
+ ///
+ ///
+ /// Java uses Class<? extends Analyzer> to constrain to
+ /// only Types that inherit from Analyzer. C# does not have a generic type class,
+ /// ie Type<t>. The method signature stays the same, and an exception may
+ /// still be thrown, if the method doesn't exist.
+ ///
+ [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")]
+ protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
+ where TClass : Analyzer
+ {
+ try
+ {
+ System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) });
+ overridesTokenStreamMethod = m.DeclaringType != typeof(TClass);
+ }
+ catch (MethodAccessException)
+ {
+ // can't happen, as baseClass is subclass of Analyzer
+ overridesTokenStreamMethod = false;
+ }
+ }
+
+
+ /// Invoked before indexing a Fieldable instance if
+ /// terms have already been added to that field. This allows custom
+ /// analyzers to place an automatic position increment gap between
+ /// Fieldable instances using the same field name. The default value
+ /// position increment gap is 0. With a 0 position increment gap and
+ /// the typical default token position increment of 1, all terms in a field,
+ /// including across Fieldable instances, are in successive positions, allowing
+ /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
+ ///
+ ///
+ /// Fieldable name being indexed.
+ ///
+ /// position increment gap, added to the next token emitted from
+ ///
+ public virtual int GetPositionIncrementGap(String fieldName)
+ {
+ return 0;
+ }
+
+ /// Just like GetPositionIncrementGap, except for
+ /// Token offsets instead. By default this returns 1 for
+ /// tokenized fields, as if the fields were joined
+ /// with an extra space character, and 0 for un-tokenized
+ /// fields. This method is only called if the field
+ /// produced at least one token for indexing.
+ ///
+ ///
+ /// the field just indexed
+ ///
+ /// offset gap, added to the next token emitted from
+ ///
+ public virtual int GetOffsetGap(IFieldable field)
+ {
+ return field.IsTokenized ? 1 : 0;
+ }
+
+ /// Frees persistent resources used by this Analyzer
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public virtual void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (tokenStreams != null)
+ {
+ tokenStreams.Close();
+ tokenStreams = null;
+ }
+ }
+ isDisposed = true;
+ }
+ }
+}
\ No newline at end of file
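To show how the Analyzer contract above is typically consumed, here is a minimal sketch that chains two components added later in this import (LowerCaseTokenizer and LengthFilter). The class name ShortWordAnalyzer and the 2-20 length bounds are illustrative assumptions, not part of the patch.

    // Sketch only: a custom Analyzer built from types added in this import.
    using System.IO;
    using Lucene.Net.Analysis;

    public sealed class ShortWordAnalyzer : Analyzer
    {
        // Tokenize on letters, lower-case each token, then keep terms of 2..20 chars.
        public override TokenStream TokenStream(string fieldName, TextReader reader)
        {
            return new LengthFilter(new LowerCaseTokenizer(reader), 2, 20);
        }
    }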
diff --git a/external/Lucene.Net.Light/src/core/Analysis/BaseCharFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/BaseCharFilter.cs
new file mode 100644
index 0000000000..b84fce08c8
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/BaseCharFilter.cs
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+
+ ///
+ /// Base utility class for implementing a CharFilter.
+ /// You subclass this, and then record mappings by calling
+ /// AddOffCorrectMap, and then invoke the Correct
+ /// method to correct an offset.
+ ///
+ public abstract class BaseCharFilter : CharFilter
+ {
+
+ private int[] offsets;
+ private int[] diffs;
+ private int size = 0;
+
+ protected BaseCharFilter(CharStream @in) : base(@in)
+ {
+ }
+
+ /* Retrieve the corrected offset. */
+ //@Override
+ protected internal override int Correct(int currentOff)
+ {
+ if (offsets == null || currentOff < offsets[0])
+ {
+ return currentOff;
+ }
+
+ int hi = size - 1;
+ if (currentOff >= offsets[hi])
+ return currentOff + diffs[hi];
+
+ int lo = 0;
+ int mid = -1;
+
+ while (hi >= lo)
+ {
+ mid = Number.URShift(lo + hi, 1);
+ if (currentOff < offsets[mid])
+ hi = mid - 1;
+ else if (currentOff > offsets[mid])
+ lo = mid + 1;
+ else
+ return currentOff + diffs[mid];
+ }
+
+ if (currentOff < offsets[mid])
+ return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
+ return currentOff + diffs[mid];
+ }
+
+ protected int LastCumulativeDiff
+ {
+ get
+ {
+ return offsets == null ? 0 : diffs[size - 1];
+ }
+ }
+
+ [Obsolete("Use LastCumulativeDiff property instead")]
+ protected int GetLastCumulativeDiff()
+ {
+ return LastCumulativeDiff;
+ }
+
+ protected void AddOffCorrectMap(int off, int cumulativeDiff)
+ {
+ if (offsets == null)
+ {
+ offsets = new int[64];
+ diffs = new int[64];
+ }
+ else if (size == offsets.Length)
+ {
+ offsets = ArrayUtil.Grow(offsets);
+ diffs = ArrayUtil.Grow(diffs);
+ }
+
+ offsets[size] = off;
+ diffs[size++] = cumulativeDiff;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/CachingTokenFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/CachingTokenFilter.cs
new file mode 100644
index 0000000000..c5f7694d39
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/CachingTokenFilter.cs
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// This class can be used if the token attributes of a TokenStream
+ /// are intended to be consumed more than once. It caches
+ /// all token attribute states locally in a List.
+ ///
+ ///
+ /// CachingTokenFilter implements the optional Reset()
+ /// method, which repositions the
+ /// stream to the first Token.
+ ///
+ public sealed class CachingTokenFilter : TokenFilter
+ {
+ private System.Collections.Generic.LinkedList<State> cache = null;
+ private System.Collections.Generic.IEnumerator<State> iterator = null;
+ private State finalState;
+
+ public CachingTokenFilter(TokenStream input):base(input)
+ {
+ }
+
+ public override bool IncrementToken()
+ {
+ if (cache == null)
+ {
+ // fill cache lazily
+ cache = new System.Collections.Generic.LinkedList<State>();
+ FillCache();
+ iterator = cache.GetEnumerator();
+ }
+
+ if (!iterator.MoveNext())
+ {
+ // the cache is exhausted, return false
+ return false;
+ }
+ // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
+ RestoreState(iterator.Current);
+ return true;
+ }
+
+ public override void End()
+ {
+ if (finalState != null)
+ {
+ RestoreState(finalState);
+ }
+ }
+
+ public override void Reset()
+ {
+ if (cache != null)
+ {
+ iterator = cache.GetEnumerator();
+ }
+ }
+
+ private void FillCache()
+ {
+ while (input.IncrementToken())
+ {
+ cache.AddLast(CaptureState());
+ }
+ // capture final state
+ input.End();
+ finalState = CaptureState();
+ }
+ }
+}
\ No newline at end of file
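A small usage sketch of the caching behaviour described in the class comment: one pass fills the cache lazily, Reset() repositions to the first cached state, and a second pass replays it. The tokenizer choice and input literal are placeholders.

    // Sketch only: consuming the same tokens twice via CachingTokenFilter.
    using System.IO;
    using Lucene.Net.Analysis;

    public static class CachingExample
    {
        public static void Run()
        {
            TokenStream tokens = new LowerCaseTokenizer(new StringReader("Some Text"));
            var cached = new CachingTokenFilter(tokens);

            while (cached.IncrementToken()) { }  // first pass fills the cache lazily
            cached.Reset();                      // reposition to the first cached state
            while (cached.IncrementToken()) { }  // second pass replays the cached states
        }
    }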
diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharArraySet.cs b/external/Lucene.Net.Light/src/core/Analysis/CharArraySet.cs
new file mode 100644
index 0000000000..e7df0ba284
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/CharArraySet.cs
@@ -0,0 +1,517 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Linq;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+ /// A simple class that stores Strings as char[]'s in a
+ /// hash table. Note that this is not a general purpose
+ /// class. For example, it cannot remove items from the
+ /// set, nor does it resize its hash table to be smaller,
+ /// etc. It is designed to be quick to test if a char[]
+ /// is in the set without the necessity of converting it
+ /// to a String first.
+ ///
+ /// Please note: This class implements ISet<string> but
+ /// does not behave like it should in all cases. The generic type is
+ /// string, because you can add any object to it
+ /// that has a string representation. The add methods will call
+ /// ToString() and store the result using a char[]
+ /// buffer. The Contains methods behave the same way.
+ /// The GetEnumerator method returns an IEnumerator over strings.
+ /// For type safety, StringEnumerator() is also provided.
+ ///
+ // TODO: java uses wildcards, .net doesn't have this, easiest way is to
+ // make the entire class generic. Ultimately, though, since this
+ // works with strings, I can't think of a reason not to just declare
+ // this as an ISet<string>.
+ public class CharArraySet : ISet<string>
+ {
+ bool _ReadOnly = false;
+ const int INIT_SIZE = 8;
+ char[][] _Entries;
+ int _Count;
+ bool _IgnoreCase;
+ public static CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false));
+
+ private void Init(int startSize, bool ignoreCase)
+ {
+ this._IgnoreCase = ignoreCase;
+ int size = INIT_SIZE;
+ while (startSize + (startSize >> 2) > size)
+ size <<= 1;
+ _Entries = new char[size][];
+ }
+
+ /// Create set with enough capacity to hold startSize
+ /// terms
+ ///
+ public CharArraySet(int startSize, bool ignoreCase)
+ {
+ Init(startSize, ignoreCase);
+ }
+
+ public CharArraySet(IEnumerable<string> c, bool ignoreCase)
+ {
+ Init(c.Count(), ignoreCase);
+ AddItems(c);
+ }
+
+ /// Create set from a Collection of char[] or String
+ public CharArraySet(IEnumerable<object> c, bool ignoreCase)
+ {
+ Init(c.Count(), ignoreCase);
+ AddItems(c);
+ }
+
+ private void AddItems(IEnumerable items)
+ {
+ foreach(var item in items)
+ {
+ Add(item.ToString());
+ }
+ }
+
+ /// Create set from entries
+ private CharArraySet(char[][] entries, bool ignoreCase, int count)
+ {
+ this._Entries = entries;
+ this._IgnoreCase = ignoreCase;
+ this._Count = count;
+ }
+
+ /// true if the len chars of text starting at off
+ /// are in the set
+ ///
+ public virtual bool Contains(char[] text, int off, int len)
+ {
+ return _Entries[GetSlot(text, off, len)] != null;
+ }
+
+ public virtual bool Contains(string text)
+ {
+ return _Entries[GetSlot(text)] != null;
+ }
+
+
+ private int GetSlot(char[] text, int off, int len)
+ {
+ int code = GetHashCode(text, off, len);
+ int pos = code & (_Entries.Length - 1);
+ char[] text2 = _Entries[pos];
+ if (text2 != null && !Equals(text, off, len, text2))
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ pos = code & (_Entries.Length - 1);
+ text2 = _Entries[pos];
+ }
+ while (text2 != null && !Equals(text, off, len, text2));
+ }
+ return pos;
+ }
+
+ /// Returns true if the String is in the set
+ private int GetSlot(string text)
+ {
+ int code = GetHashCode(text);
+ int pos = code & (_Entries.Length - 1);
+ char[] text2 = _Entries[pos];
+ if (text2 != null && !Equals(text, text2))
+ {
+ int inc = ((code >> 8) + code) | 1;
+ do
+ {
+ code += inc;
+ pos = code & (_Entries.Length - 1);
+ text2 = _Entries[pos];
+ }
+ while (text2 != null && !Equals(text, text2));
+ }
+ return pos;
+ }
+
+ public bool Add(string text)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+ return Add(text.ToCharArray());
+ }
+
+ /// Add this char[] directly to the set.
+ /// If ignoreCase is true for this Set, the text array will be directly modified.
+ /// The user should never modify this text array after calling this method.
+ ///
+ public bool Add(char[] text)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+
+ if (_IgnoreCase)
+ for (int i = 0; i < text.Length; i++)
+ text[i] = Char.ToLower(text[i]);
+ int slot = GetSlot(text, 0, text.Length);
+ if (_Entries[slot] != null)
+ return false;
+ _Entries[slot] = text;
+ _Count++;
+
+ if (_Count + (_Count >> 2) > _Entries.Length)
+ {
+ Rehash();
+ }
+
+ return true;
+ }
+
+ private bool Equals(char[] text1, int off, int len, char[] text2)
+ {
+ if (len != text2.Length)
+ return false;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (char.ToLower(text1[off + i]) != text2[i])
+ return false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (text1[off + i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private bool Equals(string text1, char[] text2)
+ {
+ int len = text1.Length;
+ if (len != text2.Length)
+ return false;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (char.ToLower(text1[i]) != text2[i])
+ return false;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ if (text1[i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void Rehash()
+ {
+ int newSize = 2 * _Entries.Length;
+ char[][] oldEntries = _Entries;
+ _Entries = new char[newSize][];
+
+ for (int i = 0; i < oldEntries.Length; i++)
+ {
+ char[] text = oldEntries[i];
+ if (text != null)
+ {
+ // todo: could be faster... no need to compare strings on collision
+ _Entries[GetSlot(text, 0, text.Length)] = text;
+ }
+ }
+ }
+
+ private int GetHashCode(char[] text, int offset, int len)
+ {
+ int code = 0;
+ int stop = offset + len;
+ if (_IgnoreCase)
+ {
+ for (int i = offset; i < stop; i++)
+ {
+ code = code * 31 + char.ToLower(text[i]);
+ }
+ }
+ else
+ {
+ for (int i = offset; i < stop; i++)
+ {
+ code = code * 31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ private int GetHashCode(string text)
+ {
+ int code = 0;
+ int len = text.Length;
+ if (_IgnoreCase)
+ {
+ for (int i = 0; i < len; i++)
+ {
+ code = code * 31 + char.ToLower(text[i]);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < len; i++)
+ {
+ code = code * 31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ public int Count
+ {
+ get { return _Count; }
+ }
+
+ public bool IsEmpty
+ {
+ get { return _Count == 0; }
+ }
+
+ public bool Contains(object item)
+ {
+ var text = item as char[];
+ return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString());
+ }
+
+ public bool Add(object item)
+ {
+ return Add(item.ToString());
+ }
+
+ void ICollection<string>.Add(string item)
+ {
+ this.Add(item);
+ }
+
+ ///
+ /// Returns an unmodifiable CharArraySet. This allows to provide
+ /// unmodifiable views of internal sets for "read-only" use.
+ ///
+ /// The set for which an unmodifiable view is returned.
+ /// A new unmodifiable CharArraySet.
+ /// Throws ArgumentNullException if the given set is null.
+ public static CharArraySet UnmodifiableSet(CharArraySet set)
+ {
+ if(set == null)
+ throw new ArgumentNullException("Given set is null");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ if (set._ReadOnly)
+ return set;
+
+ var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) {IsReadOnly = true};
+ return newSet;
+ }
+
+ ///
+ /// Returns a copy of the given set as a CharArraySet. If the given set
+ /// is a CharArraySet the ignoreCase property will be preserved.
+ ///
+ /// A set to copy
+ /// A copy of the given set as a CharArraySet. If the given set
+ /// is a CharArraySet the ignoreCase property will be preserved.
+ public static CharArraySet Copy<T>(ISet<T> set)
+ {
+ if (set == null)
+ throw new ArgumentNullException("set", "Given set is null!");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase;
+ var arrSet = new CharArraySet(set.Count, ignoreCase);
+ arrSet.AddItems(set);
+ return arrSet;
+ }
+
+ public void Clear()
+ {
+ throw new NotSupportedException("Remove not supported!");
+ }
+
+ public bool IsReadOnly
+ {
+ get { return _ReadOnly; }
+ private set { _ReadOnly = value; }
+ }
+
+ /// Adds all of the elements in the specified collection to this collection
+ public void UnionWith(IEnumerable<string> other)
+ {
+ if (_ReadOnly) throw new NotSupportedException();
+
+ foreach (string s in other)
+ {
+ Add(s.ToCharArray());
+ }
+ }
+
+ /// Wrapper that calls UnionWith
+ public void AddAll(IEnumerable<string> coll)
+ {
+ UnionWith(coll);
+ }
+
+ #region Unneeded methods
+ public void RemoveAll(ICollection c)
+ {
+ throw new NotSupportedException();
+ }
+
+ public void RetainAll(ICollection c)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ICollection<string>.CopyTo(string[] array, int arrayIndex)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.IntersectWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.ExceptWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ void ISet<string>.SymmetricExceptWith(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsSubsetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsSupersetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsProperSupersetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.IsProperSubsetOf(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.Overlaps(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ISet<string>.SetEquals(IEnumerable<string> other)
+ {
+ throw new NotSupportedException();
+ }
+
+ bool ICollection<string>.Remove(string item)
+ {
+ throw new NotSupportedException();
+ }
+ #endregion
+
+ ///
+ /// The IEnumerator<String> for this set. Strings are constructed on the fly,
+ /// so use nextCharArray for more efficient access
+ ///
+ public class CharArraySetEnumerator : IEnumerator<string>
+ {
+ readonly CharArraySet _Creator;
+ int pos = -1;
+ char[] cur;
+
+ protected internal CharArraySetEnumerator(CharArraySet creator)
+ {
+ _Creator = creator;
+ }
+
+ public bool MoveNext()
+ {
+ cur = null;
+ pos++;
+ while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null)
+ pos++;
+ return cur != null;
+ }
+
+ /// do not modify the returned char[]
+ public char[] NextCharArray()
+ {
+ return cur;
+ }
+
+ public string Current
+ {
+ get { return new string(NextCharArray()); }
+ }
+
+ public void Dispose()
+ {
+ }
+
+ object IEnumerator.Current
+ {
+ get { return new string(NextCharArray()); }
+ }
+
+ public void Reset()
+ {
+ throw new NotImplementedException();
+ }
+ }
+
+ public IEnumerator<string> StringEnumerator()
+ {
+ return new CharArraySetEnumerator(this);
+ }
+
+ public IEnumerator<string> GetEnumerator()
+ {
+ return new CharArraySetEnumerator(this);
+ }
+
+ IEnumerator IEnumerable.GetEnumerator()
+ {
+ return GetEnumerator();
+ }
+ }
+
+}
\ No newline at end of file
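As a sketch of the char[]-based lookup the class comment describes, the snippet below builds a small case-insensitive set and tests membership directly against a char buffer, without converting it to a string first. The names and contents are illustrative.

    // Sketch only: case-insensitive membership test against a char[] buffer.
    using Lucene.Net.Analysis;

    public static class StopSetExample
    {
        public static void Run()
        {
            var stopWords = new CharArraySet(new[] { "the", "and", "of" }, true);

            char[] term = "The".ToCharArray();
            // ignoreCase folds to lower case, so this is true and allocates no string.
            bool isStopWord = stopWords.Contains(term, 0, term.Length);
        }
    }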
diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/CharFilter.cs
new file mode 100644
index 0000000000..039f841c3c
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/CharFilter.cs
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Subclasses of CharFilter can be chained to filter CharStream.
+ /// They can be used as a TextReader with additional offset
+ /// correction. Tokenizers will automatically use CorrectOffset
+ /// if a CharFilter/CharStream subclass is used.
+ ///
+ ///
+ /// $Id$
+ ///
+ ///
+ public abstract class CharFilter : CharStream
+ {
+ private long currentPosition = -1;
+ private bool isDisposed;
+ protected internal CharStream input;
+
+ protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed)
+ {
+ input = in_Renamed;
+ }
+
+ /// Subclass may want to override to correct the current offset.
+ /// current offset
+ /// corrected offset
+ protected internal virtual int Correct(int currentOff)
+ {
+ return currentOff;
+ }
+
+ /// Chains the corrected offset through the input
+ /// CharFilter.
+ ///
+ public override int CorrectOffset(int currentOff)
+ {
+ return input.CorrectOffset(Correct(currentOff));
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ input = null;
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+
+ public override int Read(System.Char[] cbuf, int off, int len)
+ {
+ return input.Read(cbuf, off, len);
+ }
+
+ public bool MarkSupported()
+ {
+ return input.BaseStream.CanSeek;
+ }
+
+ public void Mark(int readAheadLimit)
+ {
+ currentPosition = input.BaseStream.Position;
+ input.BaseStream.Position = readAheadLimit;
+ }
+
+ public void Reset()
+ {
+ input.BaseStream.Position = currentPosition;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharReader.cs b/external/Lucene.Net.Light/src/core/Analysis/CharReader.cs
new file mode 100644
index 0000000000..2120bd4d8d
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/CharReader.cs
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// CharReader is a Reader wrapper. It reads chars from
+ /// Reader and outputs a CharStream, defining an
+ /// identity CorrectOffset method that
+ /// simply returns the provided offset.
+ ///
+ public sealed class CharReader:CharStream
+ {
+ private long currentPosition = -1;
+
+ private bool isDisposed;
+
+ internal System.IO.StreamReader input;
+
+ public static CharStream Get(System.IO.TextReader input)
+ {
+ var charStream = input as CharStream;
+ if (charStream != null)
+ return charStream;
+
+ // {{Aroush-2.9}} isn't there a better (faster) way to do this?
+ var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd()));
+ return new CharReader(new System.IO.StreamReader(theString));
+ //return input is CharStream?(CharStream) input:new CharReader(input);
+ }
+
+ private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed)
+ {
+ input = in_Renamed;
+ }
+
+ public override int CorrectOffset(int currentOff)
+ {
+ return currentOff;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ input = null;
+ isDisposed = true;
+ base.Dispose(disposing);
+ }
+
+ public override int Read(System.Char[] cbuf, int off, int len)
+ {
+ return input.Read(cbuf, off, len);
+ }
+
+ public bool MarkSupported()
+ {
+ return input.BaseStream.CanSeek;
+ }
+
+ public void Mark(int readAheadLimit)
+ {
+ currentPosition = input.BaseStream.Position;
+ input.BaseStream.Position = readAheadLimit;
+ }
+
+ public void Reset()
+ {
+ input.BaseStream.Position = currentPosition;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharStream.cs b/external/Lucene.Net.Light/src/core/Analysis/CharStream.cs
new file mode 100644
index 0000000000..0b36fe2d8e
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/CharStream.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// CharStream adds CorrectOffset
+ /// functionality over System.IO.TextReader. All Tokenizers accept a
+ /// CharStream instead of a TextReader as input, which enables
+ /// arbitrary character based filtering before tokenization.
+ /// The CorrectOffset method fixes offsets to account for
+ /// removal or insertion of characters, so that the offsets
+ /// reported in the tokens match the character offsets of the
+ /// original Reader.
+ ///
+ public abstract class CharStream : System.IO.StreamReader
+ {
+ protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream)
+ {
+ }
+
+ /// Called by CharFilter(s) and Tokenizer to correct token offset.
+ ///
+ ///
+ /// offset as seen in the output
+ ///
+ /// corrected offset based on the input
+ ///
+ public abstract int CorrectOffset(int currentOff);
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/CharTokenizer.cs
new file mode 100644
index 0000000000..22423ec345
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/CharTokenizer.cs
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// An abstract base class for simple, character-oriented tokenizers.
+ public abstract class CharTokenizer:Tokenizer
+ {
+ protected CharTokenizer(System.IO.TextReader input):base(input)
+ {
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ protected CharTokenizer(AttributeSource source, System.IO.TextReader input):base(source, input)
+ {
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory, input)
+ {
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private int offset = 0, bufferIndex = 0, dataLen = 0;
+ private const int MAX_WORD_LEN = 255;
+ private const int IO_BUFFER_SIZE = 4096;
+ private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
+
+ private readonly ITermAttribute termAtt;
+ private readonly IOffsetAttribute offsetAtt;
+
+ /// Returns true iff a character should be included in a token. This
+ /// tokenizer generates as tokens adjacent sequences of characters which
+ /// satisfy this predicate. Characters for which this is false are used to
+ /// define token boundaries and are not included in tokens.
+ ///
+ protected internal abstract bool IsTokenChar(char c);
+
+ /// Called on each token character to normalize it before it is added to the
+ /// token. The default implementation does nothing. Subclasses may use this
+ /// to, e.g., lowercase tokens.
+ ///
+ protected internal virtual char Normalize(char c)
+ {
+ return c;
+ }
+
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ int length = 0;
+ int start = bufferIndex;
+ char[] buffer = termAtt.TermBuffer();
+ while (true)
+ {
+
+ if (bufferIndex >= dataLen)
+ {
+ offset += dataLen;
+ dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
+ if (dataLen <= 0)
+ {
+ dataLen = 0; // so next offset += dataLen won't decrement offset
+ if (length > 0)
+ break;
+ return false;
+ }
+ bufferIndex = 0;
+ }
+
+ char c = ioBuffer[bufferIndex++];
+
+ if (IsTokenChar(c))
+ {
+ // if it's a token char
+
+ if (length == 0)
+ // start of token
+ start = offset + bufferIndex - 1;
+ else if (length == buffer.Length)
+ buffer = termAtt.ResizeTermBuffer(1 + length);
+
+ buffer[length++] = Normalize(c); // buffer it, normalized
+
+ if (length == MAX_WORD_LEN)
+ // buffer overflow!
+ break;
+ }
+ else if (length > 0)
+ // at non-Letter w/ chars
+ break; // return 'em
+ }
+
+ termAtt.SetTermLength(length);
+ offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
+ return true;
+ }
+
+ public override void End()
+ {
+ // set final offset
+ int finalOffset = CorrectOffset(offset);
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset(System.IO.TextReader input)
+ {
+ base.Reset(input);
+ bufferIndex = 0;
+ offset = 0;
+ dataLen = 0;
+ }
+ }
+}
\ No newline at end of file
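The IsTokenChar/Normalize contract is easiest to see in a tiny subclass. The sketch below is an illustrative class that is not part of the import; it emits maximal runs of non-whitespace characters.

    // Sketch only: a whitespace-delimited tokenizer as a CharTokenizer subclass.
    // Like LetterTokenizer, the protected internal override assumes compilation
    // in the same assembly as CharTokenizer.
    using System.IO;
    using Lucene.Net.Analysis;

    public sealed class NonWhitespaceTokenizer : CharTokenizer
    {
        public NonWhitespaceTokenizer(TextReader input) : base(input)
        {
        }

        // A token is any maximal run of characters that are not whitespace.
        protected internal override bool IsTokenChar(char c)
        {
            return !char.IsWhiteSpace(c);
        }
    }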
diff --git a/external/Lucene.Net.Light/src/core/Analysis/ISOLatin1AccentFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/ISOLatin1AccentFilter.cs
new file mode 100644
index 0000000000..5fd839e6e4
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/ISOLatin1AccentFilter.cs
@@ -0,0 +1,344 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A filter that replaces accented characters in the ISO Latin 1 character set
+ /// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
+ ///
+ /// For instance, 'à' will be replaced by 'a'.
+ ///
+ ///
+ ///
+ /// If you build a new index, use ASCIIFoldingFilter,
+ /// which covers a superset of Latin 1.
+ /// This class is included for use with existing indexes and will be removed
+ /// in a future release (possible Lucene 4.0)
+ ///
+ [Obsolete("If you build a new index, use ASCIIFoldingFilter which covers a superset of Latin 1. This class is included for use with existing indexes and will be removed in a future release (possible Lucene 4.0).")]
+ public class ISOLatin1AccentFilter : TokenFilter
+ {
+ public ISOLatin1AccentFilter(TokenStream input):base(input)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private char[] output = new char[256];
+ private int outputPos;
+ private readonly ITermAttribute termAtt;
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ char[] buffer = termAtt.TermBuffer();
+ int length = termAtt.TermLength();
+ // If no characters actually require rewriting then we
+ // just return token as-is:
+ for (int i = 0; i < length; i++)
+ {
+ char c = buffer[i];
+ if (c >= '\u00c0' && c <= '\uFB06')
+ {
+ RemoveAccents(buffer, length);
+ termAtt.SetTermBuffer(output, 0, outputPos);
+ break;
+ }
+ }
+ return true;
+ }
+ return false;
+ }
+
+ /// To replace accented characters in a String by unaccented equivalents.
+ public void RemoveAccents(char[] input, int length)
+ {
+
+ // Worst-case length required:
+ int maxSizeNeeded = 2 * length;
+
+ int size = output.Length;
+ while (size < maxSizeNeeded)
+ size *= 2;
+
+ if (size != output.Length)
+ output = new char[size];
+
+ outputPos = 0;
+
+ int pos = 0;
+
+ for (int i = 0; i < length; i++, pos++)
+ {
+ char c = input[pos];
+
+ // Quick test: if it's not in range then just keep
+ // current character
+ if (c < '\u00c0' || c > '\uFB06')
+ output[outputPos++] = c;
+ else
+ {
+ switch (c)
+ {
+
+ case '\u00C0':
+ // À
+ case '\u00C1':
+ // Á
+ case '\u00C2':
+ // Â
+ case '\u00C3':
+ // Ã
+ case '\u00C4':
+ // Ä
+ case '\u00C5': // Å
+ output[outputPos++] = 'A';
+ break;
+
+ case '\u00C6': // Æ
+ output[outputPos++] = 'A';
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00C7': // Ç
+ output[outputPos++] = 'C';
+ break;
+
+ case '\u00C8':
+ // È
+ case '\u00C9':
+ // É
+ case '\u00CA':
+ // Ê
+ case '\u00CB': // Ë
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00CC':
+ // Ì
+ case '\u00CD':
+ // Í
+ case '\u00CE':
+ // Î
+ case '\u00CF': // Ï
+ output[outputPos++] = 'I';
+ break;
+
+ case '\u0132': // IJ
+ output[outputPos++] = 'I';
+ output[outputPos++] = 'J';
+ break;
+
+ case '\u00D0': // Ð
+ output[outputPos++] = 'D';
+ break;
+
+ case '\u00D1': // Ñ
+ output[outputPos++] = 'N';
+ break;
+
+ case '\u00D2':
+ // Ò
+ case '\u00D3':
+ // Ó
+ case '\u00D4':
+ // Ô
+ case '\u00D5':
+ // Õ
+ case '\u00D6':
+ // Ö
+ case '\u00D8': // Ø
+ output[outputPos++] = 'O';
+ break;
+
+ case '\u0152': // Œ
+ output[outputPos++] = 'O';
+ output[outputPos++] = 'E';
+ break;
+
+ case '\u00DE': // Þ
+ output[outputPos++] = 'T';
+ output[outputPos++] = 'H';
+ break;
+
+ case '\u00D9':
+ // Ù
+ case '\u00DA':
+ // Ú
+ case '\u00DB':
+ // Û
+ case '\u00DC': // Ü
+ output[outputPos++] = 'U';
+ break;
+
+ case '\u00DD':
+ // Ý
+ case '\u0178': // Ÿ
+ output[outputPos++] = 'Y';
+ break;
+
+ case '\u00E0':
+ // à
+ case '\u00E1':
+ // á
+ case '\u00E2':
+ // â
+ case '\u00E3':
+ // ã
+ case '\u00E4':
+ // ä
+ case '\u00E5': // å
+ output[outputPos++] = 'a';
+ break;
+
+ case '\u00E6': // æ
+ output[outputPos++] = 'a';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u00E7': // ç
+ output[outputPos++] = 'c';
+ break;
+
+ case '\u00E8':
+ // è
+ case '\u00E9':
+ // é
+ case '\u00EA':
+ // ê
+ case '\u00EB': // ë
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u00EC':
+ // ì
+ case '\u00ED':
+ // í
+ case '\u00EE':
+ // î
+ case '\u00EF': // ï
+ output[outputPos++] = 'i';
+ break;
+
+ case '\u0133': // ij
+ output[outputPos++] = 'i';
+ output[outputPos++] = 'j';
+ break;
+
+ case '\u00F0': // ð
+ output[outputPos++] = 'd';
+ break;
+
+ case '\u00F1': // ñ
+ output[outputPos++] = 'n';
+ break;
+
+ case '\u00F2':
+ // ò
+ case '\u00F3':
+ // ó
+ case '\u00F4':
+ // ô
+ case '\u00F5':
+ // õ
+ case '\u00F6':
+ // ö
+ case '\u00F8': // ø
+ output[outputPos++] = 'o';
+ break;
+
+ case '\u0153': // œ
+ output[outputPos++] = 'o';
+ output[outputPos++] = 'e';
+ break;
+
+ case '\u00DF': // ß
+ output[outputPos++] = 's';
+ output[outputPos++] = 's';
+ break;
+
+ case '\u00FE': // þ
+ output[outputPos++] = 't';
+ output[outputPos++] = 'h';
+ break;
+
+ case '\u00F9':
+ // ù
+ case '\u00FA':
+ // ú
+ case '\u00FB':
+ // û
+ case '\u00FC': // ü
+ output[outputPos++] = 'u';
+ break;
+
+ case '\u00FD':
+ // ý
+ case '\u00FF': // ÿ
+ output[outputPos++] = 'y';
+ break;
+
+ case '\uFB00': // ff
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'f';
+ break;
+
+ case '\uFB01': // fi
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'i';
+ break;
+
+ case '\uFB02': // fl
+ output[outputPos++] = 'f';
+ output[outputPos++] = 'l';
+ break;
+ // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
+ // case '\uFB03': // ffi
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'i';
+ // break;
+ // case '\uFB04': // ffl
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'f';
+ // output[outputPos++] = 'l';
+ // break;
+
+ case '\uFB05': // ſt
+ output[outputPos++] = 'f';
+ output[outputPos++] = 't';
+ break;
+
+ case '\uFB06': // st
+ output[outputPos++] = 's';
+ output[outputPos++] = 't';
+ break;
+
+ default:
+ output[outputPos++] = c;
+ break;
+
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/KeywordAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/KeywordAnalyzer.cs
new file mode 100644
index 0000000000..116babb97e
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/KeywordAnalyzer.cs
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// "Tokenizes" the entire stream as a single token. This is useful
+ /// for data like zip codes, ids, and some product names.
+ ///
+ public class KeywordAnalyzer:Analyzer
+ {
+ public KeywordAnalyzer()
+ {
+ SetOverridesTokenStreamMethod<KeywordAnalyzer>();
+ }
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new KeywordTokenizer(reader);
+ }
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new KeywordTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
+}
\ No newline at end of file
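A brief sketch of what the KeywordAnalyzer above does: the entire field value is emitted as exactly one token, which suits identifiers such as zip codes. The field name and value are made up.

    // Sketch only: KeywordAnalyzer emits the whole field value as a single token.
    using System.IO;
    using Lucene.Net.Analysis;

    public static class KeywordExample
    {
        public static void Run()
        {
            var analyzer = new KeywordAnalyzer();
            // The whole value "90210-1234" becomes one term for the "zipcode" field.
            TokenStream ts = analyzer.TokenStream("zipcode", new StringReader("90210-1234"));
            while (ts.IncrementToken()) { }  // iterates exactly once
        }
    }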
diff --git a/external/Lucene.Net.Light/src/core/Analysis/KeywordTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/KeywordTokenizer.cs
new file mode 100644
index 0000000000..f97ff9538c
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/KeywordTokenizer.cs
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Emits the entire input as a single token.
+ public sealed class KeywordTokenizer:Tokenizer
+ {
+
+ private const int DEFAULT_BUFFER_SIZE = 256;
+
+ private bool done;
+ private int finalOffset;
+ private ITermAttribute termAtt;
+ private IOffsetAttribute offsetAtt;
+
+ public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE)
+ {
+ }
+
+ public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input)
+ {
+ Init(bufferSize);
+ }
+
+ public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input)
+ {
+ Init(bufferSize);
+ }
+
+ public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input)
+ {
+ Init(bufferSize);
+ }
+
+ private void Init(int bufferSize)
+ {
+ this.done = false;
+ termAtt = AddAttribute<ITermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ termAtt.ResizeTermBuffer(bufferSize);
+ }
+
+ public override bool IncrementToken()
+ {
+ if (!done)
+ {
+ ClearAttributes();
+ done = true;
+ int upto = 0;
+ char[] buffer = termAtt.TermBuffer();
+ while (true)
+ {
+ int length = input.Read(buffer, upto, buffer.Length - upto);
+ if (length == 0)
+ break;
+ upto += length;
+ if (upto == buffer.Length)
+ buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
+ }
+ termAtt.SetTermLength(upto);
+ finalOffset = CorrectOffset(upto);
+ offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
+ return true;
+ }
+ return false;
+ }
+
+ public override void End()
+ {
+ // set final offset
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset(System.IO.TextReader input)
+ {
+ base.Reset(input);
+ this.done = false;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/LengthFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/LengthFilter.cs
new file mode 100644
index 0000000000..c4f60ad40e
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/LengthFilter.cs
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Removes words that are too long or too short from the stream.
+ public sealed class LengthFilter:TokenFilter
+ {
+
+ internal int min;
+ internal int max;
+
+ private readonly ITermAttribute termAtt;
+
+ /// Build a filter that removes words that are too long or too
+ /// short from the text.
+ ///
+ public LengthFilter(TokenStream in_Renamed, int min, int max)
+ : base(in_Renamed)
+ {
+ this.min = min;
+ this.max = max;
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ /// Returns the next input Token whose term() is the right len
+ public override bool IncrementToken()
+ {
+ // return the first non-stop word found
+ while (input.IncrementToken())
+ {
+ var len = termAtt.TermLength();
+ if (len >= min && len <= max)
+ {
+ return true;
+ }
+ // note: else we ignore it but should we index each part of it?
+ }
+ // reached EOS -- return false
+ return false;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/LetterTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/LetterTokenizer.cs
new file mode 100644
index 0000000000..77629a85c4
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/LetterTokenizer.cs
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's
+ /// to say, it defines tokens as maximal strings of adjacent letters, as defined
+ /// by java.lang.Character.isLetter() predicate.
+ /// Note: this does a decent job for most European languages, but does a terrible
+ /// job for some Asian languages, where words are not separated by spaces.
+ ///
+
+ public class LetterTokenizer:CharTokenizer
+ {
+ /// Construct a new LetterTokenizer.
+ public LetterTokenizer(System.IO.TextReader @in):base(@in)
+ {
+ }
+
+ /// Construct a new LetterTokenizer using a given AttributeSource.
+ public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
+ : base(source, @in)
+ {
+ }
+
+ /// Construct a new LetterTokenizer using a given AttributeFactory.
+ public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+ : base(factory, @in)
+ {
+ }
+
+ /// Collects only characters which satisfy
+ /// System.Char.IsLetter(char).
+ ///
+ protected internal override bool IsTokenChar(char c)
+ {
+ return System.Char.IsLetter(c);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/LowerCaseFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseFilter.cs
new file mode 100644
index 0000000000..cad01976f4
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseFilter.cs
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Normalizes token text to lower case.
+ public sealed class LowerCaseFilter:TokenFilter
+ {
+ public LowerCaseFilter(TokenStream @in)
+ : base(@in)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ private readonly ITermAttribute termAtt;
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+
+ char[] buffer = termAtt.TermBuffer();
+ int length = termAtt.TermLength();
+ for (int i = 0; i < length; i++)
+ buffer[i] = System.Char.ToLower(buffer[i]);
+
+ return true;
+ }
+ return false;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/LowerCaseTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseTokenizer.cs
new file mode 100644
index 0000000000..4cea2179dc
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseTokenizer.cs
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// LowerCaseTokenizer performs the function of LetterTokenizer
+ /// and LowerCaseFilter together. It divides text at non-letters and converts
+ /// them to lower case. While it is functionally equivalent to the combination
+ /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
+ /// to doing the two tasks at once, hence this (redundant) implementation.
+ ///
+ /// Note: this does a decent job for most European languages, but does a terrible
+ /// job for some Asian languages, where words are not separated by spaces.
+ ///
+ public sealed class LowerCaseTokenizer:LetterTokenizer
+ {
+ /// Construct a new LowerCaseTokenizer.
+ public LowerCaseTokenizer(System.IO.TextReader @in)
+ : base(@in)
+ {
+ }
+
+ /// Construct a new LowerCaseTokenizer using a given AttributeSource.
+ public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in)
+ : base(source, @in)
+ {
+ }
+
+ /// Construct a new LowerCaseTokenizer using a given AttributeFactory.
+ public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+ : base(factory, @in)
+ {
+ }
+
+ /// Converts char to lower case via Char.ToLower.
+ ///
+ protected internal override char Normalize(char c)
+ {
+ return System.Char.ToLower(c);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/MappingCharFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/MappingCharFilter.cs
new file mode 100644
index 0000000000..9705719f8a
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/MappingCharFilter.cs
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Simplistic CharFilter that applies the mappings
+ /// contained in a NormalizeCharMap to the character
+ /// stream, correcting the resulting changes to the
+ /// offsets.
+ ///
+ public class MappingCharFilter : BaseCharFilter
+ {
+ private readonly NormalizeCharMap normMap;
+ private LinkedList<char> buffer;
+ private System.String replacement;
+ private int charPointer;
+ private int nextCharCounter;
+
+ /// Default constructor that takes a CharStream.
+ public MappingCharFilter(NormalizeCharMap normMap, CharStream @in)
+ : base(@in)
+ {
+ this.normMap = normMap;
+ }
+
+ /// Easy-use constructor that takes a TextReader.
+ public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
+ : base(CharReader.Get(@in))
+ {
+ this.normMap = normMap;
+ }
+
+ public override int Read()
+ {
+ while (true)
+ {
+ if (replacement != null && charPointer < replacement.Length)
+ {
+ return replacement[charPointer++];
+ }
+
+ int firstChar = NextChar();
+ if (firstChar == - 1)
+ return - 1;
+ NormalizeCharMap nm = normMap.submap != null
+ ? normMap.submap[(char) firstChar]
+ : null;
+ if (nm == null)
+ return firstChar;
+ NormalizeCharMap result = Match(nm);
+ if (result == null)
+ return firstChar;
+ replacement = result.normStr;
+ charPointer = 0;
+ if (result.diff != 0)
+ {
+ int prevCumulativeDiff = LastCumulativeDiff;
+ if (result.diff < 0)
+ {
+ for (int i = 0; i < - result.diff; i++)
+ AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
+ }
+ else
+ {
+ AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
+ }
+ }
+ }
+ }
+
+ private int NextChar()
+ {
+ nextCharCounter++;
+ if (buffer != null && buffer.Count != 0)
+ {
+ char tempObject = buffer.First.Value;
+ buffer.RemoveFirst();
+ return (tempObject);
+ }
+ return input.Read();
+ }
+
+ private void PushChar(int c)
+ {
+ nextCharCounter--;
+ if (buffer == null)
+ {
+ buffer = new LinkedList<char>();
+ }
+ buffer.AddFirst((char)c);
+ }
+
+ private void PushLastChar(int c)
+ {
+ if (buffer == null)
+ {
+ buffer = new LinkedList<char>();
+ }
+ buffer.AddLast((char)c);
+ }
+
+ private NormalizeCharMap Match(NormalizeCharMap map)
+ {
+ NormalizeCharMap result = null;
+ if (map.submap != null)
+ {
+ int chr = NextChar();
+ if (chr != - 1)
+ {
+ NormalizeCharMap subMap = map.submap[(char)chr];
+ if (subMap != null)
+ {
+ result = Match(subMap);
+ }
+ if (result == null)
+ {
+ PushChar(chr);
+ }
+ }
+ }
+ if (result == null && map.normStr != null)
+ {
+ result = map;
+ }
+ return result;
+ }
+
+ public override int Read(System.Char[] cbuf, int off, int len)
+ {
+ var tmp = new char[len];
+ int l = input.Read(tmp, 0, len);
+ if (l != 0)
+ {
+ for (int i = 0; i < l; i++)
+ PushLastChar(tmp[i]);
+ }
+ l = 0;
+ for (int i = off; i < off + len; i++)
+ {
+ int c = Read();
+ if (c == - 1)
+ break;
+ cbuf[i] = (char) c;
+ l++;
+ }
+ return l == 0?- 1:l;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/NormalizeCharMap.cs b/external/Lucene.Net.Light/src/core/Analysis/NormalizeCharMap.cs
new file mode 100644
index 0000000000..7fd520c876
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/NormalizeCharMap.cs
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Holds a map of String input to String output, to be used
+ /// with MappingCharFilter.
+ ///
+ public class NormalizeCharMap
+ {
+ internal System.Collections.Generic.IDictionary<char, NormalizeCharMap> submap;
+ internal System.String normStr;
+ internal int diff;
+
+ /// Records a replacement to be applied to the input
+ /// stream. Whenever singleMatch occurs in
+ /// the input, it will be replaced with
+ /// replacement.
+ ///
+ ///
+ /// input String to be replaced
+ ///
+ /// output String
+ ///
+ public virtual void Add(System.String singleMatch, System.String replacement)
+ {
+ NormalizeCharMap currMap = this;
+ for (var i = 0; i < singleMatch.Length; i++)
+ {
+ char c = singleMatch[i];
+ if (currMap.submap == null)
+ {
+ currMap.submap = new HashMap<char, NormalizeCharMap>(1);
+ }
+ var map = currMap.submap[c];
+ if (map == null)
+ {
+ map = new NormalizeCharMap();
+ currMap.submap[c] = map;
+ }
+ currMap = map;
+ }
+ if (currMap.normStr != null)
+ {
+ throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch);
+ }
+ currMap.normStr = replacement;
+ currMap.diff = singleMatch.Length - replacement.Length;
+ }
+ }
+}
\ No newline at end of file
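A small usage sketch for the two classes above: character folding applied before tokenization. The specific mappings are illustrative only.

    using System.IO;
    using System.Text;
    using Lucene.Net.Analysis;

    static class MappingCharFilterSketch
    {
        static string Fold(string text)
        {
            var map = new NormalizeCharMap();
            map.Add("ß", "ss");          // illustrative mappings
            map.Add("æ", "ae");

            CharStream cs = new MappingCharFilter(map, new StringReader(text));
            var sb = new StringBuilder();
            int ch;
            while ((ch = cs.Read()) != -1)   // Read() returns -1 at end of stream
                sb.Append((char) ch);
            return sb.ToString();            // "Straße" comes back as "Strasse"
        }
    }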
diff --git a/external/Lucene.Net.Light/src/core/Analysis/NumericTokenStream.cs b/external/Lucene.Net.Light/src/core/Analysis/NumericTokenStream.cs
new file mode 100644
index 0000000000..90b6e72882
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/NumericTokenStream.cs
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Search;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using NumericField = Lucene.Net.Documents.NumericField;
+// javadocs
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Expert: This class provides a TokenStream
+ /// for indexing numeric values that can be used by NumericRangeQuery
+ /// or NumericRangeFilter.
+ ///
+ /// Note that for simple usage, NumericField is
+ /// recommended. NumericField disables norms and
+ /// term freqs, as they are not usually needed during
+ /// searching. If you need to change these settings, you
+ /// should use this class.
+ ///
+ /// See NumericField for capabilities of fields
+ /// indexed numerically.
+ ///
+ /// Here's an example usage, for an int field:
+ ///
+ /// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
+ /// field.setOmitNorms(true);
+ /// field.setOmitTermFreqAndPositions(true);
+ /// document.add(field);
+ ///
+ /// For optimal performance, re-use the TokenStream and Field instance
+ /// for more than one document:
+ ///
+ /// NumericTokenStream stream = new NumericTokenStream(precisionStep);
+ /// Field field = new Field(name, stream);
+ /// field.setOmitNorms(true);
+ /// field.setOmitTermFreqAndPositions(true);
+ /// Document document = new Document();
+ /// document.add(field);
+ ///
+ /// for(all documents) {
+ /// stream.setIntValue(value)
+ /// writer.addDocument(document);
+ /// }
+ ///
+ /// This stream is not intended to be used in analyzers;
+ /// it's more for iterating the different precisions during
+ /// indexing a specific numeric value.
+ ///
+ /// NOTE: as token streams are only consumed once
+ /// the document is added to the index, if you index more
+ /// than one numeric field, use a separate NumericTokenStream
+ /// instance for each.
+ ///
+ /// See NumericUtils for more details on the
+ /// precisionStep parameter as well as how numeric fields work under the hood.
+ ///
+ /// NOTE: This API is experimental and
+ /// might change in incompatible ways in the next release.
+ /// Since 2.9
+ ///
+ public sealed class NumericTokenStream : TokenStream
+ {
+ private void InitBlock()
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ typeAtt = AddAttribute<ITypeAttribute>();
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ /// The full precision token gets this token type assigned.
+ public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
+
+ /// The lower precision tokens gets this token type assigned.
+ public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
+
+ /// Creates a token stream for numeric values using the default precisionStep
+ /// NumericUtils.PRECISION_STEP_DEFAULT (4). The stream is not yet initialized;
+ /// before using it, set a value with one of the Set???Value() methods.
+ ///
+ public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT)
+ {
+ }
+
+ /// Creates a token stream for numeric values with the specified
+ /// precisionStep. The stream is not yet initialized;
+ /// before using it, set a value with one of the Set???Value() methods.
+ ///
+ public NumericTokenStream(int precisionStep):base()
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// Expert: Creates a token stream for numeric values with the specified
+ /// precisionStep using the given AttributeSource.
+ /// The stream is not yet initialized;
+ /// before using it, set a value with one of the Set???Value() methods.
+ ///
+ public NumericTokenStream(AttributeSource source, int precisionStep):base(source)
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// Expert: Creates a token stream for numeric values with the specified
+ /// precisionStep using the given AttributeFactory.
+ /// The stream is not yet initialized;
+ /// before using it, set a value with one of the Set???Value() methods.
+ ///
+ public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory)
+ {
+ InitBlock();
+ this.precisionStep = precisionStep;
+ if (precisionStep < 1)
+ throw new System.ArgumentException("precisionStep must be >=1");
+ }
+
+ /// Initializes the token stream with the supplied long value.
+ /// the value, for which this TokenStream should enumerate tokens.
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))
+ ///
+ public NumericTokenStream SetLongValue(long value_Renamed)
+ {
+ this.value_Renamed = value_Renamed;
+ valSize = 64;
+ shift = 0;
+ return this;
+ }
+
+ /// Initializes the token stream with the supplied int value.
+ /// the value, for which this TokenStream should enumerate tokens.
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))
+ ///
+ public NumericTokenStream SetIntValue(int value_Renamed)
+ {
+ this.value_Renamed = (long) value_Renamed;
+ valSize = 32;
+ shift = 0;
+ return this;
+ }
+
+ /// Initializes the token stream with the supplied double value.
+ /// the value, for which this TokenStream should enumerate tokens.
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))
+ ///
+ public NumericTokenStream SetDoubleValue(double value_Renamed)
+ {
+ this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed);
+ valSize = 64;
+ shift = 0;
+ return this;
+ }
+
+ /// Initializes the token stream with the supplied float value.
+ /// the value, for which this TokenStream should enumerate tokens.
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))
+ ///
+ public NumericTokenStream SetFloatValue(float value_Renamed)
+ {
+ this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed);
+ valSize = 32;
+ shift = 0;
+ return this;
+ }
+
+ // @Override
+ public override void Reset()
+ {
+ if (valSize == 0)
+ throw new System.SystemException("call set???Value() before usage");
+ shift = 0;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+
+ // @Override
+ public override bool IncrementToken()
+ {
+ if (valSize == 0)
+ throw new System.SystemException("call set???Value() before usage");
+ if (shift >= valSize)
+ return false;
+
+ ClearAttributes();
+ char[] buffer;
+ switch (valSize)
+ {
+
+ case 64:
+ buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
+ termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer));
+ break;
+
+
+ case 32:
+ buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT);
+ termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer));
+ break;
+
+
+ default:
+ // should not happen
+ throw new System.ArgumentException("valSize must be 32 or 64");
+
+ }
+
+ typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC;
+ posIncrAtt.PositionIncrement = (shift == 0)?1:0;
+ shift += precisionStep;
+ return true;
+ }
+
+ // @Override
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize);
+ sb.Append(",precisionStep=").Append(precisionStep).Append(')');
+ return sb.ToString();
+ }
+
+ // members
+ private ITermAttribute termAtt;
+ private ITypeAttribute typeAtt;
+ private IPositionIncrementAttribute posIncrAtt;
+
+ private int shift = 0, valSize = 0; // valSize==0 means not initialized
+ private readonly int precisionStep;
+
+ private long value_Renamed = 0L;
+ }
+}
\ No newline at end of file
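A C# rendering of the re-use pattern described in the class comment above. The Field/Document/IndexWriter types and the OmitNorms and OmitTermFreqAndPositions properties are assumed from the full Lucene.Net API; they are not part of this subset, so treat this as a sketch rather than the port's own example.

    using Lucene.Net.Analysis;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;

    static class NumericTokenStreamSketch
    {
        // Re-use one stream, field and document for many values.
        static void IndexPrices(IndexWriter writer, int[] prices, int precisionStep)
        {
            var stream = new NumericTokenStream(precisionStep);
            var field = new Field("price", stream);
            field.OmitNorms = true;
            field.OmitTermFreqAndPositions = true;

            var doc = new Document();
            doc.Add(field);

            foreach (int price in prices)
            {
                stream.SetIntValue(price);   // re-initialize the stream for each value
                writer.AddDocument(doc);
            }
        }
    }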
diff --git a/external/Lucene.Net.Light/src/core/Analysis/PerFieldAnalyzerWrapper.cs b/external/Lucene.Net.Light/src/core/Analysis/PerFieldAnalyzerWrapper.cs
new file mode 100644
index 0000000000..b1c43aa886
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/PerFieldAnalyzerWrapper.cs
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// This analyzer is used to facilitate scenarios where different
+ /// fields require different analysis techniques. Use
+ /// AddAnalyzer to add a non-default analyzer on a field name basis.
+ ///
+ /// Example usage:
+ ///
+ /// PerFieldAnalyzerWrapper aWrapper =
+ ///   new PerFieldAnalyzerWrapper(new StandardAnalyzer());
+ /// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
+ /// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
+ ///
+ /// In this example, StandardAnalyzer will be used for all fields except "firstname"
+ /// and "lastname", for which KeywordAnalyzer will be used.
+ ///
+ /// A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
+ /// and query parsing.
+ ///
+ public class PerFieldAnalyzerWrapper:Analyzer
+ {
+ private readonly Analyzer defaultAnalyzer;
+ private readonly IDictionary<string, Analyzer> analyzerMap = new HashMap<string, Analyzer>();
+
+
+ /// Constructs with default analyzer.
+ ///
+ ///
+ /// Any fields not specifically
+ /// defined to use a different analyzer will use the one provided here.
+ ///
+ public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer)
+ : this(defaultAnalyzer, null)
+ {
+ }
+
+ /// Constructs with default analyzer and a map of analyzers to use for
+ /// specific fields.
+ ///
+ ///
+ /// Any fields not specifically
+ /// defined to use a different analyzer will use the one provided here.
+ ///
+ /// a Map (String field name to the Analyzer) to be
+ /// used for those fields
+ ///
+ public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IEnumerable<KeyValuePair<string, Analyzer>> fieldAnalyzers)
+ {
+ this.defaultAnalyzer = defaultAnalyzer;
+ if (fieldAnalyzers != null)
+ {
+ foreach(var entry in fieldAnalyzers)
+ analyzerMap[entry.Key] = entry.Value;
+ }
+ SetOverridesTokenStreamMethod<PerFieldAnalyzerWrapper>();
+ }
+
+
+ /// Defines an analyzer to use for the specified field.
+ ///
+ ///
+ /// field name requiring a non-default analyzer
+ ///
+ /// non-default analyzer to use for field
+ ///
+ public virtual void AddAnalyzer(System.String fieldName, Analyzer analyzer)
+ {
+ analyzerMap[fieldName] = analyzer;
+ }
+
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+
+ return analyzer.TokenStream(fieldName, reader);
+ }
+
+ public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+
+ return analyzer.ReusableTokenStream(fieldName, reader);
+ }
+
+ /// Return the positionIncrementGap from the analyzer assigned to fieldName
+ public override int GetPositionIncrementGap(string fieldName)
+ {
+ var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer;
+ return analyzer.GetPositionIncrementGap(fieldName);
+ }
+
+ /// Return the offsetGap from the analyzer assigned to field
+ public override int GetOffsetGap(Documents.IFieldable field)
+ {
+ Analyzer analyzer = analyzerMap[field.Name] ?? defaultAnalyzer;
+ return analyzer.GetOffsetGap(field);
+ }
+
+ public override System.String ToString()
+ {
+ // {{Aroush-2.9}} will 'analyzerMap.ToString()' work in the same way as Java's java.util.HashMap.toString()?
+ return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")";
+ }
+ }
+}
\ No newline at end of file
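The example in the class comment is Java; the equivalent call pattern against this C# port would look roughly like the sketch below. KeywordAnalyzer is assumed from the full library and is not included in this subset.

    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Version = Lucene.Net.Util.Version;

    static class PerFieldAnalyzerWrapperSketch
    {
        static Analyzer Build()
        {
            var wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_29));
            wrapper.AddAnalyzer("firstname", new KeywordAnalyzer());
            wrapper.AddAnalyzer("lastname", new KeywordAnalyzer());
            return wrapper;   // StandardAnalyzer is used for every field except the two above
        }
    }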
diff --git a/external/Lucene.Net.Light/src/core/Analysis/PorterStemFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/PorterStemFilter.cs
new file mode 100644
index 0000000000..b7f1dbf560
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/PorterStemFilter.cs
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Analysis.Tokenattributes;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Transforms the token stream as per the Porter stemming algorithm.
+ /// Note: the input to the stemming filter must already be in lower case,
+ /// so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
+ /// down the Tokenizer chain in order for this to work properly!
+ ///
+ /// To use this filter with other analyzers, you'll want to write an
+ /// Analyzer class that sets up the TokenStream chain as you want it.
+ /// To use this with LowerCaseTokenizer, for example, you'd write an
+ /// analyzer like this:
+ ///
+ ///
+ /// class MyAnalyzer extends Analyzer {
+ /// public final TokenStream tokenStream(String fieldName, Reader reader) {
+ /// return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ /// }
+ /// }
+ ///
+ ///
+ public sealed class PorterStemFilter:TokenFilter
+ {
+ private readonly PorterStemmer stemmer;
+ private readonly ITermAttribute termAtt;
+
+ public PorterStemFilter(TokenStream in_Renamed):base(in_Renamed)
+ {
+ stemmer = new PorterStemmer();
+ termAtt = AddAttribute<ITermAttribute>();
+ }
+
+ public override bool IncrementToken()
+ {
+ if (!input.IncrementToken())
+ return false;
+
+ if (stemmer.Stem(termAtt.TermBuffer(), 0, termAtt.TermLength()))
+ termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
+ return true;
+ }
+ }
+}
\ No newline at end of file
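The analyzer sketched in the class comment above is Java; a minimal C# rendering of the same chain, assuming nothing beyond the classes in this commit, would be:

    using Lucene.Net.Analysis;

    // Letter-tokenizes and lower-cases input, then applies the Porter stemmer.
    sealed class MyStemmingAnalyzer : Analyzer
    {
        public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        {
            return new PorterStemFilter(new LowerCaseTokenizer(reader));
        }
    }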
diff --git a/external/Lucene.Net.Light/src/core/Analysis/PorterStemmer.cs b/external/Lucene.Net.Light/src/core/Analysis/PorterStemmer.cs
new file mode 100644
index 0000000000..f47c5a76fd
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/PorterStemmer.cs
@@ -0,0 +1,746 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+
+Porter stemmer in Java. The original paper is in
+
+Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+no. 3, pp 130-137,
+
+See also http://www.tartarus.org/~martin/PorterStemmer/index.html
+
+Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+is then outside the bounds of b.
+
+Similarly,
+
+Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+b[j] is then outside the bounds of b.
+
+Release 3.
+
+[ This version is derived from Release 3, modified by Brian Goetz to
+optimize for fewer object creations. ]
+*/
+using System;
+namespace Lucene.Net.Analysis
+{
+
+ ///
+ /// Stemmer, implementing the Porter Stemming Algorithm
+ ///
+ /// The Stemmer class transforms a word into its root form. The input
+ /// word can be provided a character at a time (by calling add()), or at once
+ /// by calling one of the various stem(something) methods.
+ ///
+
+ class PorterStemmer
+ {
+ private char[] b;
+ private int i, j, k, k0;
+ private bool dirty = false;
+ private const int INC = 50; /* unit of size whereby b is increased */
+ private const int EXTRA = 1;
+
+ public PorterStemmer()
+ {
+ b = new char[INC];
+ i = 0;
+ }
+
+ /// reset() resets the stemmer so it can stem another word. If you invoke
+ /// the stemmer by calling add(char) and then stem(), you must call reset()
+ /// before starting another word.
+ ///
+ public virtual void Reset()
+ {
+ i = 0; dirty = false;
+ }
+
+ /// Add a character to the word being stemmed. When you are finished
+ /// adding characters, you can call stem(void) to process the word.
+ ///
+ public virtual void Add(char ch)
+ {
+ if (b.Length <= i + EXTRA)
+ {
+ var new_b = new char[b.Length + INC];
+ Array.Copy(b, 0, new_b, 0, b.Length);
+ b = new_b;
+ }
+ b[i++] = ch;
+ }
+
+ /// After a word has been stemmed, it can be retrieved by toString(),
+ /// or a reference to the internal buffer can be retrieved by getResultBuffer
+ /// and getResultLength (which is generally more efficient.)
+ ///
+ public override System.String ToString()
+ {
+ return new System.String(b, 0, i);
+ }
+
+ /// Returns the length of the word resulting from the stemming process.
+ public virtual int ResultLength
+ {
+ get { return i; }
+ }
+
+ /// Returns a reference to a character buffer containing the results of
+ /// the stemming process. You also need to consult getResultLength()
+ /// to determine the length of the result.
+ ///
+ public virtual char[] ResultBuffer
+ {
+ get { return b; }
+ }
+
+ /* cons(i) is true <=> b[i] is a consonant. */
+
+ private bool Cons(int i)
+ {
+ switch (b[i])
+ {
+
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ return false;
+
+ case 'y':
+ return (i == k0)?true:!Cons(i - 1);
+
+ default:
+ return true;
+
+ }
+ }
+
+ /* m() measures the number of consonant sequences between k0 and j. if c is
+ a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+ presence,
+
+ <c><v>       gives 0
+ <c>vc<v>     gives 1
+ <c>vcvc<v>   gives 2
+ <c>vcvcvc<v> gives 3
+ ....
+ */
+
+ private int M()
+ {
+ int n = 0;
+ int i = k0;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ while (true)
+ {
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ n++;
+ while (true)
+ {
+ if (i > j)
+ return n;
+ if (!Cons(i))
+ break;
+ i++;
+ }
+ i++;
+ }
+ }
+
+ /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+ private bool Vowelinstem()
+ {
+ int i;
+ for (i = k0; i <= j; i++)
+ if (!Cons(i))
+ return true;
+ return false;
+ }
+
+ /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+ private bool Doublec(int j)
+ {
+ if (j < k0 + 1)
+ return false;
+ if (b[j] != b[j - 1])
+ return false;
+ return Cons(j);
+ }
+
+ /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+ and also if the second c is not w,x or y. this is used when trying to
+ restore an e at the end of a short word. e.g.
+
+ cav(e), lov(e), hop(e), crim(e), but
+ snow, box, tray.
+
+ */
+
+ private bool Cvc(int i)
+ {
+ if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2))
+ return false;
+ else
+ {
+ int ch = b[i];
+ if (ch == 'w' || ch == 'x' || ch == 'y')
+ return false;
+ }
+ return true;
+ }
+
+ private bool Ends(System.String s)
+ {
+ int l = s.Length;
+ int o = k - l + 1;
+ if (o < k0)
+ return false;
+ for (int i = 0; i < l; i++)
+ if (b[o + i] != s[i])
+ return false;
+ j = k - l;
+ return true;
+ }
+
+ /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+ k. */
+
+ internal virtual void Setto(System.String s)
+ {
+ int l = s.Length;
+ int o = j + 1;
+ for (int i = 0; i < l; i++)
+ b[o + i] = s[i];
+ k = j + l;
+ dirty = true;
+ }
+
+ /* r(s) is used further down. */
+
+ internal virtual void R(System.String s)
+ {
+ if (M() > 0)
+ Setto(s);
+ }
+
+ /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+ caresses -> caress
+ ponies -> poni
+ ties -> ti
+ caress -> caress
+ cats -> cat
+
+ feed -> feed
+ agreed -> agree
+ disabled -> disable
+
+ matting -> mat
+ mating -> mate
+ meeting -> meet
+ milling -> mill
+ messing -> mess
+
+ meetings -> meet
+
+ */
+
+ private void Step1()
+ {
+ if (b[k] == 's')
+ {
+ if (Ends("sses"))
+ k -= 2;
+ else if (Ends("ies"))
+ Setto("i");
+ else if (b[k - 1] != 's')
+ k--;
+ }
+ if (Ends("eed"))
+ {
+ if (M() > 0)
+ k--;
+ }
+ else if ((Ends("ed") || Ends("ing")) && Vowelinstem())
+ {
+ k = j;
+ if (Ends("at"))
+ Setto("ate");
+ else if (Ends("bl"))
+ Setto("ble");
+ else if (Ends("iz"))
+ Setto("ize");
+ else if (Doublec(k))
+ {
+ int ch = b[k--];
+ if (ch == 'l' || ch == 's' || ch == 'z')
+ k++;
+ }
+ else if (M() == 1 && Cvc(k))
+ Setto("e");
+ }
+ }
+
+ /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+ private void Step2()
+ {
+ if (Ends("y") && Vowelinstem())
+ {
+ b[k] = 'i';
+ dirty = true;
+ }
+ }
+
+ /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+ -ation) maps to -ize etc. note that the string before the suffix must give
+ m() > 0. */
+
+ private void Step3()
+ {
+ if (k == k0)
+ return ; /* For Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("ational"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("tional"))
+ {
+ R("tion"); break;
+ }
+ break;
+
+ case 'c':
+ if (Ends("enci"))
+ {
+ R("ence"); break;
+ }
+ if (Ends("anci"))
+ {
+ R("ance"); break;
+ }
+ break;
+
+ case 'e':
+ if (Ends("izer"))
+ {
+ R("ize"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("bli"))
+ {
+ R("ble"); break;
+ }
+ if (Ends("alli"))
+ {
+ R("al"); break;
+ }
+ if (Ends("entli"))
+ {
+ R("ent"); break;
+ }
+ if (Ends("eli"))
+ {
+ R("e"); break;
+ }
+ if (Ends("ousli"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 'o':
+ if (Ends("ization"))
+ {
+ R("ize"); break;
+ }
+ if (Ends("ation"))
+ {
+ R("ate"); break;
+ }
+ if (Ends("ator"))
+ {
+ R("ate"); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("alism"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iveness"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("fulness"))
+ {
+ R("ful"); break;
+ }
+ if (Ends("ousness"))
+ {
+ R("ous"); break;
+ }
+ break;
+
+ case 't':
+ if (Ends("aliti"))
+ {
+ R("al"); break;
+ }
+ if (Ends("iviti"))
+ {
+ R("ive"); break;
+ }
+ if (Ends("biliti"))
+ {
+ R("ble"); break;
+ }
+ break;
+
+ case 'g':
+ if (Ends("logi"))
+ {
+ R("log"); break;
+ }
+ break;
+ }
+ }
+
+ /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+ private void Step4()
+ {
+ switch (b[k])
+ {
+
+ case 'e':
+ if (Ends("icate"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ative"))
+ {
+ R(""); break;
+ }
+ if (Ends("alize"))
+ {
+ R("al"); break;
+ }
+ break;
+
+ case 'i':
+ if (Ends("iciti"))
+ {
+ R("ic"); break;
+ }
+ break;
+
+ case 'l':
+ if (Ends("ical"))
+ {
+ R("ic"); break;
+ }
+ if (Ends("ful"))
+ {
+ R(""); break;
+ }
+ break;
+
+ case 's':
+ if (Ends("ness"))
+ {
+ R(""); break;
+ }
+ break;
+ }
+ }
+
+ /* step5() takes off -ant, -ence etc., in context vcvc. */
+
+ private void Step5()
+ {
+ if (k == k0)
+ return ; /* for Bug 1 */
+ switch (b[k - 1])
+ {
+
+ case 'a':
+ if (Ends("al"))
+ break;
+ return ;
+
+ case 'c':
+ if (Ends("ance"))
+ break;
+ if (Ends("ence"))
+ break;
+ return ;
+
+ case 'e':
+ if (Ends("er"))
+ break; return ;
+
+ case 'i':
+ if (Ends("ic"))
+ break; return ;
+
+ case 'l':
+ if (Ends("able"))
+ break;
+ if (Ends("ible"))
+ break; return ;
+
+ case 'n':
+ if (Ends("ant"))
+ break;
+ if (Ends("ement"))
+ break;
+ if (Ends("ment"))
+ break;
+ /* element etc. not stripped before the m */
+ if (Ends("ent"))
+ break;
+ return ;
+
+ case 'o':
+ if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't'))
+ break;
+ /* j >= 0 fixes Bug 2 */
+ if (Ends("ou"))
+ break;
+ return ;
+ /* takes care of -ous */
+
+ case 's':
+ if (Ends("ism"))
+ break;
+ return ;
+
+ case 't':
+ if (Ends("ate"))
+ break;
+ if (Ends("iti"))
+ break;
+ return ;
+
+ case 'u':
+ if (Ends("ous"))
+ break;
+ return ;
+
+ case 'v':
+ if (Ends("ive"))
+ break;
+ return ;
+
+ case 'z':
+ if (Ends("ize"))
+ break;
+ return ;
+
+ default:
+ return ;
+
+ }
+ if (M() > 1)
+ k = j;
+ }
+
+ /* step6() removes a final -e if m() > 1. */
+
+ private void Step6()
+ {
+ j = k;
+ if (b[k] == 'e')
+ {
+ int a = M();
+ if (a > 1 || a == 1 && !Cvc(k - 1))
+ k--;
+ }
+ if (b[k] == 'l' && Doublec(k) && M() > 1)
+ k--;
+ }
+
+
+ /// Stem a word provided as a String. Returns the result as a String.
+ public virtual System.String Stem(System.String s)
+ {
+ if (Stem(s.ToCharArray(), s.Length))
+ {
+ return ToString();
+ }
+ else
+ return s;
+ }
+
+ /// Stem a word contained in a char[]. Returns true if the stemming process
+ /// resulted in a word different from the input. You can retrieve the
+ /// result with getResultLength()/getResultBuffer() or toString().
+ ///
+ public virtual bool Stem(char[] word)
+ {
+ return Stem(word, word.Length);
+ }
+
+ /// Stem a word contained in a portion of a char[] array. Returns
+ /// true if the stemming process resulted in a word different from
+ /// the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ ///
+ public virtual bool Stem(char[] wordBuffer, int offset, int wordLen)
+ {
+ Reset();
+ if (b.Length < wordLen)
+ {
+ var new_b = new char[wordLen + EXTRA];
+ b = new_b;
+ }
+ Array.Copy(wordBuffer, offset, b, 0, wordLen);
+ i = wordLen;
+ return Stem(0);
+ }
+
+ /// Stem a word contained in a leading portion of a char[] array.
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ ///
+ public virtual bool Stem(char[] word, int wordLen)
+ {
+ return Stem(word, 0, wordLen);
+ }
+
+ /// Stem the word placed into the Stemmer buffer through calls to add().
+ /// Returns true if the stemming process resulted in a word different
+ /// from the input. You can retrieve the result with
+ /// getResultLength()/getResultBuffer() or toString().
+ ///
+ public virtual bool Stem()
+ {
+ return Stem(0);
+ }
+
+ public virtual bool Stem(int i0)
+ {
+ k = i - 1;
+ k0 = i0;
+ if (k > k0 + 1)
+ {
+ Step1(); Step2(); Step3(); Step4(); Step5(); Step6();
+ }
+ // Also, a word is considered dirty if we lopped off letters
+ // Thanks to Ifigenia Vairelles for pointing this out.
+ if (i != k + 1)
+ dirty = true;
+ i = k + 1;
+ return dirty;
+ }
+
+ /// Test program for demonstrating the Stemmer. It reads a file and
+ /// stems each word, writing the result to standard out.
+ /// Usage: Stemmer file-name
+ ///
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+ var s = new PorterStemmer();
+
+ for (int i = 0; i < args.Length; i++)
+ {
+ try
+ {
+ System.IO.Stream in_Renamed = new System.IO.FileStream(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read);
+ var buffer = new byte[1024];
+
+ int bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ int offset = 0;
+ s.Reset();
+
+ while (true)
+ {
+ int ch;
+ if (offset < bufferLen)
+ ch = buffer[offset++];
+ else
+ {
+ bufferLen = in_Renamed.Read(buffer, 0, buffer.Length);
+ offset = 0;
+ if (bufferLen < 0)
+ ch = - 1;
+ else
+ ch = buffer[offset++];
+ }
+
+ if (Char.IsLetter((char) ch))
+ {
+ s.Add(Char.ToLower((char) ch));
+ }
+ else
+ {
+ s.Stem();
+ Console.Out.Write(s.ToString());
+ s.Reset();
+ if (ch < 0)
+ break;
+ else
+ {
+ System.Console.Out.Write((char) ch);
+ }
+ }
+ }
+
+ in_Renamed.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ Console.Out.WriteLine("error reading " + args[i]);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
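PorterStemmer has no access modifier, so it is internal to the assembly; from code inside it, the one-shot Stem(String) overload behaves as in this sketch (expected outputs taken from the step1() comment above):

    // Inside the Lucene.Net assembly (PorterStemmer is not public):
    var stemmer = new PorterStemmer();
    string a = stemmer.Stem("caresses");   // "caress"
    string b = stemmer.Stem("ponies");     // "poni"
    string c = stemmer.Stem("meetings");   // "meet"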
diff --git a/external/Lucene.Net.Light/src/core/Analysis/SimpleAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/SimpleAnalyzer.cs
new file mode 100644
index 0000000000..b84f470e8e
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/SimpleAnalyzer.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// An Analyzer that filters LetterTokenizer
+ /// with LowerCaseFilter
+ ///
+
+ public sealed class SimpleAnalyzer : Analyzer
+ {
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new LowerCaseTokenizer(reader);
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new LowerCaseTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
+}
\ No newline at end of file
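A short sketch of what SimpleAnalyzer produces; token boundaries follow LowerCaseTokenizer, which splits at every non-letter. ITermAttribute is assumed from the attribute classes elsewhere in this subset.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;

    static class SimpleAnalyzerSketch
    {
        static void Main()
        {
            var analyzer = new SimpleAnalyzer();
            TokenStream ts = analyzer.TokenStream("body", new StringReader("MonoDoc 3.2-beta Index"));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(term.Term);   // monodoc, beta, index (digits and '-' are breaks)
        }
    }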
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardAnalyzer.cs
new file mode 100644
index 0000000000..347d0262a4
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardAnalyzer.cs
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Analysis;
+using Lucene.Net.Util;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+ /// Filters StandardTokenizer with StandardFilter, LowerCaseFilter
+ /// and StopFilter, using a list of English stop
+ /// words.
+ ///
+ /// You must specify the required Version compatibility when creating
+ /// StandardAnalyzer:
+ ///
+ /// - As of 2.9, StopFilter preserves position increments
+ /// - As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+ ///   LUCENE-1608)
+ ///
+ public class StandardAnalyzer : Analyzer
+ {
+ private ISet<string> stopSet;
+
+ /// Specifies whether deprecated acronyms should be replaced with HOST type.
+ /// See https://issues.apache.org/jira/browse/LUCENE-1068
+ ///
+ private bool replaceInvalidAcronym, enableStopPositionIncrements;
+
+ /// An unmodifiable set containing some common English words that are usually not
+ /// useful for searching.
+ ///
+ public static readonly ISet<string> STOP_WORDS_SET;
+ private Version matchVersion;
+
+ /// Builds an analyzer with the default stop words (STOP_WORDS_SET).
+ ///
+ /// Lucene version to match - see above
+ public StandardAnalyzer(Version matchVersion)
+ : this(matchVersion, STOP_WORDS_SET)
+ { }
+
+ /// Builds an analyzer with the given stop words.
+ /// Lucene version to match - see above
+ ///
+ ///
+ /// stop words
+ ///
+ public StandardAnalyzer(Version matchVersion, ISet stopWords)
+ {
+ stopSet = stopWords;
+ SetOverridesTokenStreamMethod<StandardAnalyzer>();
+ enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
+ this.matchVersion = matchVersion;
+ }
+
+ /// Builds an analyzer with the stop words from the given file.
+ ///
+ ///
+ /// Lucene version to match - see above
+ ///
+ ///
+ /// File to read stop words from
+ ///
+ public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords)
+ : this (matchVersion, WordlistLoader.GetWordSet(stopwords))
+ {
+ }
+
+ /// Builds an analyzer with the stop words from the given reader.
+ ///
+ ///
+ /// Lucene version to match - see above
+ ///
+ ///
+ /// Reader to read stop words from
+ ///
+ public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
+ : this(matchVersion, WordlistLoader.GetWordSet(stopwords))
+ { }
+
+ /// Constructs a StandardTokenizer filtered by a
+ /// StandardFilter, a LowerCaseFilter and a StopFilter.
+ ///
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
+ tokenStream.MaxTokenLength = maxTokenLength;
+ TokenStream result = new StandardFilter(tokenStream);
+ result = new LowerCaseFilter(result);
+ result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+ return result;
+ }
+
+ private sealed class SavedStreams
+ {
+ internal StandardTokenizer tokenStream;
+ internal TokenStream filteredTokenStream;
+ }
+
+ /// Default maximum allowed token length
+ public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+ private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+ /// Set maximum allowed token length. If a token is seen
+ /// that exceeds this length then it is discarded. This
+ /// setting only takes effect the next time tokenStream or
+ /// reusableTokenStream is called.
+ ///
+ public virtual int MaxTokenLength
+ {
+ get { return maxTokenLength; }
+ set { maxTokenLength = value; }
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ if (overridesTokenStreamMethod)
+ {
+ // LUCENE-1678: force fallback to tokenStream() if we
+ // have been subclassed and that subclass overrides
+ // tokenStream but not reusableTokenStream
+ return TokenStream(fieldName, reader);
+ }
+ SavedStreams streams = (SavedStreams) PreviousTokenStream;
+ if (streams == null)
+ {
+ streams = new SavedStreams();
+ PreviousTokenStream = streams;
+ streams.tokenStream = new StandardTokenizer(matchVersion, reader);
+ streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
+ streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
+ streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
+ streams.filteredTokenStream, stopSet);
+ }
+ else
+ {
+ streams.tokenStream.Reset(reader);
+ }
+ streams.tokenStream.MaxTokenLength = maxTokenLength;
+
+ streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym);
+
+ return streams.filteredTokenStream;
+ }
+ static StandardAnalyzer()
+ {
+ STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+ }
+ }
+}
\ No newline at end of file
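A usage sketch for the analyzer above: with the default stop set, common English words such as "the" are dropped and the remaining terms are lower-cased. Version.LUCENE_29 and ITermAttribute are taken from the rest of this subset.

    using System;
    using System.IO;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.Tokenattributes;
    using Version = Lucene.Net.Util.Version;

    static class StandardAnalyzerSketch
    {
        static void Main()
        {
            var analyzer = new StandardAnalyzer(Version.LUCENE_29);
            TokenStream ts = analyzer.TokenStream("content", new StringReader("The Quick Brown Fox"));
            ITermAttribute term = ts.GetAttribute<ITermAttribute>();
            while (ts.IncrementToken())
                Console.WriteLine(term.Term);   // quick, brown, fox ("the" is a stop word)
        }
    }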
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardFilter.cs
new file mode 100644
index 0000000000..fd13261084
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardFilter.cs
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Token = Lucene.Net.Analysis.Token;
+using TokenFilter = Lucene.Net.Analysis.TokenFilter;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+ /// Normalizes tokens extracted with StandardTokenizer.
+
+ public sealed class StandardFilter:TokenFilter
+ {
+
+
+ /// Construct filtering in.
+ public StandardFilter(TokenStream in_Renamed):base(in_Renamed)
+ {
+ termAtt = AddAttribute<ITermAttribute>();
+ typeAtt = AddAttribute<ITypeAttribute>();
+ }
+
+ private static readonly System.String APOSTROPHE_TYPE;
+ private static readonly System.String ACRONYM_TYPE;
+
+ // this filter uses the type and term attributes
+ private ITypeAttribute typeAtt;
+ private ITermAttribute termAtt;
+
+ /// Returns the next token in the stream, or null at EOS.
+ /// Removes 's from the end of words.
+ /// Removes dots from acronyms.
+ ///
+ public override bool IncrementToken()
+ {
+ if (!input.IncrementToken())
+ {
+ return false;
+ }
+
+ char[] buffer = termAtt.TermBuffer();
+ int bufferLength = termAtt.TermLength();
+ System.String type = typeAtt.Type;
+
+ if ((System.Object) type == (System.Object) APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S'))
+ {
+ // Strip last 2 characters off
+ termAtt.SetTermLength(bufferLength - 2);
+ }
+ else if ((System.Object) type == (System.Object) ACRONYM_TYPE)
+ {
+ // remove dots
+ int upto = 0;
+ for (int i = 0; i < bufferLength; i++)
+ {
+ char c = buffer[i];
+ if (c != '.')
+ buffer[upto++] = c;
+ }
+ termAtt.SetTermLength(upto);
+ }
+
+ return true;
+ }
+ static StandardFilter()
+ {
+ APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
+ ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizer.cs
new file mode 100644
index 0000000000..dca409d59a
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizer.cs
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using CharReader = Lucene.Net.Analysis.CharReader;
+using Token = Lucene.Net.Analysis.Token;
+using Tokenizer = Lucene.Net.Analysis.Tokenizer;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+ /// A grammar-based tokenizer constructed with JFlex
+ ///
+ /// This should be a good tokenizer for most European-language documents:
+ ///
+ /// - Splits words at punctuation characters, removing punctuation. However, a
+ /// dot that's not followed by whitespace is considered part of a token.
+ /// - Splits words at hyphens, unless there's a number in the token, in which case
+ /// the whole token is interpreted as a product number and is not split.
+ /// - Recognizes email addresses and internet hostnames as one token.
+ ///
+ /// Many applications have specific tokenizer needs. If this tokenizer does
+ /// not suit your application, please consider copying this source code
+ /// directory to your project and maintaining your own grammar-based tokenizer.
+ ///
+ /// You must specify the required Version compatibility when creating
+ /// StandardTokenizer:
+ ///
+ /// - As of 2.4, Tokens incorrectly identified as acronyms are corrected (see
+ ///   LUCENE-1608)
+ ///
+ public sealed class StandardTokenizer:Tokenizer
+ {
+ private void InitBlock()
+ {
+ maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+ }
+ /// A private instance of the JFlex-constructed scanner
+ private StandardTokenizerImpl scanner;
+
+ public const int ALPHANUM = 0;
+ public const int APOSTROPHE = 1;
+ public const int ACRONYM = 2;
+ public const int COMPANY = 3;
+ public const int EMAIL = 4;
+ public const int HOST = 5;
+ public const int NUM = 6;
+ public const int CJ = 7;
+
+ /// this solves a bug where HOSTs that end with '.' are identified
+ /// as ACRONYMs.
+ ///
+ [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs.")]
+ public const int ACRONYM_DEP = 8;
+
+ /// String token types that correspond to token type int constants
+ public static readonly System.String[] TOKEN_TYPES = new System.String[]{"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>"};
+
+ private bool replaceInvalidAcronym;
+
+ private int maxTokenLength;
+
+ /// Set the max allowed token length. Any token longer
+ /// than this is skipped.
+ ///
+ public int MaxTokenLength
+ {
+ get { return maxTokenLength; }
+ set { this.maxTokenLength = value; }
+ }
+
+ /// Creates a new instance of the
+ /// StandardTokenizer. Attaches
+ /// the input to the newly created JFlex scanner.
+ ///
+ ///
+ ///
+ /// The input reader
+ ///
+ /// See http://issues.apache.org/jira/browse/LUCENE-1068
+ ///
+ public StandardTokenizer(Version matchVersion, System.IO.TextReader input):base()
+ {
+ InitBlock();
+ this.scanner = new StandardTokenizerImpl(input);
+ Init(input, matchVersion);
+ }
+
+ /// Creates a new StandardTokenizer with a given AttributeSource.
+ public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input):base(source)
+ {
+ InitBlock();
+ this.scanner = new StandardTokenizerImpl(input);
+ Init(input, matchVersion);
+ }
+
+ /// Creates a new StandardTokenizer with a given
+ /// AttributeFactory.
+ ///
+ public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input):base(factory)
+ {
+ InitBlock();
+ this.scanner = new StandardTokenizerImpl(input);
+ Init(input, matchVersion);
+ }
+
+ private void Init(System.IO.TextReader input, Version matchVersion)
+ {
+ if (matchVersion.OnOrAfter(Version.LUCENE_24))
+ {
+ replaceInvalidAcronym = true;
+ }
+ else
+ {
+ replaceInvalidAcronym = false;
+ }
+ this.input = input;
+ termAtt = AddAttribute<ITermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ typeAtt = AddAttribute<ITypeAttribute>();
+ }
+
+ // this tokenizer generates three attributes:
+ // offset, positionIncrement and type
+ private ITermAttribute termAtt;
+ private IOffsetAttribute offsetAtt;
+ private IPositionIncrementAttribute posIncrAtt;
+ private ITypeAttribute typeAtt;
+
+ ///
+ /// (non-Javadoc)
+ ///
+ ///
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ int posIncr = 1;
+
+ while (true)
+ {
+ int tokenType = scanner.GetNextToken();
+
+ if (tokenType == StandardTokenizerImpl.YYEOF)
+ {
+ return false;
+ }
+
+ if (scanner.Yylength() <= maxTokenLength)
+ {
+ posIncrAtt.PositionIncrement = posIncr;
+ scanner.GetText(termAtt);
+ int start = scanner.Yychar();
+ offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength()));
+ // This 'if' should be removed in the next release. For now, it converts
+ // invalid acronyms to HOST. When removed, only the 'else' part should
+ // remain.
+ if (tokenType == StandardTokenizerImpl.ACRONYM_DEP)
+ {
+ if (replaceInvalidAcronym)
+ {
+ typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST];
+ termAtt.SetTermLength(termAtt.TermLength() - 1); // remove extra '.'
+ }
+ else
+ {
+ typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
+ }
+ }
+ else
+ {
+ typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[tokenType];
+ }
+ return true;
+ }
+ // When we skip a too-long term, we still increment the
+ // position increment
+ else
+ posIncr++;
+ }
+ }
+
+ public override void End()
+ {
+ // set final offset
+ int finalOffset = CorrectOffset(scanner.Yychar() + scanner.Yylength());
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset(System.IO.TextReader reader)
+ {
+ base.Reset(reader);
+ scanner.Reset(reader);
+ }
+
+ ///
+ /// Remove in 3.X and make true the only valid value
+ /// See https://issues.apache.org/jira/browse/LUCENE-1068
+ ///
+ /// Set to true to replace mischaracterized acronyms as HOST.
+ ///
+ [Obsolete("Remove in 3.X and make true the only valid value. See https://issues.apache.org/jira/browse/LUCENE-1068")]
+ public void SetReplaceInvalidAcronym(bool replaceInvalidAcronym)
+ {
+ this.replaceInvalidAcronym = replaceInvalidAcronym;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizerImpl.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizerImpl.cs
new file mode 100644
index 0000000000..cb4bf5fb4b
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizerImpl.cs
@@ -0,0 +1,707 @@
+/* The following code was generated by JFlex 1.4.1 on 9/4/08 6:49 PM */
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate
+ the tokenizer, only use Java 1.4 !!!
+ This grammar currently uses constructs (eg :digit:, :letter:) whose
+ meaning can vary according to the JRE used to run jflex. See
+ https://issues.apache.org/jira/browse/LUCENE-1126 for details.
+ For current backwards compatibility it is needed to support
+ only Java 1.4 - this will change in Lucene 3.1.
+*/
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Token = Lucene.Net.Analysis.Token;
+
+namespace Lucene.Net.Analysis.Standard
+{
+
+
+ /// This class is a scanner generated by
+ /// JFlex 1.4.1
+ /// on 9/4/08 6:49 PM from the specification file
+ /// /tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
+ ///
+ class StandardTokenizerImpl
+ {
+
+ /// This character denotes the end of file
+ public const int YYEOF = - 1;
+
+ /// initial size of the lookahead buffer
+ private const int ZZ_BUFFERSIZE = 16384;
+
+ /// lexical states
+ public const int YYINITIAL = 0;
+
+ /// Translates characters to character classes
+ private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0000\x0001\x000C\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0005" + "\x0001\x0003\x0004\x0000\x0001\x0009\x0001\x0007\x0001\x0004\x0001\x0009\x000A\x0002\x0006\x0000\x0001\x0006\x001A\x000A" + "\x0004\x0000\x0001\x0008\x0001\x0000\x001A\x000A\x002F\x0000\x0001\x000A\x000A\x0000\x0001\x000A\x0004\x0000\x0001\x000A" + "\x0005\x0000\x0017\x000A\x0001\x0000\x001F\x000A\x0001\x0000\u0128\x000A\x0002\x0000\x0012\x000A\x001C\x0000\x005E\x000A" + "\x0002\x0000\x0009\x000A\x0002\x0000\x0007\x000A\x000E\x0000\x0002\x000A\x000E\x0000\x0005\x000A\x0009\x0000\x0001\x000A" + "\x008B\x0000\x0001\x000A\x000B\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0014\x000A" + "\x0001\x0000\x002C\x000A\x0001\x0000\x0008\x000A\x0002\x0000\x001A\x000A\x000C\x0000\x0082\x000A\x000A\x0000\x0039\x000A" + "\x0002\x0000\x0002\x000A\x0002\x0000\x0002\x000A\x0003\x0000\x0026\x000A\x0002\x0000\x0002\x000A\x0037\x0000\x0026\x000A" + "\x0002\x0000\x0001\x000A\x0007\x0000\x0027\x000A\x0048\x0000\x001B\x000A\x0005\x0000\x0003\x000A\x002E\x0000\x001A\x000A" + "\x0005\x0000\x000B\x000A\x0015\x0000\x000A\x0002\x0007\x0000\x0063\x000A\x0001\x0000\x0001\x000A\x000F\x0000\x0002\x000A" + "\x0009\x0000\x000A\x0002\x0003\x000A\x0013\x0000\x0001\x000A\x0001\x0000\x001B\x000A\x0053\x0000\x0026\x000A\u015f\x0000" + "\x0035\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x0007\x0000\x000A\x000A\x0004\x0000\x000A\x0002\x0015\x0000" + "\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0003\x0000" + "\x0004\x000A\x0022\x0000\x0002\x000A\x0001\x0000\x0003\x000A\x0004\x0000\x000A\x0002\x0002\x000A\x0013\x0000\x0006\x000A" + "\x0004\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0002\x000A\x0001\x0000\x0002\x000A" +
+ "\x0001\x0000\x0002\x000A\x001F\x0000\x0004\x000A\x0001\x0000\x0001\x000A\x0007\x0000\x000A\x0002\x0002\x0000\x0003\x000A" + "\x0010\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x0005\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x000F\x0000\x0001\x000A" + "\x0005\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0002\x0000\x0004\x000A\x0003\x0000\x0001\x000A\x001E\x0000\x0002\x000A\x0001\x0000\x0003\x000A" + "\x0004\x0000\x000A\x0002\x0015\x0000\x0006\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0004\x000A\x0003\x0000\x0002\x000A" + "\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A\x0003\x0000\x0002\x000A\x0003\x0000\x0003\x000A\x0003\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x002D\x0000\x0009\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0024\x0000\x0001\x000A" + "\x0001\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x0010\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0012\x000A\x0003\x0000\x0018\x000A" + "\x0001\x0000\x0009\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0007\x000A\x0039\x0000\x0001\x0001\x0030\x000A\x0001\x0001" + "\x0002\x000A\x000C\x0001\x0007\x000A\x0009\x0001\x000A\x0002\x0027\x0000\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000" + "\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0001\x000A\x0006\x0000\x0004\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0002\x000A\x0001\x0000\x0004\x000A\x0001\x0000" +
+ "\x0002\x000A\x0009\x0000\x0001\x000A\x0002\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0009\x0000\x000A\x0002\x0002\x0000" + "\x0002\x000A\x0022\x0000\x0001\x000A\x001F\x0000\x000A\x0002\x0016\x0000\x0008\x000A\x0001\x0000\x0022\x000A\x001D\x0000" + "\x0004\x000A\x0074\x0000\x0022\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0002\x000A\x0015\x0000\x000A\x0002\x0006\x0000" + "\x0006\x000A\x004A\x0000\x0026\x000A\x000A\x0000\x0027\x000A\x0009\x0000\x005A\x000A\x0005\x0000\x0044\x000A\x0005\x0000" + "\x0052\x000A\x0006\x0000\x0007\x000A\x0001\x0000\x003F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000" + "\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0027\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0004\x000A\x0002\x0000\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0017\x000A\x0001\x0000" + "\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0027\x000A\x0001\x0000" + "\x0013\x000A\x000E\x0000\x0009\x0002\x002E\x0000\x0055\x000A\x000C\x0000\u026c\x000A\x0002\x0000\x0008\x000A\x000A\x0000" + "\x001A\x000A\x0005\x0000\x004B\x000A\x0095\x0000\x0034\x000A\x002C\x0000\x000A\x0002\x0026\x0000\x000A\x0002\x0006\x0000" + "\x0058\x000A\x0008\x0000\x0029\x000A\u0557\x0000\x009C\x000A\x0004\x0000\x005A\x000A\x0006\x0000\x0016\x000A\x0002\x0000" + "\x0006\x000A\x0002\x0000\x0026\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0008\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x001F\x000A\x0002\x0000\x0035\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0003\x0000\x0004\x000A\x0002\x0000\x0006\x000A\x0004\x0000" + "\x000D\x000A\x0005\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0082\x0000\x0001\x000A\x0082\x0000\x0001\x000A\x0004\x0000" +
+ "\x0001\x000A\x0002\x0000\x000A\x000A\x0001\x0000\x0001\x000A\x0003\x0000\x0005\x000A\x0006\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0007\x000A\u0ecb\x0000" + "\x0002\x000A\x002A\x0000\x0005\x000A\x000A\x0000\x0001\x000B\x0054\x000B\x0008\x000B\x0002\x000B\x0002\x000B\x005A\x000B" + "\x0001\x000B\x0003\x000B\x0006\x000B\x0028\x000B\x0003\x000B\x0001\x0000\x005E\x000A\x0011\x0000\x0018\x000A\x0038\x0000" + "\x0010\x000B\u0100\x0000\x0080\x000B\x0080\x0000\u19b6\x000B\x000A\x000B\x0040\x0000\u51a6\x000B\x005A\x000B\u048d\x000A" + "\u0773\x0000\u2ba4\x000A\u215c\x0000\u012e\x000B\x00D2\x000B\x0007\x000A\x000C\x0000\x0005\x000A\x0005\x0000\x0001\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x000D\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x006C\x000A\x0021\x0000\u016b\x000A\x0012\x0000\x0040\x000A\x0002\x0000\x0036\x000A" + "\x0028\x0000\x000C\x000A\x0074\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0087\x000A\x0013\x0000\x000A\x0002" + "\x0007\x0000\x001A\x000A\x0006\x0000\x001A\x000A\x000A\x0000\x0001\x000B\x003A\x000B\x001F\x000A\x0003\x0000\x0006\x000A" + "\x0002\x0000\x0006\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0003\x000A\x0023\x0000";
+
+ /// Translates characters to character classes
+ private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED);
+
+ /// Translates DFA states to action switch labels.
+ private static readonly int[] ZZ_ACTION = ZzUnpackAction();
+
+ private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0003\x0002\x0001\x0003\x0001\x0001\x000B\x0000\x0001\x0002\x0003\x0004" + "\x0002\x0000\x0001\x0005\x0001\x0000\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006\x0001\x0004" + "\x0002\x0007\x0001\x0008\x0001\x0000\x0001\x0008\x0003\x0000\x0002\x0008\x0001\x0009\x0001\x000A" + "\x0001\x0004";
+
+ private static int[] ZzUnpackAction()
+ {
+ int[] result = new int[51];
+ int offset = 0;
+ offset = ZzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackAction(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int count = packed[i++];
+ int value_Renamed = packed[i++];
+ do
+ result[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return j;
+ }
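+
+ // Illustrative note (not part of the generated scanner): the packed strings above are
+ // run-length encoded as (count, value) character pairs. A hypothetical packed string
+ // "\x0003\x0005\x0001\x0009" would unpack to { 5, 5, 5, 9 }: three copies of 5, then one 9.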
+
+
+ /// Translates a state to a row index in the transition table
+ private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap();
+
+ private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000E\x0000\x001C\x0000\x002A\x0000\x0038\x0000\x000E\x0000\x0046\x0000\x0054" + "\x0000\x0062\x0000\x0070\x0000\x007E\x0000\x008C\x0000\x009A\x0000\x00A8\x0000\x00B6\x0000\x00C4" + "\x0000\x00D2\x0000\x00E0\x0000\x00EE\x0000\x00FC\x0000\u010a\x0000\u0118\x0000\u0126\x0000\u0134" + "\x0000\u0142\x0000\u0150\x0000\u015e\x0000\u016c\x0000\u017a\x0000\u0188\x0000\u0196\x0000\u01a4" + "\x0000\u01b2\x0000\u01c0\x0000\u01ce\x0000\u01dc\x0000\u01ea\x0000\u01f8\x0000\x00D2\x0000\u0206" + "\x0000\u0214\x0000\u0222\x0000\u0230\x0000\u023e\x0000\u024c\x0000\u025a\x0000\x0054\x0000\x008C" + "\x0000\u0268\x0000\u0276\x0000\u0284";
+
+ private static int[] ZzUnpackRowMap()
+ {
+ int[] result = new int[51];
+ int offset = 0;
+ offset = ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackRowMap(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int high = packed[i++] << 16;
+ result[j++] = high | packed[i++];
+ }
+ return j;
+ }
+
+ /// The transition table of the DFA
+ private static readonly int[] ZZ_TRANS = ZzUnpackTrans();
+
+ private const System.String ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0002" + "\x000F\x0000\x0002\x0003\x0001\x0000\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x0003\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x0000\x0001\x000C\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x0004\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x000F\x0001\x0010" + "\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0010\x0000\x0001\x0002\x0001\x0000" + "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0017" + "\x0004\x0000\x0001\x0018\x0001\x0019\x0007\x0000\x0001\x001A\x0005\x0000\x0001\x001B\x0007\x0000" + "\x0001\x000B\x0004\x0000\x0001\x001C\x0001\x001D\x0007\x0000\x0001\x001E\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0007\x0000\x0001\x0021\x0004\x0000\x0001\x0022\x0001\x0023\x0007\x0000\x0001\x0024" + "\x000D\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0026\x000D\x0000" + "\x0001\x0027\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0028\x0004\x0000\x0001\x0003\x0001\x0004" + "\x0001\x000F\x0001\x0008\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0004\x0000" + "\x0002\x0014\x0001\x0000\x0001\x0029\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014" + "\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x002B\x0001\x0000\x0001\x0009\x0002\x002C" + "\x0001\x002D\x0001\x0015\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x0029\x0001\x0000" + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0001\x0000\x0001\x002E" + "\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0017\x0004\x0000\x0002\x0018\x0001\x0000\x0001\x002A" + "\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0018\x0004\x0000\x0001\x0018\x0001\x0019" + "\x0001\x0000\x0001\x002C\x0001\x0000\x0001\x0009\x0002\x002C\x0001\x002D\x0001\x0019\x0004\x0000" +
+ "\x0001\x0018\x0001\x0019\x0001\x0000\x0001\x002A\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000" + "\x0001\x001A\x0005\x0000\x0001\x001B\x0001\x0000\x0001\x002D\x0002\x0000\x0003\x002D\x0001\x001B" + "\x0004\x0000\x0002\x001C\x0001\x0000\x0001\x002F\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x001C\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x0030\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x001D\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x002F" + "\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001E\x0004\x0000\x0002\x001F\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001F\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0009\x0002\x000D\x0001\x000E\x0001\x0020" + "\x0004\x0000\x0001\x001F\x0001\x0020\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A" + "\x0001\x000B\x0001\x0021\x0004\x0000\x0002\x0022\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0022\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0001\x0023\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0024\x0006\x0000\x0001\x000F\x0006\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015" + "\x0001\x0000\x0001\x0031\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000" + "\x0002\x0017\x0001\x0000\x0001\x002E\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0028\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0014\x0004\x0000\x0002\x0018\x0007\x0000\x0001\x0018\x0004\x0000" + "\x0002\x001C\x0007\x0000\x0001\x001C\x0004\x0000\x0002\x001F\x0007\x0000\x0001\x001F\x0004\x0000" + "\x0002\x0022\x0007\x0000\x0001\x0022\x0004\x0000\x0002\x0032\x0007\x0000\x0001\x0032\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0033\x0004\x0000\x0002\x0032\x0001\x0000\x0001\x002E\x0002\x0000" + "\x0001\x002E\x0002\x0000\x0001\x0032\x0004\x0000\x0002\x0014\x0001\x0000\x0001\x0031\x0001\x0000" +
+ "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014\x0003\x0000";
+
+ private static int[] ZzUnpackTrans()
+ {
+ int[] result = new int[658];
+ int offset = 0;
+ offset = ZzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackTrans(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int count = packed[i++];
+ int value_Renamed = packed[i++];
+ value_Renamed--;
+ do
+ result[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /* error codes */
+ private const int ZZ_UNKNOWN_ERROR = 0;
+ private const int ZZ_NO_MATCH = 1;
+ private const int ZZ_PUSHBACK_2BIG = 2;
+
+ /* error messages for the codes above */
+ private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]{"Unknown internal scanner error", "Error: could not match input", "Error: pushback value was too large"};
+
+ /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState
+ private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute();
+
+ private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0003\x0001\x0001\x0009\x0001\x0001\x000B\x0000\x0004\x0001\x0002\x0000" + "\x0001\x0001\x0001\x0000\x000F\x0001\x0001\x0000\x0001\x0001\x0003\x0000\x0005\x0001";
+
+ private static int[] ZzUnpackAttribute()
+ {
+ int[] result = new int[51];
+ int offset = 0;
+ offset = ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int ZzUnpackAttribute(System.String packed, int offset, int[] result)
+ {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.Length;
+ while (i < l)
+ {
+ int count = packed[i++];
+ int value_Renamed = packed[i++];
+ do
+ result[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return j;
+ }
+
+ /// the input device
+ private System.IO.TextReader zzReader;
+
+ /// the current state of the DFA
+ private int zzState;
+
+ /// the current lexical state
+ private int zzLexicalState = YYINITIAL;
+
+ /// this buffer contains the current text to be matched and is
+ /// the source of the yytext() string
+ ///
+ private char[] zzBuffer = new char[ZZ_BUFFERSIZE];
+
+ /// the textposition at the last accepting state
+ private int zzMarkedPos;
+
+ /// the textposition at the last state to be included in yytext
+ private int zzPushbackPos;
+
+ /// the current text position in the buffer
+ private int zzCurrentPos;
+
+ /// startRead marks the beginning of the yytext() string in the buffer
+ private int zzStartRead;
+
+ /// endRead marks the last character in the buffer, that has been read
+ /// from input
+ ///
+ private int zzEndRead;
+
+ /// number of newlines encountered up to the start of the matched text
+ private int yyline;
+
+ /// the number of characters up to the start of the matched text
+ private int yychar;
+
+ /// the number of characters from the last newline up to the start of the
+ /// matched text
+ ///
+ private int yycolumn;
+
+ /// zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ private bool zzAtBOL = true;
+
+ /// zzAtEOF == true <=> the scanner is at the EOF
+ private bool zzAtEOF;
+
+ /* user code: */
+
+ public static readonly int ALPHANUM;
+ public static readonly int APOSTROPHE;
+ public static readonly int ACRONYM;
+ public static readonly int COMPANY;
+ public static readonly int EMAIL;
+ public static readonly int HOST;
+ public static readonly int NUM;
+ public static readonly int CJ;
+ /// this solves a bug where HOSTs that end with '.' are identified
+ /// as ACRONYMs.
+ ///
+ [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs")]
+ public static readonly int ACRONYM_DEP;
+
+ public static readonly System.String[] TOKEN_TYPES;
+
+ public int Yychar()
+ {
+ return yychar;
+ }
+
+ /*
+ * Resets the Tokenizer to a new Reader.
+ */
+ internal void Reset(System.IO.TextReader r)
+ {
+ // reset to default buffer size, if buffer has grown
+ if (zzBuffer.Length > ZZ_BUFFERSIZE)
+ {
+ zzBuffer = new char[ZZ_BUFFERSIZE];
+ }
+ Yyreset(r);
+ }
+
+ /// Fills Lucene token with the current token text.
+ internal void GetText(Token t)
+ {
+ t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+ }
+
+ /// Fills TermAttribute with the current token text.
+ internal void GetText(ITermAttribute t)
+ {
+ t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+ }
+
+
+ /// Creates a new scanner
+ /// There is also a java.io.InputStream version of this constructor.
+ ///
+ ///
+ /// the java.io.Reader to read input from.
+ ///
+ internal StandardTokenizerImpl(System.IO.TextReader in_Renamed)
+ {
+ this.zzReader = in_Renamed;
+ }
+
+ /// Creates a new scanner.
+ /// There is also a java.io.Reader version of this constructor.
+ ///
+ ///
+ /// the java.io.InputStream to read input from.
+ ///
+ internal StandardTokenizerImpl(System.IO.Stream in_Renamed):this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default))
+ {
+ }
+
+ /// Unpacks the compressed character translation table.
+ ///
+ ///
+ /// the packed character translation table
+ ///
+ /// the unpacked character translation table
+ ///
+ private static char[] ZzUnpackCMap(System.String packed)
+ {
+ char[] map = new char[0x10000];
+ int i = 0; /* index in packed string */
+ int j = 0; /* index in unpacked array */
+ while (i < 1154)
+ {
+ int count = packed[i++];
+ char value_Renamed = packed[i++];
+ do
+ map[j++] = value_Renamed;
+ while (--count > 0);
+ }
+ return map;
+ }
+
+
+ /// Refills the input buffer.
+ ///
+ /// false, iff there was new input.
+ ///
+ ///
+ /// if any I/O-Error occurs
+ ///
+ private bool ZzRefill()
+ {
+
+ /* first: make room (if you can) */
+ if (zzStartRead > 0)
+ {
+ Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead);
+
+ /* translate stored positions */
+ zzEndRead -= zzStartRead;
+ zzCurrentPos -= zzStartRead;
+ zzMarkedPos -= zzStartRead;
+ zzPushbackPos -= zzStartRead;
+ zzStartRead = 0;
+ }
+
+ /* is the buffer big enough? */
+ if (zzCurrentPos >= zzBuffer.Length)
+ {
+ /* if not: blow it up */
+ char[] newBuffer = new char[zzCurrentPos * 2];
+ Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length);
+ zzBuffer = newBuffer;
+ }
+
+ /* finally: fill the buffer with new input */
+ int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead);
+
+ if (numRead <= 0)
+ {
+ return true;
+ }
+ else
+ {
+ zzEndRead += numRead;
+ return false;
+ }
+ }
+
+
+ /// Closes the input stream.
+ public void Yyclose()
+ {
+ zzAtEOF = true; /* indicate end of file */
+ zzEndRead = zzStartRead; /* invalidate buffer */
+
+ if (zzReader != null)
+ zzReader.Close();
+ }
+
+
+ /// Resets the scanner to read from a new input stream.
+ /// Does not close the old reader.
+ ///
+ /// All internal variables are reset, the old input stream
+ /// cannot be reused (internal buffer is discarded and lost).
+ /// Lexical state is set to YYINITIAL.
+ ///
+ ///
+ /// the new input stream
+ ///
+ public void Yyreset(System.IO.TextReader reader)
+ {
+ zzReader = reader;
+ zzAtBOL = true;
+ zzAtEOF = false;
+ zzEndRead = zzStartRead = 0;
+ zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
+ yyline = yychar = yycolumn = 0;
+ zzLexicalState = YYINITIAL;
+ }
+
+
+ /// Returns the current lexical state.
+ public int Yystate()
+ {
+ return zzLexicalState;
+ }
+
+
+ /// Enters a new lexical state
+ ///
+ ///
+ /// the new lexical state
+ ///
+ public void Yybegin(int newState)
+ {
+ zzLexicalState = newState;
+ }
+
+
+ /// Returns the text matched by the current regular expression.
+ public System.String Yytext()
+ {
+ return new System.String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead);
+ }
+
+
+ /// Returns the character at position pos from the
+ /// matched text.
+ ///
+ /// It is equivalent to yytext().charAt(pos), but faster
+ ///
+ ///
+ /// the position of the character to fetch.
+ /// A value from 0 to yylength()-1.
+ ///
+ ///
+ /// the character at position pos
+ ///
+ public char Yycharat(int pos)
+ {
+ return zzBuffer[zzStartRead + pos];
+ }
+
+
+ /// Returns the length of the matched text region.
+ public int Yylength()
+ {
+ return zzMarkedPos - zzStartRead;
+ }
+
+
+ /// Reports an error that occurred while scanning.
+ ///
+ /// In a well-formed scanner (no or only correct usage of
+ /// yypushback(int) and a match-all fallback rule) this method
+ /// will only be called with things that "Can't Possibly Happen".
+ /// If this method is called, something is seriously wrong
+ /// (e.g. a JFlex bug producing a faulty scanner etc.).
+ ///
+ /// Usual syntax/scanner level error handling should be done
+ /// in error fallback rules.
+ ///
+ ///
+ /// the code of the error message to display
+ ///
+ private void ZzScanError(int errorCode)
+ {
+ System.String message;
+ try
+ {
+ message = ZZ_ERROR_MSG[errorCode];
+ }
+ catch (System.IndexOutOfRangeException)
+ {
+ message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+ }
+
+ throw new System.ApplicationException(message);
+ }
+
+
+ /// Pushes the specified amount of characters back into the input stream.
+ ///
+ /// They will be read again by the next call of the scanning method
+ ///
+ ///
+ /// the number of characters to be read again.
+ /// This number must not be greater than yylength()!
+ ///
+ public virtual void Yypushback(int number)
+ {
+ if (number > Yylength())
+ ZzScanError(ZZ_PUSHBACK_2BIG);
+
+ zzMarkedPos -= number;
+ }
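+
+ // Usage sketch (hypothetical call site, not present in this scanner): a rule that matched one
+ // character too many could give it back before the next scan, e.g.
+ //
+ //   if (Yycharat(Yylength() - 1) == '.')
+ //       Yypushback(1);   // the '.' is re-read by the next GetNextToken() call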
+
+
+ /// Resumes scanning until the next regular expression is matched,
+ /// the end of input is encountered or an I/O-Error occurs.
+ ///
+ ///
+ /// the next token
+ ///
+ /// if any I/O-Error occurs
+ ///
+ public virtual int GetNextToken()
+ {
+ int zzInput;
+ int zzAction;
+
+ // cached fields:
+ int zzCurrentPosL;
+ int zzMarkedPosL;
+ int zzEndReadL = zzEndRead;
+ char[] zzBufferL = zzBuffer;
+ char[] zzCMapL = ZZ_CMAP;
+
+ int[] zzTransL = ZZ_TRANS;
+ int[] zzRowMapL = ZZ_ROWMAP;
+ int[] zzAttrL = ZZ_ATTRIBUTE;
+
+ while (true)
+ {
+ zzMarkedPosL = zzMarkedPos;
+
+ yychar += zzMarkedPosL - zzStartRead;
+
+ zzAction = - 1;
+
+ zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+
+ zzState = zzLexicalState;
+
+
+ {
+ while (true)
+ {
+
+ if (zzCurrentPosL < zzEndReadL)
+ zzInput = zzBufferL[zzCurrentPosL++];
+ else if (zzAtEOF)
+ {
+ zzInput = YYEOF;
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ else
+ {
+ // store back cached positions
+ zzCurrentPos = zzCurrentPosL;
+ zzMarkedPos = zzMarkedPosL;
+ bool eof = ZzRefill();
+ // get translated positions and possibly new buffer
+ zzCurrentPosL = zzCurrentPos;
+ zzMarkedPosL = zzMarkedPos;
+ zzBufferL = zzBuffer;
+ zzEndReadL = zzEndRead;
+ if (eof)
+ {
+ zzInput = YYEOF;
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ else
+ {
+ zzInput = zzBufferL[zzCurrentPosL++];
+ }
+ }
+ int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]];
+ if (zzNext == - 1)
+ {
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ zzState = zzNext;
+
+ int zzAttributes = zzAttrL[zzState];
+ if ((zzAttributes & 1) == 1)
+ {
+ zzAction = zzState;
+ zzMarkedPosL = zzCurrentPosL;
+ if ((zzAttributes & 8) == 8)
+ {
+ goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' may be in the wrong place
+ }
+ }
+ }
+ }
+
+zzForAction_brk: ; // {{Aroush-2.9}} this 'label' may be in the wrong place
+
+
+ // store back cached position
+ zzMarkedPos = zzMarkedPosL;
+
+ switch (zzAction < 0?zzAction:ZZ_ACTION[zzAction])
+ {
+
+ case 4:
+ {
+ return HOST;
+ }
+
+ case 11: break;
+
+ case 9:
+ {
+ return ACRONYM;
+ }
+
+ case 12: break;
+
+ case 8:
+ {
+ return ACRONYM_DEP;
+ }
+
+ case 13: break;
+
+ case 1:
+ {
+ /* ignore */
+ }
+ goto case 14;
+
+ case 14: break;
+
+ case 5:
+ {
+ return NUM;
+ }
+
+ case 15: break;
+
+ case 3:
+ {
+ return CJ;
+ }
+
+ case 16: break;
+
+ case 2:
+ {
+ return ALPHANUM;
+ }
+
+ case 17: break;
+
+ case 7:
+ {
+ return COMPANY;
+ }
+
+ case 18: break;
+
+ case 6:
+ {
+ return APOSTROPHE;
+ }
+
+ case 19: break;
+
+ case 10:
+ {
+ return EMAIL;
+ }
+
+ case 20: break;
+
+ default:
+ if (zzInput == YYEOF && zzStartRead == zzCurrentPos)
+ {
+ zzAtEOF = true;
+ return YYEOF;
+ }
+ else
+ {
+ ZzScanError(ZZ_NO_MATCH);
+ }
+ break;
+
+ }
+ }
+ }
+ static StandardTokenizerImpl()
+ {
+ ALPHANUM = StandardTokenizer.ALPHANUM;
+ APOSTROPHE = StandardTokenizer.APOSTROPHE;
+ ACRONYM = StandardTokenizer.ACRONYM;
+ COMPANY = StandardTokenizer.COMPANY;
+ EMAIL = StandardTokenizer.EMAIL;
+ HOST = StandardTokenizer.HOST;
+ NUM = StandardTokenizer.NUM;
+ CJ = StandardTokenizer.CJ;
+ ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
+ TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/StopAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/StopAnalyzer.cs
new file mode 100644
index 0000000000..aabe19746f
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/StopAnalyzer.cs
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Filters LetterTokenizer with LowerCaseFilter and StopFilter.
+ ///
+ ///
+ ///
+ ///
+ /// You must specify the required Version compatibility when creating
+ /// StopAnalyzer:
+ ///
+ /// - As of 2.9, position increments are preserved
+ ///
+ ///
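+ /// A minimal usage sketch (illustrative only; assumes an open System.IO.TextReader named reader):
+ ///
+ /// var analyzer = new StopAnalyzer(Version.LUCENE_29);
+ /// TokenStream ts = analyzer.TokenStream("contents", reader);
+ /// while (ts.IncrementToken()) { /* stop words such as "the" and "a" have been removed */ }
+ ///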
+
+ public sealed class StopAnalyzer:Analyzer
+ {
+ private readonly ISet<string> stopWords;
+ private readonly bool enablePositionIncrements;
+
+ /// An unmodifiable set containing some common English words that are not usually useful
+ /// for searching.
+ ///
+ public static ISet<string> ENGLISH_STOP_WORDS_SET;
+
+ /// Builds an analyzer which removes words in ENGLISH_STOP_WORDS_SET.
+ public StopAnalyzer(Version matchVersion)
+ {
+ stopWords = ENGLISH_STOP_WORDS_SET;
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// Builds an analyzer with the stop words from the given set.
+ public StopAnalyzer(Version matchVersion, ISet<string> stopWords)
+ {
+ this.stopWords = stopWords;
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// Builds an analyzer with the stop words from the given file.
+ ///
+ ///
+ ///
+ ///
+ /// See above
+ ///
+ /// File to load stop words from
+ ///
+ public StopAnalyzer(Version matchVersion, System.IO.FileInfo stopwordsFile)
+ {
+ stopWords = WordlistLoader.GetWordSet(stopwordsFile);
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// Builds an analyzer with the stop words from the given reader.
+ ///
+ ///
+ /// See above
+ ///
+ /// Reader to load stop words from
+ ///
+ public StopAnalyzer(Version matchVersion, System.IO.TextReader stopwords)
+ {
+ stopWords = WordlistLoader.GetWordSet(stopwords);
+ enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion);
+ }
+
+ /// Filters LowerCaseTokenizer with StopFilter.
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
+ }
+
+ /// Filters LowerCaseTokenizer with StopFilter.
+ private class SavedStreams
+ {
+ public SavedStreams(StopAnalyzer enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(StopAnalyzer enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private StopAnalyzer enclosingInstance;
+ public StopAnalyzer Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal Tokenizer source;
+ internal TokenStream result;
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var streams = (SavedStreams) PreviousTokenStream;
+ if (streams == null)
+ {
+ streams = new SavedStreams(this) {source = new LowerCaseTokenizer(reader)};
+ streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
+ PreviousTokenStream = streams;
+ }
+ else
+ streams.source.Reset(reader);
+ return streams.result;
+ }
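+
+ // Reuse sketch (illustrative; "analyzer" is assumed): consecutive calls on the same thread reuse
+ // one LowerCaseTokenizer/StopFilter chain, only the underlying reader changes.
+ //
+ //   var ts1 = analyzer.ReusableTokenStream("f", new System.IO.StringReader("the first doc"));
+ //   var ts2 = analyzer.ReusableTokenStream("f", new System.IO.StringReader("and the second"));
+ //   // ts1 and ts2 refer to the same StopFilter instance, reset to the new reader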
+ static StopAnalyzer()
+ {
+ {
+ var stopWords = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"};
+ var stopSet = new CharArraySet(stopWords.Length, false);
+ stopSet.AddAll(stopWords);
+ ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/StopFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/StopFilter.cs
new file mode 100644
index 0000000000..81b7dd0b37
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/StopFilter.cs
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using QueryParser = Lucene.Net.QueryParsers.QueryParser;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Removes stop words from a token stream.
+
+ public sealed class StopFilter:TokenFilter
+ {
+ private readonly CharArraySet stopWords;
+ private bool enablePositionIncrements = false;
+
+ private readonly ITermAttribute termAtt;
+ private readonly IPositionIncrementAttribute posIncrAtt;
+
+ /// Construct a token stream filtering the given input.
+ /// If stopWords is an instance of CharArraySet (true if
+ /// makeStopSet() was used to construct the set) it will be directly used
+ /// and ignoreCase will be ignored since CharArraySet
+ /// directly controls case sensitivity.
+ ///
+ /// If stopWords is not an instance of CharArraySet,
+ /// a new CharArraySet will be constructed and ignoreCase will be
+ /// used to specify the case sensitivity of that set.
+ ///
+ /// true if token positions should record the removed stop words
+ /// Input TokenStream
+ /// A Set of strings or char[] or any other ToString()-able set representing the stopwords
+ /// if true, all words are lower cased first
+ public StopFilter(bool enablePositionIncrements, TokenStream input, ISet<string> stopWords, bool ignoreCase)
+ : base(input)
+ {
+ if (stopWords is CharArraySet)
+ {
+ this.stopWords = (CharArraySet) stopWords;
+ }
+ else
+ {
+ this.stopWords = new CharArraySet(stopWords.Count, ignoreCase);
+ this.stopWords.AddAll(stopWords);
+ }
+ this.enablePositionIncrements = enablePositionIncrements;
+ termAtt = AddAttribute<ITermAttribute>();
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ /// Constructs a filter which removes words from the input
+ /// TokenStream that are named in the Set.
+ ///
+ /// true if token positions should record the removed stop words
+ /// Input stream
+ /// A Set of strings or char[] or any other ToString()-able set representing the stopwords
+ ///
+ public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet<string> stopWords)
+ : this(enablePositionIncrements, @in, stopWords, false)
+ { }
+
+ /// Builds a Set from an array of stop words,
+ /// appropriate for passing into the StopFilter constructor.
+ /// This permits this stopWords construction to be cached once when
+ /// an Analyzer is constructed.
+ ///
+ ///
+ /// passing false to ignoreCase
+ public static ISet<string> MakeStopSet(params string[] stopWords)
+ {
+ return MakeStopSet(stopWords, false);
+ }
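+
+ // Caching sketch (illustrative; the field and variable names are assumptions): an Analyzer would
+ // typically build the stop set once and reuse it for every StopFilter it creates.
+ //
+ //   private static readonly ISet<string> CachedStops = StopFilter.MakeStopSet("the", "a", "an");
+ //   ...
+ //   TokenStream result = new StopFilter(true, tokenizer, CachedStops);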
+
+ /// Builds a Set from an array of stop words,
+ /// appropriate for passing into the StopFilter constructor.
+ /// This permits this stopWords construction to be cached once when
+ /// an Analyzer is constructed.
+ ///
+ /// A list of strings or char[] or any other ToString()-able list representing the stop words
+ /// passing false to ignoreCase
+ public static ISet<string> MakeStopSet(IList<object> stopWords)
+ {
+ return MakeStopSet(stopWords, false);
+ }
+
+ ///
+ /// An array of stopwords
+ /// If true, all words are lower cased first.
+ /// a Set containing the words
+ public static ISet<string> MakeStopSet(string[] stopWords, bool ignoreCase)
+ {
+ var stopSet = new CharArraySet(stopWords.Length, ignoreCase);
+ stopSet.AddAll(stopWords);
+ return stopSet;
+ }
+
+ ///
+ /// A List of Strings or char[] or any other toString()-able list representing the stopwords
+ /// if true, all words are lower cased first
+ /// A Set ( )containing the words
+ public static ISet<string> MakeStopSet(IList<object> stopWords, bool ignoreCase)
+ {
+ var stopSet = new CharArraySet(stopWords.Count, ignoreCase);
+ foreach(var word in stopWords)
+ stopSet.Add(word.ToString());
+ return stopSet;
+ }
+
+ /// Returns the next input Token whose term() is not a stop word.
+ public override bool IncrementToken()
+ {
+ // return the first non-stop word found
+ int skippedPositions = 0;
+ while (input.IncrementToken())
+ {
+ if (!stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength()))
+ {
+ if (enablePositionIncrements)
+ {
+ posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+ }
+ return true;
+ }
+ skippedPositions += posIncrAtt.PositionIncrement;
+ }
+ // reached EOS -- return false
+ return false;
+ }
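+
+ // Worked example (illustrative): with stop set { "the" } and input tokens "the quick the fox",
+ // enablePositionIncrements == true emits "quick" and "fox" each with a position increment of 2,
+ // preserving the gaps left by the removed tokens; with false, both keep an increment of 1.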
+
+ /// Returns version-dependent default for enablePositionIncrements. Analyzers
+ /// that embed StopFilter use this method when creating the StopFilter. Prior
+ /// to 2.9, this returns false. On 2.9 or later, it returns true.
+ ///
+ public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion)
+ {
+ return matchVersion.OnOrAfter(Version.LUCENE_29);
+ }
+
+ /// If true , this StopFilter will preserve
+ /// positions of the incoming tokens (ie, accumulate and
+ /// set position increments of the removed stop tokens).
+ /// Generally, true is best as it does not
+ /// lose information (positions of the original tokens)
+ /// during indexing.
+ ///
+ ///
+ /// When set, when a token is stopped
+ /// (omitted), the position increment of the following
+ /// token is incremented.
+ ///
+ ///
+ /// NOTE: be sure to also
+ /// set QueryParser.EnablePositionIncrements if
+ /// you use QueryParser to create queries.
+ ///
+ public bool EnablePositionIncrements
+ {
+ get { return enablePositionIncrements; }
+ set { enablePositionIncrements = value; }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/TeeSinkTokenFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/TeeSinkTokenFilter.cs
new file mode 100644
index 0000000000..bec605e226
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/TeeSinkTokenFilter.cs
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Attribute = Lucene.Net.Util.Attribute;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// This TokenFilter provides the ability to set aside attribute states
+ /// that have already been analyzed. This is useful in situations where multiple fields share
+ /// many common analysis steps and then go their separate ways.
+ ///
+ /// It is also useful for doing things like entity extraction or proper noun analysis as
+ /// part of the analysis workflow and saving off those tokens for use in another field.
+ ///
+ ///
+ /// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
+ /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ /// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
+ /// source2.addSinkTokenStream(sink1);
+ /// source2.addSinkTokenStream(sink2);
+ /// TokenStream final1 = new LowerCaseFilter(source1);
+ /// TokenStream final2 = source2;
+ /// TokenStream final3 = new EntityDetect(sink1);
+ /// TokenStream final4 = new URLDetect(sink2);
+ /// d.add(new Field("f1", final1));
+ /// d.add(new Field("f2", final2));
+ /// d.add(new Field("f3", final3));
+ /// d.add(new Field("f4", final4));
+ ///
+ /// In this example, sink1 and sink2 will both get tokens from both
+ /// reader1 and reader2 after whitespace tokenizer
+ /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
+ /// It is important that tees are consumed before sinks (in the above example, the tee field names must come
+ /// before the sink's field names). If you are not sure which stream is consumed first, you can simply
+ /// add another sink and then pass all tokens to the sinks at once using ConsumeAllTokens().
+ /// This TokenFilter is exhausted after this. In that case, change
+ /// the example above to:
+ ///
+ /// ...
+ /// TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
+ /// TokenStream final2 = source2.newSinkTokenStream();
+ /// sink1.consumeAllTokens();
+ /// sink2.consumeAllTokens();
+ /// ...
+ ///
+ /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+ ///
+ /// Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+ ///
+ public sealed class TeeSinkTokenFilter:TokenFilter
+ {
+ public class AnonymousClassSinkFilter:SinkFilter
+ {
+ public override bool Accept(AttributeSource source)
+ {
+ return true;
+ }
+ }
+ private readonly LinkedList<WeakReference> sinks = new LinkedList<WeakReference>();
+
+ /// Instantiates a new TeeSinkTokenFilter.
+ public TeeSinkTokenFilter(TokenStream input):base(input)
+ {
+ }
+
+ /// Returns a new SinkTokenStream that receives all tokens consumed by this stream.
+ public SinkTokenStream NewSinkTokenStream()
+ {
+ return NewSinkTokenStream(ACCEPT_ALL_FILTER);
+ }
+
+ /// Returns a new SinkTokenStream that receives all tokens consumed by this stream
+ /// that pass the supplied filter.
+ ///
+ ///
+ ///
+ public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
+ {
+ var sink = new SinkTokenStream(this.CloneAttributes(), filter);
+ sinks.AddLast(new WeakReference(sink));
+ return sink;
+ }
+
+ /// Adds a SinkTokenStream created by another TeeSinkTokenFilter
+ /// to this one. The supplied stream will also receive all consumed tokens.
+ /// This method can be used to pass tokens from two different tees to one sink.
+ ///
+ public void AddSinkTokenStream(SinkTokenStream sink)
+ {
+ // check that sink has correct factory
+ if (!this.Factory.Equals(sink.Factory))
+ {
+ throw new System.ArgumentException("The supplied sink is not compatible to this tee");
+ }
+ // add eventually missing attribute impls to the existing sink
+ foreach (var impl in this.CloneAttributes().GetAttributeImplsIterator())
+ {
+ sink.AddAttributeImpl(impl);
+ }
+ sinks.AddLast(new WeakReference(sink));
+ }
+
+ /// TeeSinkTokenFilter passes all tokens to the added sinks
+ /// when it is itself consumed. To be sure that all tokens from the input
+ /// stream are passed to the sinks, you can call this method.
+ /// This instance is exhausted after this, but all sinks are instantly available.
+ ///
+ public void ConsumeAllTokens()
+ {
+ while (IncrementToken())
+ {
+ }
+ }
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ // capture state lazily - maybe no SinkFilter accepts this state
+ State state = null;
+ foreach(WeakReference wr in sinks)
+ {
+ var sink = (SinkTokenStream)wr.Target;
+ if (sink != null)
+ {
+ if (sink.Accept(this))
+ {
+ if (state == null)
+ {
+ state = this.CaptureState();
+ }
+ sink.AddState(state);
+ }
+ }
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ public override void End()
+ {
+ base.End();
+ State finalState = CaptureState();
+ foreach(WeakReference wr in sinks)
+ {
+ var sink = (SinkTokenStream)wr.Target;
+ if (sink != null)
+ {
+ sink.SetFinalState(finalState);
+ }
+ }
+ }
+
+ /// A filter that decides which states to store in the sink.
+ public abstract class SinkFilter
+ {
+ /// Returns true, iff the current state of the passed-in AttributeSource shall be stored
+ /// in the sink.
+ ///
+ public abstract bool Accept(AttributeSource source);
+
+ /// Called by . This method does nothing by default
+ /// and can optionally be overridden.
+ ///
+ public virtual void Reset()
+ {
+ // nothing to do; can be overridden
+ }
+ }
+
+ public sealed class SinkTokenStream : TokenStream
+ {
+ private readonly LinkedList<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>();
+ private State finalState;
+ private IEnumerator<AttributeSource.State> it = null;
+ private readonly SinkFilter filter;
+
+ internal SinkTokenStream(AttributeSource source, SinkFilter filter)
+ : base(source)
+ {
+ this.filter = filter;
+ }
+
+ internal /*private*/ bool Accept(AttributeSource source)
+ {
+ return filter.Accept(source);
+ }
+
+ internal /*private*/ void AddState(AttributeSource.State state)
+ {
+ if (it != null)
+ {
+ throw new System.SystemException("The tee must be consumed before sinks are consumed.");
+ }
+ cachedStates.AddLast(state);
+ }
+
+ internal /*private*/ void SetFinalState(AttributeSource.State finalState)
+ {
+ this.finalState = finalState;
+ }
+
+ public override bool IncrementToken()
+ {
+ // lazy init the iterator
+ if (it == null)
+ {
+ it = cachedStates.GetEnumerator();
+ }
+
+ if (!it.MoveNext())
+ {
+ return false;
+ }
+
+ State state = it.Current;
+ RestoreState(state);
+ return true;
+ }
+
+ public override void End()
+ {
+ if (finalState != null)
+ {
+ RestoreState(finalState);
+ }
+ }
+
+ public override void Reset()
+ {
+ it = cachedStates.GetEnumerator();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+ }
+
+ private static readonly SinkFilter ACCEPT_ALL_FILTER;
+ static TeeSinkTokenFilter()
+ {
+ ACCEPT_ALL_FILTER = new AnonymousClassSinkFilter();
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Token.cs b/external/Lucene.Net.Light/src/core/Analysis/Token.cs
new file mode 100644
index 0000000000..3357f3417f
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Token.cs
@@ -0,0 +1,852 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using Payload = Lucene.Net.Index.Payload;
+using TermPositions = Lucene.Net.Index.TermPositions;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A Token is an occurrence of a term from the text of a field. It consists of
+ /// a term's text, the start and end offset of the term in the text of the field,
+ /// and a type string.
+ ///
+ /// The start and end offsets permit applications to re-associate a token with
+ /// its source text, e.g., to display highlighted query terms in a document
+ /// browser, or to show matching text fragments in a KWIC display, etc.
+ ///
+ /// The type is a string, assigned by a lexical analyzer
+ /// (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+ /// belongs to. For example an end of sentence marker token might be implemented
+ /// with type "eos". The default token type is "word".
+ ///
+ /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
+ /// length byte array. Use TermPositions.PayloadLength and TermPositions.GetPayload
+ /// to retrieve the payloads from the index.
+ ///
+ ///
+ ///
+ ///
+ /// NOTE: As of 2.9, Token implements all interfaces
+ /// that are part of core Lucene and can be found in the Lucene.Net.Analysis.Tokenattributes namespace.
+ /// Even though it is not necessary to use Token anymore, with the new TokenStream API it can
+ /// be used as a convenience class that implements all of the attribute interfaces, which is especially useful
+ /// to easily switch from the old to the new TokenStream API.
+ ///
+ ///
+ /// Tokenizers and TokenFilters should try to re-use a Token instance when
+ /// possible for best performance, by implementing the
+ /// TokenStream.IncrementToken() API.
+ /// Failing that, to create a new Token you should first use
+ /// one of the constructors that starts with null text. To load
+ /// the token from a char[] use SetTermBuffer(char[], int, int).
+ /// To load from a String use SetTermBuffer(String) or SetTermBuffer(String, int, int).
+ /// Alternatively you can get the Token's termBuffer by calling either TermBuffer(),
+ /// if you know that your text is shorter than the capacity of the termBuffer
+ /// or ResizeTermBuffer(int), if there is any possibility
+ /// that you may need to grow the buffer. Fill in the characters of your term into this
+ /// buffer, with string.CopyTo if loading from a string,
+ /// or with Array.Copy, and finally call SetTermLength(int) to
+ /// set the length of the term text. See LUCENE-969
+ /// for details.
+ ///
+ /// Typical Token reuse patterns:
+ ///
+ /// - Copying text from a string (type is reset to DEFAULT_TYPE
+ /// if not specified):
+ ///
+ /// return reusableToken.reinit(string, startOffset, endOffset[, type]);
+ ///
+ ///
+ /// - Copying some text from a string (type is reset to DEFAULT_TYPE
+ /// if not specified):
+ ///
+ /// return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+ ///
+ ///
+ /// - Copying text from a char[] buffer (type is reset to DEFAULT_TYPE
+ /// if not specified):
+ ///
+ /// return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+ ///
+ ///
+ /// - Copying some text from a char[] buffer (type is reset to DEFAULT_TYPE
+ /// if not specified):
+ ///
+ /// return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+ ///
+ ///
+ /// - Copying from one Token to another (type is reset to DEFAULT_TYPE
+ /// if not specified):
+ ///
+ /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+ ///
+ ///
+ ///
+ /// A few things to note:
+ ///
+ /// - clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.
+ /// - Because
+ /// TokenStreams can be chained, one cannot assume that the Token's current type is correct.
+ /// - The startOffset and endOffset represent the start and end offset in the
+ /// source text, so be careful in adjusting them.
+ /// - When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.
+ ///
+ ///
+ ///
+ ///
+ ///
+ [Serializable]
+ public class Token : Attribute, ITermAttribute, ITypeAttribute, IPositionIncrementAttribute, IFlagsAttribute, IOffsetAttribute, IPayloadAttribute
+ {
+ public const String DEFAULT_TYPE = "word";
+
+ private const int MIN_BUFFER_SIZE = 10;
+
+ private char[] termBuffer;
+ private int termLength;
+ private int startOffset, endOffset;
+ private string type = DEFAULT_TYPE;
+ private int flags;
+ private Payload payload;
+ private int positionIncrement = 1;
+
+ /// Constructs a Token with null text.
+ public Token()
+ {
+ }
+
+ /// Constructs a Token with null text and start & end
+ /// offsets.
+ ///
+ /// start offset in the source text
+ /// end offset in the source text
+ public Token(int start, int end)
+ {
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /// Constructs a Token with null text and start & end
+ /// offsets plus the Token type.
+ ///
+ /// start offset in the source text
+ /// end offset in the source text
+ /// the lexical type of this Token
+ public Token(int start, int end, String typ)
+ {
+ startOffset = start;
+ endOffset = end;
+ type = typ;
+ }
+
+ /// Constructs a Token with null text and start & end
+ /// offsets plus flags. NOTE: flags is EXPERIMENTAL.
+ ///
+ /// start offset in the source text
+ /// end offset in the source text
+ /// The bits to set for this token
+ public Token(int start, int end, int flags)
+ {
+ startOffset = start;
+ endOffset = end;
+ this.flags = flags;
+ }
+
+ /// Constructs a Token with the given term text, and start
+ /// & end offsets. The type defaults to "word."
+ /// NOTE: for better indexing speed you should
+ /// instead use the char[] termBuffer methods to set the
+ /// term text.
+ ///
+ /// term text
+ /// start offset
+ /// end offset
+ public Token(String text, int start, int end)
+ {
+ SetTermBuffer(text);
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /// Constructs a Token with the given text, start and end
+ /// offsets, & type. NOTE: for better indexing
+ /// speed you should instead use the char[] termBuffer
+ /// methods to set the term text.
+ ///
+ /// term text
+ /// start offset
+ /// end offset
+ /// token type
+ public Token(System.String text, int start, int end, System.String typ)
+ {
+ SetTermBuffer(text);
+ startOffset = start;
+ endOffset = end;
+ type = typ;
+ }
+
+ /// Constructs a Token with the given text, start and end
+ /// offsets, & type. NOTE: for better indexing
+ /// speed you should instead use the char[] termBuffer
+ /// methods to set the term text.
+ ///
+ ///
+ ///
+ ///
+ /// token type bits
+ public Token(System.String text, int start, int end, int flags)
+ {
+ SetTermBuffer(text);
+ startOffset = start;
+ endOffset = end;
+ this.flags = flags;
+ }
+
+ /// Constructs a Token with the given term buffer (offset
+ /// & length), start and end
+ /// offsets
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end)
+ {
+ SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
+ startOffset = start;
+ endOffset = end;
+ }
+
+ /// Set the position increment. This determines the position of this token
+ /// relative to the previous Token in a TokenStream, used in phrase
+ /// searching.
+ ///
+ ///
+ /// The default value is one.
+ ///
+ ///
+ /// Some common uses for this are:
+ ///
+ /// - Set it to zero to put multiple terms in the same position. This is
+ /// useful if, e.g., a word has multiple stems. Searches for phrases
+ /// including either stem will match. In this case, all but the first stem's
+ /// increment should be set to zero: the increment of the first instance
+ /// should be one. Repeating a token with an increment of zero can also be
+ /// used to boost the scores of matches on that token.
+ ///
+ /// - Set it to values greater than one to inhibit exact phrase matches.
+ /// If, for example, one does not want phrases to match across removed stop
+ /// words, then one could build a stop word filter that removes stop words and
+ /// also sets the increment to the number of stop words removed before each
+ /// non-stop word. Then exact phrase queries will only match when the terms
+ /// occur with no intervening stop words.
+ ///
+ ///
+ ///
+ /// the distance from the prior term
+ ///
+ ///
+ public virtual int PositionIncrement
+ {
+ set
+ {
+ if (value < 0)
+ throw new System.ArgumentException("Increment must be zero or greater: " + value);
+ this.positionIncrement = value;
+ }
+ get { return positionIncrement; }
+ }
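+
+ // Sketch (illustrative, e.g. inside a hypothetical synonym filter): emit a second token at the
+ // same position as the previous one so that phrase queries match either variant.
+ //
+ //   var syn = new Token("vehicle", startOffset, endOffset);   // startOffset/endOffset assumed
+ //   syn.PositionIncrement = 0;   // stacked on top of the original "car" token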
+
+ /// Returns the Token's term text.
+ ///
+ /// This method has a performance penalty
+ /// because the text is stored internally in a char[]. If
+ /// possible, use TermBuffer() and TermLength()
+ /// directly instead. If you really need a
+ /// String, use this method, which is nothing more than
+ /// a convenience call to new String(token.termBuffer(), 0, token.termLength())
+ ///
+ public string Term
+ {
+ get
+ {
+ InitTermBuffer();
+ return new System.String(termBuffer, 0, termLength);
+ }
+ }
+
+ /// Copies the contents of buffer, starting at offset for
+ /// length characters, into the termBuffer array.
+ ///
+ /// the buffer to copy
+ /// the index in the buffer of the first character to copy
+ /// the number of characters to copy
+ public void SetTermBuffer(char[] buffer, int offset, int length)
+ {
+ GrowTermBuffer(length);
+ Array.Copy(buffer, offset, termBuffer, 0, length);
+ termLength = length;
+ }
+
+ /// Copies the contents of buffer into the termBuffer array.
+ /// the buffer to copy
+ ///
+ public void SetTermBuffer(System.String buffer)
+ {
+ int length = buffer.Length;
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// Copies the contents of buffer, starting at offset and continuing
+ /// for length characters, into the termBuffer array.
+ ///
+ /// the buffer to copy
+ ///
+ /// the index in the buffer of the first character to copy
+ ///
+ /// the number of characters to copy
+ ///
+ public void SetTermBuffer(System.String buffer, int offset, int length)
+ {
+ System.Diagnostics.Debug.Assert(offset <= buffer.Length);
+ System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// Returns the internal termBuffer character array which
+ /// you can then directly alter. If the array is too
+ /// small for your token, use ResizeTermBuffer(int)
+ /// to increase it. After
+ /// altering the buffer be sure to call SetTermLength(int)
+ /// to record the number of valid
+ /// characters that were placed into the termBuffer.
+ ///
+ public char[] TermBuffer()
+ {
+ InitTermBuffer();
+ return termBuffer;
+ }
+
+ /// Grows the termBuffer to at least size newSize, preserving the
+ /// existing content. Note: If the next operation is to change
+ /// the contents of the term buffer use
+ /// SetTermBuffer(char[], int, int),
+ /// SetTermBuffer(String), or
+ /// SetTermBuffer(String, int, int)
+ /// to optimally combine the resize with the setting of the termBuffer.
+ ///
+ /// minimum size of the new termBuffer
+ ///
+ /// newly created termBuffer with length >= newSize
+ ///
+ public virtual char[] ResizeTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation and preserve content
+ var newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
+ termBuffer = newCharBuffer;
+ }
+ }
+ return termBuffer;
+ }
+
+ /// Allocates a buffer char[] of at least newSize, without preserving the existing content.
+ /// It is always used in places that set the content.
+ ///
+ /// minimum size of the buffer
+ ///
+ private void GrowTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ // The buffer is always at least MIN_BUFFER_SIZE
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation:
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ }
+ }
+ }
+
+ private void InitTermBuffer()
+ {
+ if (termBuffer == null)
+ {
+ termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
+ termLength = 0;
+ }
+ }
+
+ /// Return number of valid characters (length of the term)
+ /// in the termBuffer array.
+ ///
+ public int TermLength()
+ {
+ InitTermBuffer();
+ return termLength;
+ }
+
+ /// Set number of valid characters (length of the term) in
+ /// the termBuffer array. Use this to truncate the termBuffer
+ /// or to synchronize with external manipulation of the termBuffer.
+ /// Note: to grow the size of the array,
+ /// use ResizeTermBuffer(int) first.
+ ///
+ /// the truncated length
+ ///
+ public void SetTermLength(int length)
+ {
+ InitTermBuffer();
+ if (length > termBuffer.Length)
+ throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
+ termLength = length;
+ }
+
+ /// Gets or sets this Token's starting offset, the position of the first character
+ /// corresponding to this token in the source text.
+ /// Note that the difference between endOffset() and startOffset() may not be
+ /// equal to TermLength(), as the term text may have been altered by a
+ /// stemmer or some other filter.
+ ///
+ public virtual int StartOffset
+ {
+ get { return startOffset; }
+ set { this.startOffset = value; }
+ }
+
+ /// Gets or sets this Token's ending offset, one greater than the position of the
+ /// last character corresponding to this token in the source text. The length
+ /// of the token in the source text is (endOffset - startOffset).
+ ///
+ public virtual int EndOffset
+ {
+ get { return endOffset; }
+ set { this.endOffset = value; }
+ }
+
+ /// Set the starting and ending offset.
+ /// See StartOffset() and EndOffset()
+ ///
+ public virtual void SetOffset(int startOffset, int endOffset)
+ {
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ }
+
+ /// Returns this Token's lexical type. Defaults to "word".
+ public string Type
+ {
+ get { return type; }
+ set { this.type = value; }
+ }
+
+ /// EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ ///
+ ///
+ /// Get the bitset for any bits that have been set. This is completely distinct from Type, although they do share similar purposes.
+ /// The flags can be used to encode information about the token for use by other TokenFilters.
+ ///
+ ///
+ ///
+ /// The bits
+ public virtual int Flags
+ {
+ get { return flags; }
+ set { flags = value; }
+ }
+
+ /// Returns this Token's payload.
+ public virtual Payload Payload
+ {
+ get { return payload; }
+ set { payload = value; }
+ }
+
+ public override String ToString()
+ {
+ var sb = new System.Text.StringBuilder();
+ sb.Append('(');
+ InitTermBuffer();
+ if (termBuffer == null)
+ sb.Append("null");
+ else
+ sb.Append(termBuffer, 0, termLength);
+ sb.Append(',').Append(startOffset).Append(',').Append(endOffset);
+ if (!type.Equals("word"))
+ sb.Append(",type=").Append(type);
+ if (positionIncrement != 1)
+ sb.Append(",posIncr=").Append(positionIncrement);
+ sb.Append(')');
+ return sb.ToString();
+ }
+
+ /// Resets the term text, payload, flags, and positionIncrement,
+ /// startOffset, endOffset and token type to default.
+ ///
+ public override void Clear()
+ {
+ payload = null;
+ // Leave termBuffer to allow re-use
+ termLength = 0;
+ positionIncrement = 1;
+ flags = 0;
+ startOffset = endOffset = 0;
+ type = DEFAULT_TYPE;
+ }
+
+ public override System.Object Clone()
+ {
+ var t = (Token) base.Clone();
+ // Do a deep clone
+ if (termBuffer != null)
+ {
+ t.termBuffer = new char[termBuffer.Length];
+ termBuffer.CopyTo(t.termBuffer, 0);
+ }
+ if (payload != null)
+ {
+ t.payload = (Payload) payload.Clone();
+ }
+ return t;
+ }
+
+ /// Makes a clone, but replaces the term buffer &
+ /// start/end offset in the process. This is more
+ /// efficient than doing a full clone (and then calling
+ /// setTermBuffer) because it saves a wasted copy of the old
+ /// termBuffer.
+ ///
+ public virtual Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+ {
+ var t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)
+ {positionIncrement = positionIncrement, flags = flags, type = type};
+ if (payload != null)
+ t.payload = (Payload) payload.Clone();
+ return t;
+ }
+
+ public override bool Equals(Object obj)
+ {
+ if (obj == this)
+ return true;
+
+ var other = obj as Token;
+ if (other == null)
+ return false;
+
+ InitTermBuffer();
+ other.InitTermBuffer();
+
+ if (termLength == other.termLength && startOffset == other.startOffset && endOffset == other.endOffset &&
+ flags == other.flags && positionIncrement == other.positionIncrement && SubEqual(type, other.type) &&
+ SubEqual(payload, other.payload))
+ {
+ for (int i = 0; i < termLength; i++)
+ if (termBuffer[i] != other.termBuffer[i])
+ return false;
+ return true;
+ }
+ return false;
+ }
+
+ private bool SubEqual(System.Object o1, System.Object o2)
+ {
+ if (o1 == null)
+ return o2 == null;
+ return o1.Equals(o2);
+ }
+
+ public override int GetHashCode()
+ {
+ InitTermBuffer();
+ int code = termLength;
+ code = code * 31 + startOffset;
+ code = code * 31 + endOffset;
+ code = code * 31 + flags;
+ code = code * 31 + positionIncrement;
+ code = code * 31 + type.GetHashCode();
+ code = (payload == null?code:code * 31 + payload.GetHashCode());
+ code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
+ return code;
+ }
+
+ // like clear() but doesn't clear termBuffer/text
+ private void ClearNoTermBuffer()
+ {
+ payload = null;
+ positionIncrement = 1;
+ flags = 0;
+ startOffset = endOffset = 0;
+ type = DEFAULT_TYPE;
+ }
+
+ /// Shorthand for calling Clear,
+ /// SetTermBuffer(char[], int, int),
+ /// StartOffset,
+ /// EndOffset,
+ /// Type
+ ///
+ /// this Token instance
+ ///
+ public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
+ {
+ ClearNoTermBuffer();
+ payload = null;
+ positionIncrement = 1;
+ SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = newType;
+ return this;
+ }
+
+ /// Shorthand for calling Clear,
+ /// SetTermBuffer(char[], int, int),
+ /// StartOffset,
+ /// EndOffset,
+ /// Type on Token.DEFAULT_TYPE
+ ///
+ /// this Token instance
+ ///
+ public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTermBuffer, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = DEFAULT_TYPE;
+ return this;
+ }
+
+ /// Shorthand for calling Clear,
+ /// SetTermBuffer(String),
+ /// StartOffset,
+ /// EndOffset,
+ /// Type
+ ///
+ /// this Token instance
+ ///
+ public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset, System.String newType)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = newType;
+ return this;
+ }
+
+ /// Shorthand for calling Clear,
+ /// SetTermBuffer(String, int, int),
+ /// StartOffset,
+ /// EndOffset,
+ /// Type
+ ///
+ /// this Token instance
+ ///
+ public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = newType;
+ return this;
+ }
+
+ /// Shorthand for calling Clear,
+ /// SetTermBuffer(String),
+ /// StartOffset,
+ /// EndOffset,
+ /// Type on Token.DEFAULT_TYPE
+ ///
+ /// this Token instance
+ ///
+ public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = DEFAULT_TYPE;
+ return this;
+ }
+
+ /// Shorthand for calling Clear,
+ /// SetTermBuffer(String, int, int),
+ /// StartOffset,
+ /// EndOffset,
+ /// Type on Token.DEFAULT_TYPE
+ ///
+ /// this Token instance
+ ///
+ public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset)
+ {
+ ClearNoTermBuffer();
+ SetTermBuffer(newTerm, newTermOffset, newTermLength);
+ startOffset = newStartOffset;
+ endOffset = newEndOffset;
+ type = DEFAULT_TYPE;
+ return this;
+ }
+
+ /// Copy the prototype token's fields into this one. Note: Payloads are shared.
+ ///
+ ///
+ public virtual void Reinit(Token prototype)
+ {
+ prototype.InitTermBuffer();
+ SetTermBuffer(prototype.termBuffer, 0, prototype.termLength);
+ positionIncrement = prototype.positionIncrement;
+ flags = prototype.flags;
+ startOffset = prototype.startOffset;
+ endOffset = prototype.endOffset;
+ type = prototype.type;
+ payload = prototype.payload;
+ }
+
+ /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
+ ///
+ ///
+ ///
+ ///
+ public virtual void Reinit(Token prototype, System.String newTerm)
+ {
+ SetTermBuffer(newTerm);
+ positionIncrement = prototype.positionIncrement;
+ flags = prototype.flags;
+ startOffset = prototype.startOffset;
+ endOffset = prototype.endOffset;
+ type = prototype.type;
+ payload = prototype.payload;
+ }
+
+ /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length)
+ {
+ SetTermBuffer(newTermBuffer, offset, length);
+ positionIncrement = prototype.positionIncrement;
+ flags = prototype.flags;
+ startOffset = prototype.startOffset;
+ endOffset = prototype.endOffset;
+ type = prototype.type;
+ payload = prototype.payload;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ if (target is Token)
+ {
+ var to = (Token) target;
+ to.Reinit(this);
+ // reinit shares the payload, so clone it:
+ if (payload != null)
+ {
+ to.payload = (Payload) payload.Clone();
+ }
+ }
+ else
+ {
+ InitTermBuffer();
+ ((ITermAttribute) target).SetTermBuffer(termBuffer, 0, termLength);
+ ((IOffsetAttribute) target).SetOffset(startOffset, endOffset);
+ ((IPositionIncrementAttribute) target).PositionIncrement = positionIncrement;
+ ((IPayloadAttribute) target).Payload = (payload == null)?null:(Payload) payload.Clone();
+ ((IFlagsAttribute) target).Flags = flags;
+ ((ITypeAttribute) target).Type = type;
+ }
+ }
+
+ ///
+ /// Convenience factory that returns Token as implementation for the basic
+ /// attributes and returns the default impl (with "Impl" appended) for all other
+ /// attributes.
+ /// @since 3.0
+ ///
+ public static AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
+ new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
+
+ ///
+ /// Expert : Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
+ /// and for all other attributes calls the given delegate factory.
+ ///
+ public class TokenAttributeFactory : AttributeSource.AttributeFactory
+ {
+
+ private readonly AttributeSource.AttributeFactory _delegateFactory;
+
+ ///
+ /// Expert : Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
+ /// and for all other attributes calls the given delegate factory.
+ ///
+ public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory)
+ {
+ this._delegateFactory = delegateFactory;
+ }
+
+ public override Attribute CreateAttributeInstance<T>()
+ {
+ return typeof(T).IsAssignableFrom(typeof(Token))
+ ? new Token()
+ : _delegateFactory.CreateAttributeInstance<T>();
+ }
+
+ public override bool Equals(Object other)
+ {
+ if (this == other) return true;
+
+ var af = other as TokenAttributeFactory;
+ return af != null && _delegateFactory.Equals(af._delegateFactory);
+ }
+
+ public override int GetHashCode()
+ {
+ return _delegateFactory.GetHashCode() ^ 0x0a45aa31;
+ }
+ }
+ }
+}
\ No newline at end of file
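A minimal usage sketch of the char[]-based Token API added above, for orientation only (not part of the patch; the field values are illustrative):

using Lucene.Net.Analysis;

class TokenReuseSketch
{
    // Reuses a single Token instance, as the doc comments above recommend:
    // Reinit resets payload/flags/type and copies the new term into the existing buffer.
    static Token Next(Token reusable, string word, int start)
    {
        return reusable.Reinit(word, start, start + word.Length);
    }

    static void Demo()
    {
        var token = new Token();
        Token t = Next(token, "monodoc", 0);
        char[] buffer = t.TermBuffer();      // direct access to the internal buffer
        int length = t.TermLength();         // number of valid chars in the buffer
        int increment = t.PositionIncrement; // defaults to 1
    }
}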
diff --git a/external/Lucene.Net.Light/src/core/Analysis/TokenFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/TokenFilter.cs
new file mode 100644
index 0000000000..7483c82623
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/TokenFilter.cs
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A TokenFilter is a TokenStream whose input is another TokenStream.
+ ///
+ /// This is an abstract class; subclasses must override IncrementToken().
+ ///
+ ///
+ ///
+ ///
+ public abstract class TokenFilter:TokenStream
+ {
+ /// The source of tokens for this filter.
+ protected internal TokenStream input;
+
+ private bool isDisposed;
+
+ /// Construct a token stream filtering the given input.
+ protected internal TokenFilter(TokenStream input):base(input)
+ {
+ this.input = input;
+ }
+
+ /// Performs end-of-stream operations, if any, and then calls End() on the
+ /// input TokenStream.
+ /// NOTE: Be sure to call super.end() first when overriding this method.
+ ///
+ public override void End()
+ {
+ input.End();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ //input = null;
+ isDisposed = true;
+ }
+
+ /// Reset the filter as well as the input TokenStream.
+ public override void Reset()
+ {
+ input.Reset();
+ }
+ }
+}
\ No newline at end of file
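A minimal sketch of a TokenFilter subclass built on the attribute interfaces this patch adds; the lower-casing behaviour, class name, and the generic AddAttribute<T>() helper inherited from AttributeSource are assumptions, not part of the patch:

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public sealed class LowerCaseFilterSketch : TokenFilter
{
    private readonly ITermAttribute termAtt;

    public LowerCaseFilterSketch(TokenStream input) : base(input)
    {
        // Retrieve the attribute once; the same instance is reused for every token.
        termAtt = AddAttribute<ITermAttribute>();
    }

    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
            return false; // end of stream
        char[] buffer = termAtt.TermBuffer();
        int length = termAtt.TermLength();
        for (int i = 0; i < length; i++)
            buffer[i] = char.ToLowerInvariant(buffer[i]);
        return true;
    }
}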
diff --git a/external/Lucene.Net.Light/src/core/Analysis/TokenStream.cs b/external/Lucene.Net.Light/src/core/Analysis/TokenStream.cs
new file mode 100644
index 0000000000..c6246960c0
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/TokenStream.cs
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using Document = Lucene.Net.Documents.Document;
+using Field = Lucene.Net.Documents.Field;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A TokenStream enumerates the sequence of tokens, either from
+ /// Fields of a Document or from query text.
+ ///
+ /// This is an abstract class. Concrete subclasses are:
+ ///
+ /// - Tokenizer, a TokenStream whose input is a Reader; and
+ /// - TokenFilter, a TokenStream whose input is another
+ /// TokenStream.
+ ///
+ /// A new TokenStream API has been introduced with Lucene 2.9. This API
+ /// has moved from being Token-based to Attribute-based. While Token
+ /// still exists in 2.9 as a convenience class, the preferred way
+ /// to store the information of a Token is to use Attributes.
+ ///
+ /// TokenStream now extends AttributeSource, which provides
+ /// access to all of the token Attributes for the TokenStream.
+ /// Note that only one instance per Attribute is created and reused
+ /// for every token. This approach reduces object creation and allows local
+ /// caching of references to the Attributes. See
+ /// IncrementToken() for further details.
+ ///
+ /// The workflow of the new TokenStream API is as follows:
+ ///
+ /// - Instantiation of TokenStream/TokenFilters which add/get
+ /// attributes to/from the AttributeSource.
+ /// - The consumer calls Reset().
+ /// - The consumer retrieves attributes from the stream and stores local
+ /// references to all attributes it wants to access
+ /// - The consumer calls IncrementToken() until it returns false and
+ /// consumes the attributes after each call.
+ /// - The consumer calls End() so that any end-of-stream operations
+ /// can be performed.
+ /// - The consumer calls Dispose() to release any resource when finished
+ /// using the TokenStream
+ ///
+ /// To make sure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers are
+ /// not required to check for availability of attributes in
+ /// IncrementToken().
+ ///
+ /// You can find some example code for the new API in the analysis package level
+ /// Javadoc.
+ ///
+ /// Sometimes it is desirable to capture a current state of a TokenStream,
+ /// e.g. for buffering purposes. For this use case
+ /// AttributeSource.CaptureState and AttributeSource.RestoreState
+ /// can be used.
+ ///
+ public abstract class TokenStream : AttributeSource, IDisposable
+ {
+ /// A TokenStream using the default attribute factory.
+ protected internal TokenStream()
+ { }
+
+ /// A TokenStream that uses the same attributes as the supplied one.
+ protected internal TokenStream(AttributeSource input)
+ : base(input)
+ { }
+
+ /// A TokenStream using the supplied AttributeFactory for creating new instances.
+ protected internal TokenStream(AttributeFactory factory)
+ : base(factory)
+ { }
+
+ /// Consumers (i.e., IndexWriter) use this method to advance the stream to
+ /// the next token. Implementing classes must implement this method and update
+ /// the appropriate Attributes with the attributes of the next
+ /// token.
+ ///
+ /// The producer must make no assumptions about the attributes after the
+ /// method has been returned: the caller may arbitrarily change it. If the
+ /// producer needs to preserve the state for subsequent calls, it can use
+ /// CaptureState to create a copy of the current attribute state.
+ ///
+ /// This method is called for every token of a document, so an efficient
+ /// implementation is crucial for good performance. To avoid calls to
+ /// AddAttribute and GetAttribute,
+ /// references to all Attributes that this stream uses should be
+ /// retrieved during instantiation.
+ ///
+ /// To ensure that filters and consumers know which attributes are available,
+ /// the attributes must be added during instantiation. Filters and consumers
+ /// are not required to check for availability of attributes in
+ /// IncrementToken().
+ ///
+ ///
+ /// false for end of stream; true otherwise
+ public abstract bool IncrementToken();
+
+ /// This method is called by the consumer after the last token has been
+ /// consumed, after IncrementToken() returned false
+ /// (using the new TokenStream API). Streams implementing the old API
+ /// should upgrade to use this feature.
+ ///
+ /// This method can be used to perform any end-of-stream operations, such as
+ /// setting the final offset of a stream. The final offset of a stream might
+ /// differ from the offset of the last token eg in case one or more whitespaces
+ /// followed after the last token, but a WhitespaceTokenizer was used.
+ ///
+ ///
+ /// IOException
+ public virtual void End()
+ {
+ // do nothing by default
+ }
+
+ /// Resets this stream to the beginning. This is an optional operation, so
+ /// subclasses may or may not implement this method. Reset() is not needed for
+ /// the standard indexing process. However, if the tokens of a
+ /// TokenStream are intended to be consumed more than once, it is
+ /// necessary to implement Reset(). Note that if your TokenStream
+ /// caches tokens and feeds them back again after a reset, it is imperative
+ /// that you clone the tokens when you store them away (on the first pass) as
+ /// well as when you return them (on future passes after Reset()).
+ ///
+ public virtual void Reset()
+ {
+ }
+
+ /// Releases resources associated with this stream.
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+ }
+}
\ No newline at end of file
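The consumer workflow described in the TokenStream doc comment above, as a hedged sketch (not part of the patch; it assumes Analyzer.TokenStream(field, reader) and the generic AddAttribute<T>() helper from AttributeSource, and the field/text values are illustrative):

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

static class TokenStreamConsumerSketch
{
    static void PrintTokens(Analyzer analyzer, string field, string text)
    {
        TokenStream stream = analyzer.TokenStream(field, new StringReader(text));
        // Store attribute references once, before the loop, as the doc comment advises.
        ITermAttribute term = stream.AddAttribute<ITermAttribute>();
        IOffsetAttribute offset = stream.AddAttribute<IOffsetAttribute>();

        stream.Reset();
        while (stream.IncrementToken())
            Console.WriteLine("{0} [{1},{2})", term.Term, offset.StartOffset, offset.EndOffset);
        stream.End();     // end-of-stream operations, e.g. final offset
        stream.Dispose(); // release resources
    }
}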
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/FlagsAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/FlagsAttribute.cs
new file mode 100644
index 0000000000..b5c4b7bdc0
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/FlagsAttribute.cs
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// This attribute can be used to pass different flags down the tokenizer chain,
+ /// eg from one TokenFilter to another one.
+ ///
+ [Serializable]
+ public class FlagsAttribute:Util.Attribute, IFlagsAttribute, System.ICloneable
+ {
+ private int flags = 0;
+
+ /// EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ ///
+ ///
+ /// Get the bitset for any bits that have been set. This is completely distinct from Type, although they do share similar purposes.
+ /// The flags can be used to encode information about the token for use by other TokenFilters.
+ ///
+ ///
+ ///
+ /// The bits
+ public virtual int Flags
+ {
+ get { return flags; }
+ set { this.flags = value; }
+ }
+
+ public override void Clear()
+ {
+ flags = 0;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (this == other)
+ {
+ return true;
+ }
+
+ if (other is FlagsAttribute)
+ {
+ return ((FlagsAttribute) other).flags == flags;
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return flags;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IFlagsAttribute t = (IFlagsAttribute) target;
+ t.Flags = flags;
+ }
+
+ override public System.Object Clone()
+ {
+ FlagsAttribute impl = new FlagsAttribute();
+ impl.flags = this.flags;
+ return impl;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs
new file mode 100644
index 0000000000..24b2bea5af
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using Tokenizer = Lucene.Net.Analysis.Tokenizer;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// This attribute can be used to pass different flags down the chain,
+ /// eg from one TokenFilter to another one.
+ ///
+ public interface IFlagsAttribute:IAttribute
+ {
+ /// EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ ///
+ ///
+ /// Get the bitset for any bits that have been set. This is completely distinct from Type, although they do share similar purposes.
+ /// The flags can be used to encode information about the token for use by other TokenFilters.
+ ///
+ ///
+ ///
+ /// The bits
+ int Flags { get; set; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs
new file mode 100644
index 0000000000..ffbbe02476
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The start and end character offset of a Token.
+ public interface IOffsetAttribute : IAttribute
+ {
+ /// Returns this Token's starting offset, the position of the first character
+ /// corresponding to this token in the source text.
+ /// Note that the difference between endOffset() and startOffset() may not be
+ /// equal to termText.length(), as the term text may have been altered by a
+ /// stemmer or some other filter.
+ ///
+ int StartOffset { get; }
+
+
+ /// Set the starting and ending offset.
+ /// See StartOffset() and EndOffset()
+ ///
+ void SetOffset(int startOffset, int endOffset);
+
+
+ /// Returns this Token's ending offset, one greater than the position of the
+ /// last character corresponding to this token in the source text. The length
+ /// of the token in the source text is (endOffset - startOffset).
+ ///
+ int EndOffset { get; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs
new file mode 100644
index 0000000000..7e313ce31c
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+using Payload = Lucene.Net.Index.Payload;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The payload of a Token. See also Lucene.Net.Index.Payload.
+ public interface IPayloadAttribute:IAttribute
+ {
+ /// Returns this Token's payload.
+ Payload Payload { get; set; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs
new file mode 100644
index 0000000000..6c2a131b47
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The positionIncrement determines the position of this token
+ /// relative to the previous Token in a TokenStream, used in phrase
+ /// searching.
+ ///
+ /// The default value is one.
+ ///
+ /// Some common uses for this are:
+ ///
+ /// - Set it to zero to put multiple terms in the same position. This is
+ /// useful if, e.g., a word has multiple stems. Searches for phrases
+ /// including either stem will match. In this case, all but the first stem's
+ /// increment should be set to zero: the increment of the first instance
+ /// should be one. Repeating a token with an increment of zero can also be
+ /// used to boost the scores of matches on that token.
+ ///
+ /// - Set it to values greater than one to inhibit exact phrase matches.
+ /// If, for example, one does not want phrases to match across removed stop
+ /// words, then one could build a stop word filter that removes stop words and
+ /// also sets the increment to the number of stop words removed before each
+ /// non-stop word. Then exact phrase queries will only match when the terms
+ /// occur with no intervening stop words.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ public interface IPositionIncrementAttribute:IAttribute
+ {
+ /// Gets or sets the position increment. The default value is one.
+ ///
+ ///
+ /// the distance from the prior term
+ int PositionIncrement { set; get; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITermAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITermAttribute.cs
new file mode 100644
index 0000000000..8f9b030339
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITermAttribute.cs
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The term text of a Token.
+ public interface ITermAttribute:IAttribute
+ {
+ /// Returns the Token's term text.
+ ///
+ /// This method has a performance penalty
+ /// because the text is stored internally in a char[]. If
+ /// possible, use TermBuffer() and TermLength()
+ /// directly instead. If you really need a
+ /// String, use this method, which is nothing more than
+ /// a convenience call to new String(token.termBuffer(), 0, token.termLength())
+ ///
+ string Term { get; }
+
+ /// Copies the contents of buffer, starting at offset for
+ /// length characters, into the termBuffer array.
+ ///
+ /// the buffer to copy
+ ///
+ /// the index in the buffer of the first character to copy
+ ///
+ /// the number of characters to copy
+ ///
+ void SetTermBuffer(char[] buffer, int offset, int length);
+
+ /// Copies the contents of buffer into the termBuffer array.
+ /// the buffer to copy
+ ///
+ void SetTermBuffer(System.String buffer);
+
+ /// Copies the contents of buffer, starting at offset and continuing
+ /// for length characters, into the termBuffer array.
+ ///
+ /// the buffer to copy
+ ///
+ /// the index in the buffer of the first character to copy
+ ///
+ /// the number of characters to copy
+ ///
+ void SetTermBuffer(System.String buffer, int offset, int length);
+
+ /// Returns the internal termBuffer character array which
+ /// you can then directly alter. If the array is too
+ /// small for your token, use ResizeTermBuffer(int)
+ /// to increase it. After
+ /// altering the buffer be sure to call SetTermLength(int)
+ /// to record the number of valid
+ /// characters that were placed into the termBuffer.
+ ///
+ char[] TermBuffer();
+
+ /// Grows the termBuffer to at least size newSize, preserving the
+ /// existing content. Note: If the next operation is to change
+ /// the contents of the term buffer use
+ /// SetTermBuffer(char[], int, int),
+ /// SetTermBuffer(String), or
+ /// SetTermBuffer(String, int, int)
+ /// to optimally combine the resize with the setting of the termBuffer.
+ ///
+ /// minimum size of the new termBuffer
+ ///
+ /// newly created termBuffer with length >= newSize
+ ///
+ char[] ResizeTermBuffer(int newSize);
+
+ /// Return number of valid characters (length of the term)
+ /// in the termBuffer array.
+ ///
+ int TermLength();
+
+ /// Set number of valid characters (length of the term) in
+ /// the termBuffer array. Use this to truncate the termBuffer
+ /// or to synchronize with external manipulation of the termBuffer.
+ /// Note: to grow the size of the array,
+ /// use ResizeTermBuffer(int) first.
+ ///
+ /// the truncated length
+ ///
+ void SetTermLength(int length);
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITypeAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITypeAttribute.cs
new file mode 100644
index 0000000000..48bcc10062
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITypeAttribute.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// A Token's lexical type. The Default value is "word".
+ public interface ITypeAttribute:IAttribute
+ {
+ /// Gets or sets this Token's lexical type. Defaults to "word".
+ string Type { get; set; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/OffsetAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/OffsetAttribute.cs
new file mode 100644
index 0000000000..51495599dd
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/OffsetAttribute.cs
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The start and end character offset of a Token.
+ [Serializable]
+ public class OffsetAttribute:Attribute, IOffsetAttribute, System.ICloneable
+ {
+ private int startOffset;
+ private int endOffset;
+
+ /// Returns this Token's starting offset, the position of the first character
+ /// corresponding to this token in the source text.
+ /// Note that the difference between endOffset() and startOffset() may not be
+ /// equal to termText.length(), as the term text may have been altered by a
+ /// stemmer or some other filter.
+ ///
+ public virtual int StartOffset
+ {
+ get { return startOffset; }
+ }
+
+
+ /// Set the starting and ending offset.
+ /// See StartOffset() and EndOffset()
+ ///
+ public virtual void SetOffset(int startOffset, int endOffset)
+ {
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
+ }
+
+
+ /// Returns this Token's ending offset, one greater than the position of the
+ /// last character corresponding to this token in the source text. The length
+ /// of the token in the source text is (endOffset - startOffset).
+ ///
+ public virtual int EndOffset
+ {
+ get { return endOffset; }
+ }
+
+
+ public override void Clear()
+ {
+ startOffset = 0;
+ endOffset = 0;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is OffsetAttribute)
+ {
+ OffsetAttribute o = (OffsetAttribute) other;
+ return o.startOffset == startOffset && o.endOffset == endOffset;
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ int code = startOffset;
+ code = code * 31 + endOffset;
+ return code;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IOffsetAttribute t = (IOffsetAttribute) target;
+ t.SetOffset(startOffset, endOffset);
+ }
+
+ override public System.Object Clone()
+ {
+ OffsetAttribute impl = new OffsetAttribute();
+ impl.endOffset = endOffset;
+ impl.startOffset = startOffset;
+ return impl;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PayloadAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PayloadAttribute.cs
new file mode 100644
index 0000000000..ae1c4d93f8
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PayloadAttribute.cs
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+using Payload = Lucene.Net.Index.Payload;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The payload of a Token. See also Lucene.Net.Index.Payload.
+ [Serializable]
+ public class PayloadAttribute:Attribute, IPayloadAttribute, System.ICloneable
+ {
+ private Payload payload;
+
+ /// Initialize this attribute with no payload.
+ public PayloadAttribute()
+ {
+ }
+
+ /// Initialize this attribute with the given payload.
+ public PayloadAttribute(Payload payload)
+ {
+ this.payload = payload;
+ }
+
+ /// Returns this Token's payload.
+ public virtual Payload Payload
+ {
+ get { return this.payload; }
+ set { this.payload = value; }
+ }
+
+ public override void Clear()
+ {
+ payload = null;
+ }
+
+ public override System.Object Clone()
+ {
+ var clone = (PayloadAttribute) base.Clone();
+ if (payload != null)
+ {
+ clone.payload = (Payload) payload.Clone();
+ }
+ return clone;
+ // TODO: This code use to be as below. Any reason why? the if(payload!=null) was missing...
+ //PayloadAttributeImpl impl = new PayloadAttributeImpl();
+ //impl.payload = new Payload(this.payload.data, this.payload.offset, this.payload.length);
+ //return impl;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is IPayloadAttribute)
+ {
+ PayloadAttribute o = (PayloadAttribute) other;
+ if (o.payload == null || payload == null)
+ {
+ return o.payload == null && payload == null;
+ }
+
+ return o.payload.Equals(payload);
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return (payload == null)?0:payload.GetHashCode();
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IPayloadAttribute t = (IPayloadAttribute) target;
+ t.Payload = (payload == null)?null:(Payload) payload.Clone();
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs
new file mode 100644
index 0000000000..4f7a04fbf9
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The positionIncrement determines the position of this token
+ /// relative to the previous Token in a TokenStream, used in phrase
+ /// searching.
+ ///
+ /// The default value is one.
+ ///
+ /// Some common uses for this are:
+ ///
+ /// - Set it to zero to put multiple terms in the same position. This is
+ /// useful if, e.g., a word has multiple stems. Searches for phrases
+ /// including either stem will match. In this case, all but the first stem's
+ /// increment should be set to zero: the increment of the first instance
+ /// should be one. Repeating a token with an increment of zero can also be
+ /// used to boost the scores of matches on that token.
+ ///
+ /// - Set it to values greater than one to inhibit exact phrase matches.
+ /// If, for example, one does not want phrases to match across removed stop
+ /// words, then one could build a stop word filter that removes stop words and
+ /// also sets the increment to the number of stop words removed before each
+ /// non-stop word. Then exact phrase queries will only match when the terms
+ /// occur with no intervening stop words.
+ ///
+ ///
+ ///
+ [Serializable]
+ public class PositionIncrementAttribute:Attribute, IPositionIncrementAttribute, System.ICloneable
+ {
+ private int positionIncrement = 1;
+
+ /// Set the position increment. The default value is one.
+ ///
+ ///
+ /// the distance from the prior term
+ public virtual int PositionIncrement
+ {
+ set
+ {
+ if (value < 0)
+ throw new System.ArgumentException("Increment must be zero or greater: " + value);
+ this.positionIncrement = value;
+ }
+ get { return positionIncrement; }
+ }
+
+ public override void Clear()
+ {
+ this.positionIncrement = 1;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is PositionIncrementAttribute)
+ {
+ return positionIncrement == ((PositionIncrementAttribute) other).positionIncrement;
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return positionIncrement;
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ IPositionIncrementAttribute t = (IPositionIncrementAttribute) target;
+ t.PositionIncrement = positionIncrement;
+ }
+
+ override public System.Object Clone()
+ {
+ PositionIncrementAttribute impl = new PositionIncrementAttribute();
+ impl.positionIncrement = positionIncrement;
+ return impl;
+ }
+ }
+}
\ No newline at end of file
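The stop-word use case spelled out in the position-increment doc comments above, sketched as a filter (not part of the patch; the class name, the stop-word set, and the generic AddAttribute<T>() helper from AttributeSource are assumptions):

using System.Collections.Generic;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

public sealed class GapAwareStopFilterSketch : TokenFilter
{
    private readonly HashSet<string> stopWords;
    private readonly ITermAttribute termAtt;
    private readonly IPositionIncrementAttribute posIncrAtt;

    public GapAwareStopFilterSketch(TokenStream input, HashSet<string> stopWords) : base(input)
    {
        this.stopWords = stopWords;
        termAtt = AddAttribute<ITermAttribute>();
        posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    }

    public override bool IncrementToken()
    {
        int skipped = 0;
        while (input.IncrementToken())
        {
            if (!stopWords.Contains(termAtt.Term))
            {
                // Fold the removed stop words into the surviving token's increment so
                // exact phrase queries will not match across the gap.
                posIncrAtt.PositionIncrement += skipped;
                return true;
            }
            skipped += posIncrAtt.PositionIncrement;
        }
        return false;
    }
}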
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TermAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TermAttribute.cs
new file mode 100644
index 0000000000..f95402c179
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TermAttribute.cs
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Support;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// The term text of a Token.
+ [Serializable]
+ public class TermAttribute:Attribute, ITermAttribute, System.ICloneable
+ {
+ private static int MIN_BUFFER_SIZE = 10;
+
+ private char[] termBuffer;
+ private int termLength;
+
+ /// Returns the Token's term text.
+ ///
+ /// This method has a performance penalty
+ /// because the text is stored internally in a char[]. If
+ /// possible, use TermBuffer() and TermLength()
+ /// directly instead. If you
+ /// really need a String, use this method, which is nothing more than
+ /// a convenience call to new String(token.termBuffer(), 0, token.termLength())
+ ///
+ public virtual string Term
+ {
+ get
+ {
+ InitTermBuffer();
+ return new System.String(termBuffer, 0, termLength);
+ }
+ }
+
+ /// Copies the contents of buffer, starting at offset for
+ /// length characters, into the termBuffer array.
+ ///
+ /// the buffer to copy
+ ///
+ /// the index in the buffer of the first character to copy
+ ///
+ /// the number of characters to copy
+ ///
+ public virtual void SetTermBuffer(char[] buffer, int offset, int length)
+ {
+ GrowTermBuffer(length);
+ Array.Copy(buffer, offset, termBuffer, 0, length);
+ termLength = length;
+ }
+
+ /// Copies the contents of buffer into the termBuffer array.
+ /// the buffer to copy
+ ///
+ public virtual void SetTermBuffer(System.String buffer)
+ {
+ int length = buffer.Length;
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// Copies the contents of buffer, starting at offset and continuing
+ /// for length characters, into the termBuffer array.
+ ///
+ /// the buffer to copy
+ ///
+ /// the index in the buffer of the first character to copy
+ ///
+ /// the number of characters to copy
+ ///
+ public virtual void SetTermBuffer(System.String buffer, int offset, int length)
+ {
+ System.Diagnostics.Debug.Assert(offset <= buffer.Length);
+ System.Diagnostics.Debug.Assert(offset + length <= buffer.Length);
+ GrowTermBuffer(length);
+ TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0);
+ termLength = length;
+ }
+
+ /// Returns the internal termBuffer character array which
+ /// you can then directly alter. If the array is too
+ /// small for your token, use ResizeTermBuffer(int)
+ /// to increase it. After
+ /// altering the buffer be sure to call SetTermLength(int)
+ /// to record the number of valid
+ /// characters that were placed into the termBuffer.
+ ///
+ public virtual char[] TermBuffer()
+ {
+ InitTermBuffer();
+ return termBuffer;
+ }
+
+ /// Grows the termBuffer to at least size newSize, preserving the
+ /// existing content. Note: If the next operation is to change
+ /// the contents of the term buffer use
+ /// SetTermBuffer(char[], int, int),
+ /// SetTermBuffer(String), or
+ /// SetTermBuffer(String, int, int)
+ /// to optimally combine the resize with the setting of the termBuffer.
+ ///
+ /// minimum size of the new termBuffer
+ ///
+ /// newly created termBuffer with length >= newSize
+ ///
+ public virtual char[] ResizeTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ // The buffer is always at least MIN_BUFFER_SIZE
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation and preserve content
+ char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length);
+ termBuffer = newCharBuffer;
+ }
+ }
+ return termBuffer;
+ }
+
+
+ /// Allocates a buffer char[] of at least newSize, without preserving the existing content.
+ /// It is always used in places that set the content.
+ ///
+ /// minimum size of the buffer
+ ///
+ private void GrowTermBuffer(int newSize)
+ {
+ if (termBuffer == null)
+ {
+ // The buffer is always at least MIN_BUFFER_SIZE
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)];
+ }
+ else
+ {
+ if (termBuffer.Length < newSize)
+ {
+ // Not big enough; create a new array with slight
+ // over allocation:
+ termBuffer = new char[ArrayUtil.GetNextSize(newSize)];
+ }
+ }
+ }
+
+ private void InitTermBuffer()
+ {
+ if (termBuffer == null)
+ {
+ termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)];
+ termLength = 0;
+ }
+ }
+
+ /// Return number of valid characters (length of the term)
+ /// in the termBuffer array.
+ ///
+ public virtual int TermLength()
+ {
+ return termLength;
+ }
+
+ /// Set number of valid characters (length of the term) in
+ /// the termBuffer array. Use this to truncate the termBuffer
+ /// or to synchronize with external manipulation of the termBuffer.
+ /// Note: to grow the size of the array,
+ /// use ResizeTermBuffer(int) first.
+ ///
+ /// the truncated length
+ ///
+ public virtual void SetTermLength(int length)
+ {
+ InitTermBuffer();
+ if (length > termBuffer.Length)
+ throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")");
+ termLength = length;
+ }
+
+ public override int GetHashCode()
+ {
+ InitTermBuffer();
+ int code = termLength;
+ code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength);
+ return code;
+ }
+
+ public override void Clear()
+ {
+ termLength = 0;
+ }
+
+ public override System.Object Clone()
+ {
+ TermAttribute t = (TermAttribute) base.Clone();
+ // Do a deep clone
+ if (termBuffer != null)
+ {
+ t.termBuffer = new char[termBuffer.Length];
+ termBuffer.CopyTo(t.termBuffer, 0);
+ }
+ return t;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is ITermAttribute)
+ {
+ InitTermBuffer();
+ TermAttribute o = ((TermAttribute) other);
+ o.InitTermBuffer();
+
+ if (termLength != o.termLength)
+ return false;
+ for (int i = 0; i < termLength; i++)
+ {
+ if (termBuffer[i] != o.termBuffer[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ public override System.String ToString()
+ {
+ InitTermBuffer();
+ return "term=" + new System.String(termBuffer, 0, termLength);
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ InitTermBuffer();
+ ITermAttribute t = (ITermAttribute) target;
+ t.SetTermBuffer(termBuffer, 0, termLength);
+ }
+ }
+}
\ No newline at end of file
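A small sketch of the direct term-buffer manipulation the ResizeTermBuffer/SetTermLength comments above describe (illustrative only, not part of the patch):

using Lucene.Net.Analysis.Tokenattributes;

static class TermBufferSketch
{
    // Appends a suffix to the current term in place, growing the buffer if needed
    // and then recording the new number of valid characters.
    static void AppendSuffix(ITermAttribute termAtt, string suffix)
    {
        int oldLength = termAtt.TermLength();
        char[] buffer = termAtt.ResizeTermBuffer(oldLength + suffix.Length);
        suffix.CopyTo(0, buffer, oldLength, suffix.Length);
        termAtt.SetTermLength(oldLength + suffix.Length);
    }
}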
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TypeAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TypeAttribute.cs
new file mode 100644
index 0000000000..1da1c50f89
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TypeAttribute.cs
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Attribute = Lucene.Net.Util.Attribute;
+
+namespace Lucene.Net.Analysis.Tokenattributes
+{
+
+ /// A Token's lexical type. The default value is "word".
+ [Serializable]
+ public class TypeAttribute:Attribute, ITypeAttribute, System.ICloneable
+ {
+ private System.String type;
+ public const System.String DEFAULT_TYPE = "word";
+
+ public TypeAttribute():this(DEFAULT_TYPE)
+ {
+ }
+
+ public TypeAttribute(System.String type)
+ {
+ this.type = type;
+ }
+
+ /// Returns this Token's lexical type. Defaults to "word".
+ public virtual string Type
+ {
+ get { return type; }
+ set { this.type = value; }
+ }
+
+ public override void Clear()
+ {
+ type = DEFAULT_TYPE;
+ }
+
+ public override bool Equals(System.Object other)
+ {
+ if (other == this)
+ {
+ return true;
+ }
+
+ if (other is TypeAttribute)
+ {
+ return type.Equals(((TypeAttribute) other).type);
+ }
+
+ return false;
+ }
+
+ public override int GetHashCode()
+ {
+ return type.GetHashCode();
+ }
+
+ public override void CopyTo(Attribute target)
+ {
+ ITypeAttribute t = (ITypeAttribute) target;
+ t.Type = type;
+ }
+
+ override public System.Object Clone()
+ {
+ TypeAttribute impl = new TypeAttribute();
+ impl.type = type;
+ return impl;
+ }
+ }
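+
+ // Hypothetical usage sketch (not part of the original source): a filter can tag
+ // the current token's lexical type and a later stage can read it back.
+ //
+ // var typeAtt = new TypeAttribute(); // starts as DEFAULT_TYPE ("word")
+ // typeAtt.Type = "number"; // mark the current token as a number
+ // var copy = (TypeAttribute) typeAtt.Clone(); // the copy carries the same type string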
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenizer.cs
new file mode 100644
index 0000000000..5ab741ef18
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenizer.cs
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A Tokenizer is a TokenStream whose input is a Reader.
+ ///
+ /// This is an abstract class; subclasses must override IncrementToken().
+ ///
+ /// NOTE: Subclasses overriding IncrementToken() must call ClearAttributes()
+ /// before setting attributes.
+ ///
+
+ public abstract class Tokenizer:TokenStream
+ {
+ /// The text source for this Tokenizer.
+ protected internal System.IO.TextReader input;
+
+ private bool isDisposed;
+
+ /// Construct a tokenizer with null input.
+ protected internal Tokenizer()
+ {
+ }
+
+ /// Construct a token stream processing the given input.
+ protected internal Tokenizer(System.IO.TextReader input)
+ {
+ this.input = CharReader.Get(input);
+ }
+
+ /// Construct a tokenizer with null input using the given AttributeFactory.
+ protected internal Tokenizer(AttributeFactory factory):base(factory)
+ {
+ }
+
+ /// Construct a token stream processing the given input using the given AttributeFactory.
+ protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory)
+ {
+ this.input = CharReader.Get(input);
+ }
+
+ /// Construct a token stream processing the given input using the given AttributeSource.
+ protected internal Tokenizer(AttributeSource source):base(source)
+ {
+ }
+
+ /// Construct a token stream processing the given input using the given AttributeSource.
+ protected internal Tokenizer(AttributeSource source, System.IO.TextReader input):base(source)
+ {
+ this.input = CharReader.Get(input);
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (input != null)
+ {
+ input.Close();
+ }
+ }
+
+ // LUCENE-2387: don't hold onto Reader after close, so
+ // GC can reclaim
+ input = null;
+ isDisposed = true;
+ }
+
+ /// Return the corrected offset. If input is a CharStream subclass,
+ /// this method calls CharStream.CorrectOffset, else returns currentOff.
+ ///
+ /// currentOff: offset as seen in the output
+ ///
+ /// returns: corrected offset based on the input
+ ///
+ protected internal int CorrectOffset(int currentOff)
+ {
+ return (input is CharStream)?((CharStream) input).CorrectOffset(currentOff):currentOff;
+ }
+
+ /// Expert: Reset the tokenizer to a new reader. Typically, an
+ /// analyzer (in its reusableTokenStream method) will use
+ /// this to re-use a previously created tokenizer.
+ ///
+ public virtual void Reset(System.IO.TextReader input)
+ {
+ this.input = input;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/WhitespaceAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceAnalyzer.cs
new file mode 100644
index 0000000000..77dbaa3511
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceAnalyzer.cs
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace Lucene.Net.Analysis
+{
+
+ /// An Analyzer that uses WhitespaceTokenizer.
+
+ public sealed class WhitespaceAnalyzer:Analyzer
+ {
+ public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ return new WhitespaceTokenizer(reader);
+ }
+
+ public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
+ {
+ var tokenizer = (Tokenizer) PreviousTokenStream;
+ if (tokenizer == null)
+ {
+ tokenizer = new WhitespaceTokenizer(reader);
+ PreviousTokenStream = tokenizer;
+ }
+ else
+ tokenizer.Reset(reader);
+ return tokenizer;
+ }
+ }
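+
+ // Hypothetical usage sketch (not part of the original source): tokenize a short
+ // string on whitespace; ReusableTokenStream reuses a cached tokenizer between calls.
+ //
+ // var analyzer = new WhitespaceAnalyzer();
+ // TokenStream ts = analyzer.ReusableTokenStream(
+ // "body", new System.IO.StringReader("hello wide world"));
+ // // yields the tokens "hello", "wide", "world"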
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/WhitespaceTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceTokenizer.cs
new file mode 100644
index 0000000000..c96ad50f94
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceTokenizer.cs
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using AttributeSource = Lucene.Net.Util.AttributeSource;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
+ /// Adjacent sequences of non-whitespace characters form tokens.
+ ///
+
+ public class WhitespaceTokenizer:CharTokenizer
+ {
+ /// Construct a new WhitespaceTokenizer.
+ public WhitespaceTokenizer(System.IO.TextReader @in)
+ : base(@in)
+ {
+ }
+
+ /// Construct a new WhitespaceTokenizer using a given AttributeSource.
+ public WhitespaceTokenizer(AttributeSource source, System.IO.TextReader @in)
+ : base(source, @in)
+ {
+ }
+
+ /// Construct a new WhitespaceTokenizer using a given AttributeFactory.
+ public WhitespaceTokenizer(AttributeFactory factory, System.IO.TextReader @in)
+ : base(factory, @in)
+ {
+ }
+
+ /// Collects only characters which do not satisfy
+ /// System.Char.IsWhiteSpace(char).
+ ///
+ protected internal override bool IsTokenChar(char c)
+ {
+ return !System.Char.IsWhiteSpace(c);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Analysis/WordlistLoader.cs b/external/Lucene.Net.Light/src/core/Analysis/WordlistLoader.cs
new file mode 100644
index 0000000000..bfd1b07471
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Analysis/WordlistLoader.cs
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+
+namespace Lucene.Net.Analysis
+{
+
+ /// Loader for text files that represent a list of stopwords.
+ public class WordlistLoader
+ {
+
+ /// Loads a text file and adds every line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the file should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ ///
+ /// File containing the wordlist
+ /// A HashSet with the file's words
+ public static ISet<string> GetWordSet(System.IO.FileInfo wordfile)
+ {
+ using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
+ {
+ return GetWordSet(reader);
+ }
+ }
+
+ /// Loads a text file and adds every non-comment line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the file should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ ///
+ /// File containing the wordlist
+ /// The comment string to ignore
+ /// A HashSet with the file's words
+ public static ISet<string> GetWordSet(System.IO.FileInfo wordfile, System.String comment)
+ {
+ using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default))
+ {
+ return GetWordSet(reader, comment);
+ }
+ }
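+
+ // Hypothetical usage sketch (not part of the original source; "stopwords.txt" is an
+ // illustrative file name): load a stopword file in which lines beginning with "#"
+ // are treated as comments.
+ //
+ // ISet<string> stops = WordlistLoader.GetWordSet(
+ // new System.IO.FileInfo("stopwords.txt"), "#");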
+
+
+ /// Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the Reader should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ ///
+ /// Reader containing the wordlist
+ /// A HashSet with the reader's words
+ public static ISet<string> GetWordSet(System.IO.TextReader reader)
+ {
+ var result = Support.Compatibility.SetFactory.CreateHashSet<string>();
+
+ System.String word;
+ while ((word = reader.ReadLine()) != null)
+ {
+ result.Add(word.Trim());
+ }
+
+ return result;
+ }
+
+ /// Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting
+ /// leading and trailing whitespace). Every line of the Reader should contain only
+ /// one word. The words need to be in lowercase if you make use of an
+ /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ ///
+ ///
+ /// Reader containing the wordlist
+ ///
+ /// The string representing a comment.
+ ///
+ /// A HashSet with the reader's words
+ ///
+ public static ISet<string> GetWordSet(System.IO.TextReader reader, System.String comment)
+ {
+ var result = Support.Compatibility.SetFactory.CreateHashSet<string>();
+
+ System.String word = null;
+ while ((word = reader.ReadLine()) != null)
+ {
+ if (word.StartsWith(comment) == false)
+ {
+ result.Add(word.Trim());
+ }
+ }
+
+ return result;
+ }
+
+
+
+ /// Reads a stem dictionary. Each line contains:
+ /// word\t stem
+ /// (i.e. two tab separated words)
+ ///
+ ///
+ /// stem dictionary that overrules the stemming algorithm
+ ///
+ /// IOException
+ public static Dictionary<string, string> GetStemDict(System.IO.FileInfo wordstemfile)
+ {
+ if (wordstemfile == null)
+ throw new System.NullReferenceException("wordstemfile may not be null");
+ var result = new Dictionary<string, string>();
+ System.IO.StreamReader br = null;
+ System.IO.StreamReader fr = null;
+ try
+ {
+ fr = new System.IO.StreamReader(wordstemfile.FullName, System.Text.Encoding.Default);
+ br = new System.IO.StreamReader(fr.BaseStream, fr.CurrentEncoding);
+ System.String line;
+ char[] tab = {'\t'};
+ while ((line = br.ReadLine()) != null)
+ {
+ System.String[] wordstem = line.Split(tab, 2);
+ result[wordstem[0]] = wordstem[1];
+ }
+ }
+ finally
+ {
+ if (fr != null)
+ fr.Close();
+ if (br != null)
+ br.Close();
+ }
+ return result;
+ }
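+
+ // Hypothetical input sketch (not part of the original source): each line of the
+ // stem file is "word<TAB>stem", so a tab-separated file containing
+ //
+ // running run
+ // mice mouse
+ //
+ // maps "running" to "run" and "mice" to "mouse" in the returned dictionary.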
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/AbstractField.cs b/external/Lucene.Net.Light/src/core/Document/AbstractField.cs
new file mode 100644
index 0000000000..a526f1de2d
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/AbstractField.cs
@@ -0,0 +1,312 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using StringHelper = Lucene.Net.Util.StringHelper;
+using PhraseQuery = Lucene.Net.Search.PhraseQuery;
+using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
+
+namespace Lucene.Net.Documents
+{
+ ///
+ ///
+ ///
+ ///
+ [Serializable]
+ public abstract class AbstractField : IFieldable
+ {
+
+ protected internal System.String internalName = "body";
+ protected internal bool storeTermVector = false;
+ protected internal bool storeOffsetWithTermVector = false;
+ protected internal bool storePositionWithTermVector = false;
+ protected internal bool internalOmitNorms = false;
+ protected internal bool internalIsStored = false;
+ protected internal bool internalIsIndexed = true;
+ protected internal bool internalIsTokenized = true;
+ protected internal bool internalIsBinary = false;
+ protected internal bool lazy = false;
+ protected internal bool internalOmitTermFreqAndPositions = false;
+ protected internal float internalBoost = 1.0f;
+ // the data object for all different kind of field values
+ protected internal System.Object fieldsData = null;
+ // pre-analyzed tokenStream for indexed fields
+ protected internal TokenStream tokenStream;
+ // length/offset for all primitive types
+ protected internal int internalBinaryLength;
+ protected internal int internalbinaryOffset;
+
+ protected internal AbstractField()
+ {
+ }
+
+ protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ this.internalName = StringHelper.Intern(name); // field names are interned
+
+ this.internalIsStored = store.IsStored();
+ this.internalIsIndexed = index.IsIndexed();
+ this.internalIsTokenized = index.IsAnalyzed();
+ this.internalOmitNorms = index.OmitNorms();
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// Gets or sets the boost factor for hits for this field.
+ ///
+ ///
The default value is 1.0.
+ ///
+ ///
Note: this value is not stored directly with the document in the index.
+ /// Documents returned from and
+ /// may thus not have the same value present as when
+ /// this field was indexed.
+ ///
+ public virtual float Boost
+ {
+ get { return internalBoost; }
+ set { this.internalBoost = value; }
+ }
+
+ /// Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
+ ///
+ public virtual string Name
+ {
+ get { return internalName; }
+ }
+
+ protected internal virtual void SetStoreTermVector(Field.TermVector termVector)
+ {
+ this.storeTermVector = termVector.IsStored();
+ this.storePositionWithTermVector = termVector.WithPositions();
+ this.storeOffsetWithTermVector = termVector.WithOffsets();
+ }
+
+ /// True iff the value of the field is to be stored in the index for return
+ /// with search hits. It is an error for this to be true if a field is
+ /// Reader-valued.
+ ///
+ public bool IsStored
+ {
+ get { return internalIsStored; }
+ }
+
+ /// True iff the value of the field is to be indexed, so that it may be
+ /// searched on.
+ ///
+ public bool IsIndexed
+ {
+ get { return internalIsIndexed; }
+ }
+
+ /// True iff the value of the field should be tokenized as text prior to
+ /// indexing. Un-tokenized fields are indexed as a single word and may not be
+ /// Reader-valued.
+ ///
+ public bool IsTokenized
+ {
+ get { return internalIsTokenized; }
+ }
+
+ /// True iff the term or terms used to index this field are stored as a term
+ /// vector, available from IndexReader.GetTermFreqVector(int, String).
+ /// These methods do not provide access to the original content of the field,
+ /// only to terms used to index it. If the original content must be
+ /// preserved, use the stored attribute instead.
+ ///
+ ///
+ ///
+ ///
+ public bool IsTermVectorStored
+ {
+ get { return storeTermVector; }
+ }
+
+ /// True iff terms are stored as term vector together with their offsets
+ /// (start and end position in source text).
+ ///
+ public virtual bool IsStoreOffsetWithTermVector
+ {
+ get { return storeOffsetWithTermVector; }
+ }
+
+ /// True iff terms are stored as term vector together with their token positions.
+ public virtual bool IsStorePositionWithTermVector
+ {
+ get { return storePositionWithTermVector; }
+ }
+
+ /// True iff the value of the field is stored as binary
+ public bool IsBinary
+ {
+ get { return internalIsBinary; }
+ }
+
+
+ /// Return the raw byte[] for the binary field. Note that
+ /// you must also call BinaryLength and
+ /// BinaryOffset to know which range of bytes in this
+ /// returned array belong to the field.
+ ///
+ /// reference to the Field value as byte[].
+ public virtual byte[] GetBinaryValue()
+ {
+ return GetBinaryValue(null);
+ }
+
+ public virtual byte[] GetBinaryValue(byte[] result)
+ {
+ if (internalIsBinary || fieldsData is byte[])
+ return (byte[]) fieldsData;
+ else
+ return null;
+ }
+
+ /// Returns the length of the byte[] segment that is used as the value; if the Field
+ /// is not binary, the returned value is undefined.
+ ///
+ /// length of byte[] segment that represents this Field value
+ public virtual int BinaryLength
+ {
+ get
+ {
+ if (internalIsBinary)
+ {
+ return internalBinaryLength;
+ }
+ return fieldsData is byte[] ? ((byte[]) fieldsData).Length : 0;
+ }
+ }
+
+ /// Returns the offset into the byte[] segment that is used as the value; if the Field
+ /// is not binary, the returned value is undefined.
+ ///
+ /// index of the first character in byte[] segment that represents this Field value
+ public virtual int BinaryOffset
+ {
+ get { return internalbinaryOffset; }
+ }
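+
+ // Hypothetical usage sketch (not part of the original source): for a binary field,
+ // only the slice [BinaryOffset, BinaryOffset + BinaryLength) of GetBinaryValue()
+ // belongs to this field.
+ //
+ // byte[] raw = field.GetBinaryValue();
+ // var slice = new byte[field.BinaryLength];
+ // System.Array.Copy(raw, field.BinaryOffset, slice, 0, field.BinaryLength);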
+
+ /// True if norms are omitted for this indexed field
+ public virtual bool OmitNorms
+ {
+ get { return internalOmitNorms; }
+ set { this.internalOmitNorms = value; }
+ }
+
+ /// Expert:
+ ///
+ /// If set, omit term freq, positions and payloads from
+ /// postings for this field.
+ ///
+ ///
NOTE : While this option reduces storage space
+ /// required in the index, it also means any query
+ /// requiring positional information, such as
+ /// or subclasses will
+ /// silently fail to find results.
+ ///
+ public virtual bool OmitTermFreqAndPositions
+ {
+ set { this.internalOmitTermFreqAndPositions = value; }
+ get { return internalOmitTermFreqAndPositions; }
+ }
+
+ public virtual bool IsLazy
+ {
+ get { return lazy; }
+ }
+
+ /// Prints a Field for human consumption.
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder result = new System.Text.StringBuilder();
+ if (internalIsStored)
+ {
+ result.Append("stored");
+ }
+ if (internalIsIndexed)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("indexed");
+ }
+ if (internalIsTokenized)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("tokenized");
+ }
+ if (storeTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVector");
+ }
+ if (storeOffsetWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorOffsets");
+ }
+ if (storePositionWithTermVector)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("termVectorPosition");
+ }
+ if (internalIsBinary)
+ {
+ if (result.Length > 0)
+ result.Append(",");
+ result.Append("binary");
+ }
+ if (internalOmitNorms)
+ {
+ result.Append(",omitNorms");
+ }
+ if (internalOmitTermFreqAndPositions)
+ {
+ result.Append(",omitTermFreqAndPositions");
+ }
+ if (lazy)
+ {
+ result.Append(",lazy");
+ }
+ result.Append('<');
+ result.Append(internalName);
+ result.Append(':');
+
+ if (fieldsData != null && lazy == false)
+ {
+ result.Append(fieldsData);
+ }
+
+ result.Append('>');
+ return result.ToString();
+ }
+
+ public abstract TokenStream TokenStreamValue { get; }
+ public abstract TextReader ReaderValue { get; }
+ public abstract string StringValue { get; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/CompressionTools.cs b/external/Lucene.Net.Light/src/core/Document/CompressionTools.cs
new file mode 100644
index 0000000000..400633f3aa
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/CompressionTools.cs
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+// To enable compression support in Lucene.Net,
+// you will need to define 'SHARP_ZIP_LIB' and reference the SharpZipLib
+// library. The SharpZipLib library can be downloaded from:
+// http://www.icsharpcode.net/OpenSource/SharpZipLib/
+
+using System;
+using Lucene.Net.Support;
+using UnicodeUtil = Lucene.Net.Util.UnicodeUtil;
+
+namespace Lucene.Net.Documents
+{
+
+ /// Simple utility class providing static methods to
+ /// compress and decompress binary data for stored fields.
+ /// This class uses java.util.zip.Deflater and Inflater
+ /// classes to compress and decompress.
+ ///
+
+ public class CompressionTools
+ {
+
+ // Export only static methods
+ private CompressionTools()
+ {
+ }
+
+ /// Compresses the specified byte range using the
+ /// specified compressionLevel (constants are defined in
+ /// java.util.zip.Deflater).
+ ///
+ public static byte[] Compress(byte[] value_Renamed, int offset, int length, int compressionLevel)
+ {
+ /* Create an expandable byte array to hold the compressed data.
+ * You cannot use an array that's the same size as the original because
+ * there is no guarantee that the compressed data will be smaller than
+ * the uncompressed data. */
+ System.IO.MemoryStream bos = new System.IO.MemoryStream(length);
+
+ Deflater compressor = SharpZipLib.CreateDeflater();
+
+ try
+ {
+ compressor.SetLevel(compressionLevel);
+ compressor.SetInput(value_Renamed, offset, length);
+ compressor.Finish();
+
+ // Compress the data
+ byte[] buf = new byte[1024];
+ while (!compressor.IsFinished)
+ {
+ int count = compressor.Deflate(buf);
+ bos.Write(buf, 0, count);
+ }
+ }
+ finally
+ {
+ }
+
+ return bos.ToArray();
+ }
+
+ /// Compresses the specified byte range, with default BEST_COMPRESSION level
+ public static byte[] Compress(byte[] value_Renamed, int offset, int length)
+ {
+ return Compress(value_Renamed, offset, length, Deflater.BEST_COMPRESSION);
+ }
+
+ /// Compresses all bytes in the array, with default BEST_COMPRESSION level
+ public static byte[] Compress(byte[] value_Renamed)
+ {
+ return Compress(value_Renamed, 0, value_Renamed.Length, Deflater.BEST_COMPRESSION);
+ }
+
+ /// Compresses the String value, with default BEST_COMPRESSION level
+ public static byte[] CompressString(System.String value_Renamed)
+ {
+ return CompressString(value_Renamed, Deflater.BEST_COMPRESSION);
+ }
+
+ /// Compresses the String value using the specified
+ /// compressionLevel (constants are defined in
+ /// java.util.zip.Deflater).
+ ///
+ public static byte[] CompressString(System.String value_Renamed, int compressionLevel)
+ {
+ UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
+ UnicodeUtil.UTF16toUTF8(value_Renamed, 0, value_Renamed.Length, result);
+ return Compress(result.result, 0, result.length, compressionLevel);
+ }
+
+ /// Decompress the byte array previously returned by
+ /// compress
+ ///
+ public static byte[] Decompress(byte[] value_Renamed)
+ {
+ // Create an expandable byte array to hold the decompressed data
+ System.IO.MemoryStream bos = new System.IO.MemoryStream(value_Renamed.Length);
+
+ Inflater decompressor = SharpZipLib.CreateInflater();
+
+ try
+ {
+ decompressor.SetInput(value_Renamed);
+
+ // Decompress the data
+ byte[] buf = new byte[1024];
+ while (!decompressor.IsFinished)
+ {
+ int count = decompressor.Inflate(buf);
+ bos.Write(buf, 0, count);
+ }
+ }
+ finally
+ {
+ }
+
+ return bos.ToArray();
+ }
+
+ /// Decompress the byte array previously returned by
+ /// compressString back into a String
+ ///
+ public static System.String DecompressString(byte[] value_Renamed)
+ {
+ UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
+ byte[] bytes = Decompress(value_Renamed);
+ UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.Length, result);
+ return new System.String(result.result, 0, result.length);
+ }
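+
+ // Hypothetical round-trip sketch (not part of the original source):
+ //
+ // byte[] packed = CompressionTools.CompressString("stored field text");
+ // string unpacked = CompressionTools.DecompressString(packed); // == original text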
+ }
+}
+
diff --git a/external/Lucene.Net.Light/src/core/Document/DateField.cs b/external/Lucene.Net.Light/src/core/Document/DateField.cs
new file mode 100644
index 0000000000..6179f4c14d
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/DateField.cs
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using PrefixQuery = Lucene.Net.Search.PrefixQuery;
+using TermRangeQuery = Lucene.Net.Search.TermRangeQuery;
+// for javadoc
+
+namespace Lucene.Net.Documents
+{
+ // for javadoc
+
+ // do not remove in 3.0, needed for reading old indexes!
+
+ /// Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders by date,
+ /// which makes them suitable for use as field values and search terms.
+ ///
+ ///
Note that this class saves dates with millisecond granularity,
+ /// which is bad for and , as those
+ /// queries are expanded to a BooleanQuery with a potentially large number
+ /// of terms when searching. Thus you might want to use
+ /// instead.
+ ///
+ ///
+ /// Note: dates before 1970 cannot be used, and therefore cannot be
+ /// indexed when using this class. See for an
+ /// alternative without such a limitation.
+ ///
+ ///
+ /// Another approach is , which provides
+ /// a sortable binary representation (prefix encoded) of numeric values, which
+ /// date/time are.
+ /// For indexing a , convert it to unix timestamp as
+ /// long and
+ /// index this as a numeric value with
+ /// and use to query it.
+ ///
+ ///
+ /// If you build a new index, use DateTools or
+ /// NumericField instead.
+ /// This class is included for use with existing
+ /// indices and will be removed in a future release (possibly Lucene 4.0).
+ ///
+ [Obsolete("If you build a new index, use DateTools or NumericField instead.This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")]
+ public class DateField
+ {
+
+ private DateField()
+ {
+ }
+
+ // make date strings long enough to last a millennium
+ private static int DATE_LEN = Number.ToString(1000L * 365 * 24 * 60 * 60 * 1000, Number.MAX_RADIX).Length;
+
+ public static System.String MIN_DATE_STRING()
+ {
+ return TimeToString(0);
+ }
+
+ public static System.String MAX_DATE_STRING()
+ {
+ char[] buffer = new char[DATE_LEN];
+ char c = Character.ForDigit(Character.MAX_RADIX - 1, Character.MAX_RADIX);
+ for (int i = 0; i < DATE_LEN; i++)
+ buffer[i] = c;
+ return new System.String(buffer);
+ }
+
+ /// Converts a Date to a string suitable for indexing.
+ /// RuntimeException if the date specified in the
+ /// method argument is before 1970
+ ///
+ public static System.String DateToString(System.DateTime date)
+ {
+ TimeSpan ts = date.Subtract(new DateTime(1970, 1, 1));
+ ts = ts.Subtract(TimeZone.CurrentTimeZone.GetUtcOffset(date));
+ return TimeToString(ts.Ticks / TimeSpan.TicksPerMillisecond);
+ }
+ /// Converts a millisecond time to a string suitable for indexing.
+ /// RuntimeException if the time specified in the
+ /// method argument is negative, that is, before 1970
+ ///
+ public static System.String TimeToString(long time)
+ {
+ if (time < 0)
+ throw new System.SystemException("time '" + time + "' is too early, must be >= 0");
+
+ System.String s = Number.ToString(time, Character.MAX_RADIX);
+
+ if (s.Length > DATE_LEN)
+ throw new System.SystemException("time '" + time + "' is too late, length of string " + "representation must be <= " + DATE_LEN);
+
+ // Pad with leading zeros
+ if (s.Length < DATE_LEN)
+ {
+ System.Text.StringBuilder sb = new System.Text.StringBuilder(s);
+ while (sb.Length < DATE_LEN)
+ sb.Insert(0, 0);
+ s = sb.ToString();
+ }
+
+ return s;
+ }
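+
+ // Hypothetical sketch (not part of the original source): because the encoding is
+ // zero-padded and uses a fixed radix, string order matches time order, e.g.
+ //
+ // string earlier = DateField.TimeToString(1000);
+ // string later = DateField.TimeToString(2000);
+ // // string.CompareOrdinal(earlier, later) < 0, mirroring 1000 < 2000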
+
+ /// Converts a string-encoded date into a millisecond time.
+ public static long StringToTime(System.String s)
+ {
+ return Number.Parse(s, Number.MAX_RADIX);
+ }
+ /// Converts a string-encoded date into a Date object.
+ public static System.DateTime StringToDate(System.String s)
+ {
+ long ticks = StringToTime(s) * TimeSpan.TicksPerMillisecond;
+ System.DateTime date = new System.DateTime(1970, 1, 1);
+ date = date.AddTicks(ticks);
+ date = date.Add(TimeZone.CurrentTimeZone.GetUtcOffset(date));
+ return date;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/DateTools.cs b/external/Lucene.Net.Light/src/core/Document/DateTools.cs
new file mode 100644
index 0000000000..8263df1c34
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/DateTools.cs
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Documents
+{
+
+ /// Provides support for converting dates to strings and vice-versa.
+ /// The strings are structured so that lexicographic sorting orders
+ /// them by date, which makes them suitable for use as field values
+ /// and search terms.
+ ///
+ ///
This class also helps you to limit the resolution of your dates. Do not
+ /// save dates with a finer resolution than you really need, as then
+ /// RangeQuery and PrefixQuery will require more memory and become slower.
+ ///
+ ///
Compared to the strings generated by the methods
+ /// in this class take slightly more space, unless your selected resolution
+ /// is set to Resolution.DAY or lower.
+ ///
+ ///
+ /// Another approach is NumericUtils, which provides
+ /// a sortable binary representation (prefix encoded) of numeric values, which
+ /// date/time are.
+ /// For indexing a DateTime, convert it to a unix timestamp as
+ /// long and
+ /// index this as a numeric value with NumericField
+ /// and use NumericRangeQuery to query it.
+ ///
+ public class DateTools
+ {
+
+ private static readonly System.String YEAR_FORMAT = "yyyy";
+ private static readonly System.String MONTH_FORMAT = "yyyyMM";
+ private static readonly System.String DAY_FORMAT = "yyyyMMdd";
+ private static readonly System.String HOUR_FORMAT = "yyyyMMddHH";
+ private static readonly System.String MINUTE_FORMAT = "yyyyMMddHHmm";
+ private static readonly System.String SECOND_FORMAT = "yyyyMMddHHmmss";
+ private static readonly System.String MILLISECOND_FORMAT = "yyyyMMddHHmmssfff";
+
+ private static readonly System.Globalization.Calendar calInstance = new System.Globalization.GregorianCalendar();
+
+ // cannot create, the class has static methods only
+ private DateTools()
+ {
+ }
+
+ /// Converts a Date to a string suitable for indexing.
+ ///
+ ///
+ /// the date to be converted
+ ///
+ /// the desired resolution, see
+ ///
+ ///
+ /// a string in format yyyyMMddHHmmssSSS or shorter,
+ /// depending on resolution ; using GMT as timezone
+ ///
+ public static System.String DateToString(System.DateTime date, Resolution resolution)
+ {
+ return TimeToString(date.Ticks / TimeSpan.TicksPerMillisecond, resolution);
+ }
+
+ /// Converts a millisecond time to a string suitable for indexing.
+ ///
+ ///
+ /// the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT
+ ///
+ /// the desired resolution, see
+ ///
+ ///
+ /// a string in format yyyyMMddHHmmssSSS or shorter,
+ /// depending on resolution ; using GMT as timezone
+ ///
+ public static System.String TimeToString(long time, Resolution resolution)
+ {
+ System.DateTime date = new System.DateTime(Round(time, resolution));
+
+ if (resolution == Resolution.YEAR)
+ {
+ return date.ToString(YEAR_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ return date.ToString(MONTH_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ return date.ToString(DAY_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ return date.ToString(HOUR_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ return date.ToString(MINUTE_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ return date.ToString(SECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ return date.ToString(MILLISECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture);
+ }
+
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
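+
+ // Hypothetical sketch (not part of the original source): the chosen Resolution
+ // controls how much of the yyyyMMddHHmmssfff pattern is emitted, e.g.
+ //
+ // var when = new System.DateTime(2004, 9, 21, 13, 50, 11);
+ // DateTools.DateToString(when, DateTools.Resolution.DAY); // "20040921"
+ // DateTools.DateToString(when, DateTools.Resolution.MINUTE); // "200409211350"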
+
+ /// Converts a string produced by timeToString or
+ /// DateToString back to a time, represented as the
+ /// number of milliseconds since January 1, 1970, 00:00:00 GMT.
+ ///
+ ///
+ /// the date string to be converted
+ ///
+ /// the number of milliseconds since January 1, 1970, 00:00:00 GMT
+ ///
+ /// ParseException if dateString is not in the
+ /// expected format
+ ///
+ public static long StringToTime(System.String dateString)
+ {
+ return StringToDate(dateString).Ticks;
+ }
+
+ /// Converts a string produced by timeToString or
+ /// DateToString back to a time, represented as a
+ /// Date object.
+ ///
+ ///
+ /// the date string to be converted
+ ///
+ /// the parsed time as a Date object
+ ///
+ /// ParseException if dateString is not in the
+ /// expected format
+ ///
+ public static System.DateTime StringToDate(System.String dateString)
+ {
+ System.DateTime date;
+ if (dateString.Length == 4)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ 1, 1, 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 6)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ 1, 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 8)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ 0, 0, 0, 0);
+ }
+ else if (dateString.Length == 10)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ 0, 0, 0);
+ }
+ else if (dateString.Length == 12)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ 0, 0);
+ }
+ else if (dateString.Length == 14)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ Convert.ToInt16(dateString.Substring(12, 2)),
+ 0);
+ }
+ else if (dateString.Length == 17)
+ {
+ date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)),
+ Convert.ToInt16(dateString.Substring(4, 2)),
+ Convert.ToInt16(dateString.Substring(6, 2)),
+ Convert.ToInt16(dateString.Substring(8, 2)),
+ Convert.ToInt16(dateString.Substring(10, 2)),
+ Convert.ToInt16(dateString.Substring(12, 2)),
+ Convert.ToInt16(dateString.Substring(14, 3)));
+ }
+ else
+ {
+ throw new System.FormatException("Input is not valid date string: " + dateString);
+ }
+ return date;
+ }
+
+ /// Limit a date's resolution. For example, the date 2004-09-21 13:50:11
+ /// will be changed to 2004-09-01 00:00:00 when using
+ /// Resolution.MONTH .
+ ///
+ ///
+ ///
+ /// The desired resolution of the date to be returned
+ ///
+ /// the date with all values more precise than resolution
+ /// set to 0 or 1
+ ///
+ public static System.DateTime Round(System.DateTime date, Resolution resolution)
+ {
+ return new System.DateTime(Round(date.Ticks / TimeSpan.TicksPerMillisecond, resolution));
+ }
+
+ /// Limit a date's resolution. For example, the date 1095767411000
+ /// (which represents 2004-09-21 13:50:11) will be changed to
+ /// 1093989600000 (2004-09-01 00:00:00) when using
+ /// Resolution.MONTH .
+ ///
+ ///
+ /// The time in milliseconds (not ticks).
+ /// The desired resolution of the date to be returned
+ ///
+ /// the date with all values more precise than resolution
+ /// set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
+ ///
+ public static long Round(long time, Resolution resolution)
+ {
+ System.DateTime dt = new System.DateTime(time * TimeSpan.TicksPerMillisecond);
+
+ if (resolution == Resolution.YEAR)
+ {
+ dt = dt.AddMonths(1 - dt.Month);
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MONTH)
+ {
+ dt = dt.AddDays(1 - dt.Day);
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.DAY)
+ {
+ dt = dt.AddHours(0 - dt.Hour);
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.HOUR)
+ {
+ dt = dt.AddMinutes(0 - dt.Minute);
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MINUTE)
+ {
+ dt = dt.AddSeconds(0 - dt.Second);
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.SECOND)
+ {
+ dt = dt.AddMilliseconds(0 - dt.Millisecond);
+ }
+ else if (resolution == Resolution.MILLISECOND)
+ {
+ // don't cut off anything
+ }
+ else
+ {
+ throw new System.ArgumentException("unknown resolution " + resolution);
+ }
+ return dt.Ticks;
+ }
+
+ /// Specifies the time granularity.
+ public class Resolution
+ {
+
+ public static readonly Resolution YEAR = new Resolution("year");
+ public static readonly Resolution MONTH = new Resolution("month");
+ public static readonly Resolution DAY = new Resolution("day");
+ public static readonly Resolution HOUR = new Resolution("hour");
+ public static readonly Resolution MINUTE = new Resolution("minute");
+ public static readonly Resolution SECOND = new Resolution("second");
+ public static readonly Resolution MILLISECOND = new Resolution("millisecond");
+
+ private System.String resolution;
+
+ internal Resolution()
+ {
+ }
+
+ internal Resolution(System.String resolution)
+ {
+ this.resolution = resolution;
+ }
+
+ public override System.String ToString()
+ {
+ return resolution;
+ }
+ }
+ static DateTools()
+ {
+ {
+ // times need to be normalized so the value doesn't depend on the
+ // location the index is created/used:
+ // {{Aroush-2.1}}
+ /*
+ YEAR_FORMAT.setTimeZone(GMT);
+ MONTH_FORMAT.setTimeZone(GMT);
+ DAY_FORMAT.setTimeZone(GMT);
+ HOUR_FORMAT.setTimeZone(GMT);
+ MINUTE_FORMAT.setTimeZone(GMT);
+ SECOND_FORMAT.setTimeZone(GMT);
+ MILLISECOND_FORMAT.setTimeZone(GMT);
+ */
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/Document.cs b/external/Lucene.Net.Light/src/core/Document/Document.cs
new file mode 100644
index 0000000000..f24a46aea0
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/Document.cs
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+// for javadoc
+using IndexReader = Lucene.Net.Index.IndexReader;
+using ScoreDoc = Lucene.Net.Search.ScoreDoc;
+using Searcher = Lucene.Net.Search.Searcher;
+
+namespace Lucene.Net.Documents
+{
+
+ /// Documents are the unit of indexing and search.
+ ///
+ /// A Document is a set of fields. Each field has a name and a textual value.
+ /// A field may be stored with the document, in which
+ /// case it is returned with search hits on the document. Thus each document
+ /// should typically contain one or more stored fields which uniquely identify
+ /// it.
+ ///
+ ///
Note that fields which are not stored are
+ /// not available in documents retrieved from the index, e.g. with ,
+ /// or .
+ ///
+
+ [Serializable]
+ public sealed class Document
+ {
+ private class AnonymousClassEnumeration : System.Collections.IEnumerator
+ {
+ public AnonymousClassEnumeration(Document enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(Document enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ iter = Enclosing_Instance.fields.GetEnumerator();
+ }
+ private System.Object tempAuxObj;
+ public bool MoveNext()
+ {
+ bool result = HasMoreElements();
+ if (result)
+ {
+ tempAuxObj = NextElement();
+ }
+ return result;
+ }
+ public void Reset()
+ {
+ tempAuxObj = null;
+ }
+ public System.Object Current
+ {
+ get
+ {
+ return tempAuxObj;
+ }
+
+ }
+ private Document enclosingInstance;
+ public Document Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal System.Collections.IEnumerator iter;
+ public bool HasMoreElements()
+ {
+ return iter.MoveNext();
+ }
+ public System.Object NextElement()
+ {
+ return iter.Current;
+ }
+ }
+ internal System.Collections.Generic.IList<IFieldable> fields = new System.Collections.Generic.List<IFieldable>();
+ private float boost = 1.0f;
+
+ /// Constructs a new document with no fields.
+ public Document()
+ {
+ }
+
+
+ /// Gets or sets, at indexing time, the boost factor.
+ ///
+ /// The default is 1.0
+ ///
+ ///
Note that once a document is indexed this value is no longer available
+ /// from the index. At search time, for retrieved documents, this method always
+ /// returns 1. This however does not mean that the boost value set at indexing
+ /// time was ignored - it was just combined with other indexing time factors and
+ /// stored elsewhere, for better indexing and search performance. (For more
+ /// information see the "norm(t,d)" part of the scoring formula in
+ /// Similarity .)
+ ///
+ public float Boost
+ {
+ get { return boost; }
+ set { this.boost = value; }
+ }
+
+ ///
Adds a field to a document. Several fields may be added with
+ /// the same name. In this case, if the fields are indexed, their text is
+ /// treated as though appended for the purposes of search.
+ ///
Note that add like the removeField(s) methods only makes sense
+ /// prior to adding a document to an index. These methods cannot
+ /// be used to change the content of an existing index! In order to achieve this,
+ /// a document has to be deleted from an index and a new changed version of that
+ /// document has to be added.
+ ///
+ public void Add(IFieldable field)
+ {
+ fields.Add(field);
+ }
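+
+ // Hypothetical usage sketch (not part of the original source; assumes the usual
+ // Field(name, value, Store, Index) constructor and a caller-supplied 'bodyText'):
+ // build a document with a stored, un-analyzed identifier and an analyzed body field.
+ //
+ // var doc = new Document();
+ // doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
+ // doc.Add(new Field("body", bodyText, Field.Store.NO, Field.Index.ANALYZED));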
+
+ /// Removes field with the specified name from the document.
Removes field with the specified name from the document.
+ /// If multiple fields exist with this name, this method removes the first field that has been added.
+ /// If there is no field with the specified name, the document remains unchanged.
+ ///
Note that the removeField(s) methods like the add method only make sense
+ /// prior to adding a document to an index. These methods cannot
+ /// be used to change the content of an existing index! In order to achieve this,
+ /// a document has to be deleted from an index and a new changed version of that
+ /// document has to be added.
+ ///
+ public void RemoveField(System.String name)
+ {
+ System.Collections.Generic.IEnumerator<IFieldable> it = fields.GetEnumerator();
+ while (it.MoveNext())
+ {
+ IFieldable field = it.Current;
+ if (field.Name.Equals(name))
+ {
+ fields.Remove(field);
+ return ;
+ }
+ }
+ }
+
+ /// Removes all fields with the given name from the document.
Removes all fields with the given name from the document.
+ /// If there is no field with the specified name, the document remains unchanged.
+ ///
Note that the removeField(s) methods like the add method only make sense
+ /// prior to adding a document to an index. These methods cannot
+ /// be used to change the content of an existing index! In order to achieve this,
+ /// a document has to be deleted from an index and a new changed version of that
+ /// document has to be added.
+ ///
+ public void RemoveFields(System.String name)
+ {
+ for (int i = fields.Count - 1; i >= 0; i--)
+ {
+ IFieldable field = fields[i];
+ if (field.Name.Equals(name))
+ {
+ fields.RemoveAt(i);
+ }
+ }
+ }
+
+ /// Returns a field with the given name if any exist in this document, or
+ /// null. If multiple fields exist with this name, this method returns the
+ /// first value added.
+ /// Do not use this method with lazy loaded fields.
+ ///
+ public Field GetField(System.String name)
+ {
+ return (Field) GetFieldable(name);
+ }
+
+
+ /// Returns a field with the given name if any exist in this document, or
+ /// null. If multiple fields exist with this name, this method returns the
+ /// first value added.
+ ///
+ public IFieldable GetFieldable(System.String name)
+ {
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name))
+ return field;
+ }
+ return null;
+ }
+
+ /// Returns the string value of the field with the given name if any exist in
+ /// this document, or null. If multiple fields exist with this name, this
+ /// method returns the first value added. If only binary fields with this name
+ /// exist, returns null.
+ ///
+ public System.String Get(System.String name)
+ {
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (!field.IsBinary))
+ return field.StringValue;
+ }
+ return null;
+ }
+
+ /// Returns a List of all the fields in a document.
+ ///
Note that fields which are not stored are
+ /// not available in documents retrieved from the
+ /// index, e.g. or .
+ ///
+ public System.Collections.Generic.IList<IFieldable> GetFields()
+ {
+ return fields;
+ }
+
+ private static readonly Field[] NO_FIELDS = new Field[0];
+
+ /// Returns an array of Fields with the given name.
+ /// Do not use with lazy loaded fields.
+ /// This method returns an empty array when there are no
+ /// matching fields. It never returns null.
+ ///
+ ///
+ /// the name of the field
+ ///
+ /// a Field[] array
+ ///
+ public Field[] GetFields(System.String name)
+ {
+ var result = new System.Collections.Generic.List<Field>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name))
+ {
+ result.Add((Field)field);
+ }
+ }
+
+ if (result.Count == 0)
+ return NO_FIELDS;
+
+ return result.ToArray();
+ }
+
+
+ private static readonly IFieldable[] NO_FIELDABLES = new IFieldable[0];
+
+ /// Returns an array of IFieldables with the given name.
+ /// This method returns an empty array when there are no
+ /// matching fields. It never returns null.
+ ///
+ ///
+ /// the name of the field
+ ///
+ /// a Fieldable[] array
+ ///
+ public IFieldable[] GetFieldables(System.String name)
+ {
+ var result = new System.Collections.Generic.List<IFieldable>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name))
+ {
+ result.Add(field);
+ }
+ }
+
+ if (result.Count == 0)
+ return NO_FIELDABLES;
+
+ return result.ToArray();
+ }
+
+
+ private static readonly System.String[] NO_STRINGS = new System.String[0];
+
+ /// Returns an array of values of the field specified as the method parameter.
+ /// This method returns an empty array when there are no
+ /// matching fields. It never returns null.
+ ///
+ /// the name of the field
+ ///
+ /// a String[] of field values
+ ///
+ public System.String[] GetValues(System.String name)
+ {
+ var result = new System.Collections.Generic.List<string>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (!field.IsBinary))
+ result.Add(field.StringValue);
+ }
+
+ if (result.Count == 0)
+ return NO_STRINGS;
+
+ return result.ToArray();
+ }
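+
+ // Hypothetical sketch (not part of the original source): fields added repeatedly
+ // under one name come back together from GetValues.
+ //
+ // doc.Add(new Field("tag", "mono", Field.Store.YES, Field.Index.NOT_ANALYZED));
+ // doc.Add(new Field("tag", "lucene", Field.Store.YES, Field.Index.NOT_ANALYZED));
+ // string[] tags = doc.GetValues("tag"); // { "mono", "lucene" }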
+
+ private static readonly byte[][] NO_BYTES = new byte[0][];
+
+ /// Returns an array of byte arrays of the fields that have the name specified
+ /// as the method parameter. This method returns an empty
+ /// array when there are no matching fields. It never
+ /// returns null.
+ ///
+ ///
+ /// the name of the field
+ ///
+ /// a byte[][] of binary field values
+ ///
+ public byte[][] GetBinaryValues(System.String name)
+ {
+ var result = new System.Collections.Generic.List<byte[]>();
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (field.IsBinary))
+ result.Add(field.GetBinaryValue());
+ }
+
+ if (result.Count == 0)
+ return NO_BYTES;
+
+ return result.ToArray();
+ }
+
+ /// Returns an array of bytes for the first (or only) field that has the name
+ /// specified as the method parameter. This method will return null
+ /// if no binary fields with the specified name are available.
+ /// There may be non-binary fields with the same name.
+ ///
+ ///
+ /// the name of the field.
+ ///
+ /// a byte[] containing the binary field value or null
+ ///
+ public byte[] GetBinaryValue(System.String name)
+ {
+ foreach(IFieldable field in fields)
+ {
+ if (field.Name.Equals(name) && (field.IsBinary))
+ return field.GetBinaryValue();
+ }
+ return null;
+ }
+
+ /// Prints the fields of a document for human consumption.
+ public override System.String ToString()
+ {
+ System.Text.StringBuilder buffer = new System.Text.StringBuilder();
+ buffer.Append("Document<");
+ for (int i = 0; i < fields.Count; i++)
+ {
+ IFieldable field = fields[i];
+ buffer.Append(field.ToString());
+ if (i != fields.Count - 1)
+ buffer.Append(" ");
+ }
+ buffer.Append(">");
+ return buffer.ToString();
+ }
+
+ public System.Collections.Generic.IList<IFieldable> fields_ForNUnit
+ {
+ get { return fields; }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/Field.cs b/external/Lucene.Net.Light/src/core/Document/Field.cs
new file mode 100644
index 0000000000..d39d9f476b
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/Field.cs
@@ -0,0 +1,667 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using IndexWriter = Lucene.Net.Index.IndexWriter;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Documents
+{
+
+ /// A field is a section of a Document. Each field has two parts, a name and a
+ /// value. Values may be free text, provided as a String or as a Reader, or they
+ /// may be atomic keywords, which are not further processed. Such keywords may
+ /// be used to represent dates, urls, etc. Fields are optionally stored in the
+ /// index, so that they may be returned with hits on the document.
+ ///
+
+ [Serializable]
+ public sealed class Field:AbstractField, IFieldable
+ {
+ /// Specifies whether and how a field should be stored.
+ public enum Store
+ {
+ /// Store the original field value in the index. This is useful for short texts
+ /// like a document's title which should be displayed with the results. The
+ /// value is stored in its original form, i.e. no analyzer is used before it is
+ /// stored.
+ ///
+ YES,
+
+ /// Do not store the field value in the index.
+ NO
+ }
+
+ /// Specifies whether and how a field should be indexed.
+
+ public enum Index
+ {
+ /// Do not index the field value. This field can thus not be searched,
+ /// but one can still access its contents provided it is
+ /// stored .
+ ///
+ NO,
+
+ /// Index the tokens produced by running the field's
+ /// value through an Analyzer. This is useful for
+ /// common text.
+ ///
+ ANALYZED,
+
+ /// Index the field's value without using an Analyzer, so it can be searched.
+ /// As no analyzer is used the value will be stored as a single term. This is
+ /// useful for unique Ids like product numbers.
+ ///
+ NOT_ANALYZED,
+
+ /// Expert: Index the field's value without an Analyzer,
+ /// and also disable the storing of norms. Note that you
+ /// can also separately enable/disable norms by setting
+ /// . No norms means that
+ /// index-time field and document boosting and field
+ /// length normalization are disabled. The benefit is
+ /// less memory usage as norms take up one byte of RAM
+ /// per indexed field for every document in the index,
+ /// during searching. Note that once you index a given
+ /// field with norms enabled, disabling norms will
+ /// have no effect. In other words, for this to have the
+ /// above described effect on a field, all instances of
+ /// that field must be indexed with NOT_ANALYZED_NO_NORMS
+ /// from the beginning.
+ ///
+ NOT_ANALYZED_NO_NORMS,
+
+ /// Expert: Index the tokens produced by running the
+ /// field's value through an Analyzer, and also
+ /// separately disable the storing of norms. See
+ /// for what norms are
+ /// and why you may want to disable them.
+ ///
+ ANALYZED_NO_NORMS,
+ }
+
+ /// Specifies whether and how a field should have term vectors.
+ public enum TermVector
+ {
+ /// Do not store term vectors.
+ NO,
+
+ /// Store the term vectors of each document. A term vector is a list
+ /// of the document's terms and their number of occurrences in that document.
+ ///
+ YES,
+
+ /// Store the term vector + token position information
+ ///
+ ///
+ ///
+ ///
+ WITH_POSITIONS,
+
+ /// Store the term vector + Token offset information
+ ///
+ ///
+ ///
+ ///
+ WITH_OFFSETS,
+
+ /// Store the term vector + Token position and offset information
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ WITH_POSITIONS_OFFSETS,
+ }
+
+
+ /// The value of the field as a String, or null. If null, the Reader value or
+ /// binary value is used. Exactly one of stringValue(),
+ /// readerValue(), and getBinaryValue() must be set.
+ ///
+ public override string StringValue
+ {
+ get { return fieldsData is System.String ? (System.String) fieldsData : null; }
+ }
+
+ /// The value of the field as a Reader, or null. If null, the String value or
+ /// binary value is used. Exactly one of stringValue(),
+ /// readerValue(), and getBinaryValue() must be set.
+ ///
+ public override TextReader ReaderValue
+ {
+ get { return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null; }
+ }
+
+ /// The TokenStream for this field to be used when indexing, or null. If null, the Reader value
+ /// or String value is analyzed to produce the indexed tokens.
+ ///
+ public override TokenStream TokenStreamValue
+ {
+ get { return tokenStream; }
+ }
+
+
+ ///
+ /// Expert: change the value of this field. This can
+ /// be used during indexing to re-use a single Field
+ /// instance to improve indexing speed by avoiding GC cost
+ /// of new'ing and reclaiming Field instances. Typically
+ /// a single instance is re-used as
+ /// well. This helps most on small documents.
+ ///
+ ///
+ /// Each Field instance should only be used once
+ /// within a single instance. See ImproveIndexingSpeed
+ /// for details.
+ ///
+ public void SetValue(System.String value)
+ {
+ if (internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a String value on a binary field");
+ }
+ fieldsData = value;
+ }
+
+ /// Expert: change the value of this field. See setValue(String) .
+ public void SetValue(System.IO.TextReader value)
+ {
+ if (internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a Reader value on a binary field");
+ }
+ if (internalIsStored)
+ {
+ throw new System.ArgumentException("cannot set a Reader value on a stored field");
+ }
+ fieldsData = value;
+ }
+
+ /// Expert: change the value of this field. See setValue(String) .
+ public void SetValue(byte[] value)
+ {
+ if (!internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
+ }
+ fieldsData = value;
+ internalBinaryLength = value.Length;
+ internalbinaryOffset = 0;
+ }
+
+ /// Expert: change the value of this field. See setValue(String) .
+ public void SetValue(byte[] value, int offset, int length)
+ {
+ if (!internalIsBinary)
+ {
+ throw new System.ArgumentException("cannot set a byte[] value on a non-binary field");
+ }
+ fieldsData = value;
+ internalBinaryLength = length;
+ internalbinaryOffset = offset;
+ }
+
+ /// Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
+ /// May be combined with stored values from stringValue() or GetBinaryValue()
+ ///
+ public void SetTokenStream(TokenStream tokenStream)
+ {
+ this.internalIsIndexed = true;
+ this.internalIsTokenized = true;
+ this.tokenStream = tokenStream;
+ }
+
+ /// Create a field by specifying its name, value and how it will
+ /// be saved in the index. Term vectors will not be stored in the index.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The string to process
+ ///
+ /// Whether value should be stored in the index
+ ///
+ /// Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ ///
+ /// NullPointerException if name or value is null
+ /// IllegalArgumentException if the field is neither stored nor indexed
+ public Field(System.String name, System.String value, Store store, Index index)
+ : this(name, value, store, index, TermVector.NO)
+ {
+ }
+
+ /// Create a field by specifying its name, value and how it will
+ /// be saved in the index.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The string to process
+ ///
+ /// Whether value should be stored in the index
+ ///
+ /// Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ ///
+ /// Whether term vector should be stored
+ ///
+ /// NullPointerException if name or value is null
+ /// IllegalArgumentException in any of the following situations:
+ ///
+ /// - the field is neither stored nor indexed
+ /// - the field is not indexed but termVector is
+ /// TermVector.YES
+ ///
+ ///
+ public Field(System.String name, System.String value, Store store, Index index, TermVector termVector)
+ : this(name, true, value, store, index, termVector)
+ {
+ }
+
+ /// Create a field by specifying its name, value and how it will
+ /// be saved in the index.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// Whether to .intern() name or not
+ ///
+ /// The string to process
+ ///
+ /// Whether value should be stored in the index
+ ///
+ /// Whether the field should be indexed, and if so, if it should
+ /// be tokenized before indexing
+ ///
+ /// Whether term vector should be stored
+ ///
+ /// NullPointerException if name or value is null
+ /// IllegalArgumentException in any of the following situations:
+ ///
+ /// - the field is neither stored nor indexed
+ /// - the field is not indexed but termVector is
+ /// TermVector.YES
+ ///
+ ///
+ public Field(System.String name, bool internName, System.String value, Store store, Index index, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (value == null)
+ throw new System.NullReferenceException("value cannot be null");
+ if (name.Length == 0 && value.Length == 0)
+ throw new System.ArgumentException("name and value cannot both be empty");
+ if (index == Index.NO && store == Store.NO)
+ throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
+ if (index == Index.NO && termVector != TermVector.NO)
+ throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
+
+ if (internName)
+ // field names are optionally interned
+ name = StringHelper.Intern(name);
+
+ this.internalName = name;
+
+ this.fieldsData = value;
+
+ this.internalIsStored = store.IsStored();
+
+ this.internalIsIndexed = index.IsIndexed();
+ this.internalIsTokenized = index.IsAnalyzed();
+ this.internalOmitNorms = index.OmitNorms();
+
+ if (index == Index.NO)
+ {
+ this.internalOmitTermFreqAndPositions = false;
+ }
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
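+
+ // Illustrative sketch (editor's note, not part of the upstream source): typical
+ // Store/Index/TermVector combinations when creating text fields:
+ //
+ //   var title = new Field("title", "Lucene in Action",
+ //                         Field.Store.YES, Field.Index.ANALYZED);
+ //   var isbn  = new Field("isbn", "978-1933988177",
+ //                         Field.Store.YES, Field.Index.NOT_ANALYZED);
+ //   var body  = new Field("body", "full text goes here...",
+ //                         Field.Store.NO, Field.Index.ANALYZED,
+ //                         Field.TermVector.WITH_POSITIONS_OFFSETS);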
+
+ /// Create a tokenized and indexed field that is not stored. Term vectors will
+ /// not be stored. The Reader is read only when the Document is added to the index,
+ /// i.e. you may not close the Reader until
+ /// has been called.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The reader with the content
+ ///
+ /// NullPointerException if name or reader is null
+ public Field(System.String name, System.IO.TextReader reader):this(name, reader, TermVector.NO)
+ {
+ }
+
+ /// Create a tokenized and indexed field that is not stored, optionally with
+ /// storing term vectors. The Reader is read only when the Document is added to the index,
+ /// i.e. you may not close the Reader until
+ /// has been called.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The reader with the content
+ ///
+ /// Whether term vector should be stored
+ ///
+ /// NullPointerException if name or reader is null
+ public Field(System.String name, System.IO.TextReader reader, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (reader == null)
+ throw new System.NullReferenceException("reader cannot be null");
+
+ this.internalName = StringHelper.Intern(name); // field names are interned
+ this.fieldsData = reader;
+
+ this.internalIsStored = false;
+
+ this.internalIsIndexed = true;
+ this.internalIsTokenized = true;
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+ /// Create a tokenized and indexed field that is not stored. Term vectors will
+ /// not be stored. This is useful for pre-analyzed fields.
+ /// The TokenStream is read only when the Document is added to the index,
+ /// i.e. you may not close the TokenStream until
+ /// has been called.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The TokenStream with the content
+ ///
+ /// NullPointerException if name or tokenStream is null
+ public Field(System.String name, TokenStream tokenStream):this(name, tokenStream, TermVector.NO)
+ {
+ }
+
+ /// Create a tokenized and indexed field that is not stored, optionally with
+ /// storing term vectors. This is useful for pre-analyzed fields.
+ /// The TokenStream is read only when the Document is added to the index,
+ /// i.e. you may not close the TokenStream until
+ /// has been called.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The TokenStream with the content
+ ///
+ /// Whether term vector should be stored
+ ///
+ /// NullPointerException if name or tokenStream is null
+ public Field(System.String name, TokenStream tokenStream, TermVector termVector)
+ {
+ if (name == null)
+ throw new System.NullReferenceException("name cannot be null");
+ if (tokenStream == null)
+ throw new System.NullReferenceException("tokenStream cannot be null");
+
+ this.internalName = StringHelper.Intern(name); // field names are interned
+ this.fieldsData = null;
+ this.tokenStream = tokenStream;
+
+ this.internalIsStored = false;
+
+ this.internalIsIndexed = true;
+ this.internalIsTokenized = true;
+
+ this.internalIsBinary = false;
+
+ SetStoreTermVector(termVector);
+ }
+
+
+ /// Create a stored field with binary value. Optionally the value may be compressed.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The binary value
+ ///
+ /// How value should be stored (compressed or not)
+ ///
+ /// IllegalArgumentException if store is Store.NO
+ public Field(System.String name, byte[] value_Renamed, Store store):this(name, value_Renamed, 0, value_Renamed.Length, store)
+ {
+ }
+
+ /// Create a stored field with binary value. Optionally the value may be compressed.
+ ///
+ ///
+ /// The name of the field
+ ///
+ /// The binary value
+ ///
+ /// Starting offset in value where this Field's bytes are
+ ///
+ /// Number of bytes to use for this Field, starting at offset
+ ///
+ /// How value should be stored (compressed or not)
+ ///
+ /// IllegalArgumentException if store is Store.NO
+ public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store)
+ {
+
+ if (name == null)
+ throw new System.ArgumentException("name cannot be null");
+ if (value_Renamed == null)
+ throw new System.ArgumentException("value cannot be null");
+
+ this.internalName = StringHelper.Intern(name); // field names are interned
+ fieldsData = value_Renamed;
+
+ if (store == Store.NO)
+ throw new System.ArgumentException("binary values can't be unstored");
+
+ internalIsStored = store.IsStored();
+ internalIsIndexed = false;
+ internalIsTokenized = false;
+ internalOmitTermFreqAndPositions = false;
+ internalOmitNorms = true;
+
+ internalIsBinary = true;
+ internalBinaryLength = length;
+ internalbinaryOffset = offset;
+
+ SetStoreTermVector(TermVector.NO);
+ }
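+
+ // Illustrative sketch (editor's note, not part of the upstream source): binary
+ // values must be stored, so Store.YES is required; the file path is hypothetical:
+ //
+ //   byte[] thumbnail = System.IO.File.ReadAllBytes("cover.png");
+ //   var binaryField = new Field("thumbnail", thumbnail, Field.Store.YES);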
+ }
+
+ public static class FieldExtensions
+ {
+ public static bool IsStored(this Field.Store store)
+ {
+ switch(store)
+ {
+ case Field.Store.YES:
+ return true;
+ case Field.Store.NO:
+ return false;
+ default:
+ throw new ArgumentOutOfRangeException("store", "Invalid value for Field.Store");
+ }
+ }
+
+ public static bool IsIndexed(this Field.Index index)
+ {
+ switch(index)
+ {
+ case Field.Index.NO:
+ return false;
+ case Field.Index.ANALYZED:
+ case Field.Index.NOT_ANALYZED:
+ case Field.Index.NOT_ANALYZED_NO_NORMS:
+ case Field.Index.ANALYZED_NO_NORMS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+ }
+ }
+
+ public static bool IsAnalyzed(this Field.Index index)
+ {
+ switch (index)
+ {
+ case Field.Index.NO:
+ case Field.Index.NOT_ANALYZED:
+ case Field.Index.NOT_ANALYZED_NO_NORMS:
+ return false;
+ case Field.Index.ANALYZED:
+ case Field.Index.ANALYZED_NO_NORMS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+ }
+ }
+
+ public static bool OmitNorms(this Field.Index index)
+ {
+ switch (index)
+ {
+ case Field.Index.ANALYZED:
+ case Field.Index.NOT_ANALYZED:
+ return false;
+ case Field.Index.NO:
+ case Field.Index.NOT_ANALYZED_NO_NORMS:
+ case Field.Index.ANALYZED_NO_NORMS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index");
+ }
+ }
+
+ public static bool IsStored(this Field.TermVector tv)
+ {
+ switch(tv)
+ {
+ case Field.TermVector.NO:
+ return false;
+ case Field.TermVector.YES:
+ case Field.TermVector.WITH_OFFSETS:
+ case Field.TermVector.WITH_POSITIONS:
+ case Field.TermVector.WITH_POSITIONS_OFFSETS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+ }
+ }
+
+ public static bool WithPositions(this Field.TermVector tv)
+ {
+ switch (tv)
+ {
+ case Field.TermVector.NO:
+ case Field.TermVector.YES:
+ case Field.TermVector.WITH_OFFSETS:
+ return false;
+ case Field.TermVector.WITH_POSITIONS:
+ case Field.TermVector.WITH_POSITIONS_OFFSETS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+ }
+ }
+
+ public static bool WithOffsets(this Field.TermVector tv)
+ {
+ switch (tv)
+ {
+ case Field.TermVector.NO:
+ case Field.TermVector.YES:
+ case Field.TermVector.WITH_POSITIONS:
+ return false;
+ case Field.TermVector.WITH_OFFSETS:
+ case Field.TermVector.WITH_POSITIONS_OFFSETS:
+ return true;
+ default:
+ throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector");
+ }
+ }
+
+ public static Field.Index ToIndex(bool indexed, bool analyzed)
+ {
+ return ToIndex(indexed, analyzed, false);
+ }
+
+ public static Field.Index ToIndex(bool indexed, bool analyzed, bool omitNorms)
+ {
+
+ // If it is not indexed nothing else matters
+ if (!indexed)
+ {
+ return Field.Index.NO;
+ }
+
+ // typical, non-expert
+ if (!omitNorms)
+ {
+ if (analyzed)
+ {
+ return Field.Index.ANALYZED;
+ }
+ return Field.Index.NOT_ANALYZED;
+ }
+
+ // Expert: Norms omitted
+ if (analyzed)
+ {
+ return Field.Index.ANALYZED_NO_NORMS;
+ }
+ return Field.Index.NOT_ANALYZED_NO_NORMS;
+ }
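+
+ // Illustrative mapping (editor's note, not part of the upstream source):
+ //   FieldExtensions.ToIndex(false, true)         == Field.Index.NO
+ //   FieldExtensions.ToIndex(true,  true)         == Field.Index.ANALYZED
+ //   FieldExtensions.ToIndex(true,  false, true)  == Field.Index.NOT_ANALYZED_NO_NORMS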
+
+ ///
+ /// Get the best representation of a TermVector given the flags.
+ ///
+ public static Field.TermVector ToTermVector(bool stored, bool withOffsets, bool withPositions)
+ {
+ // If it is not stored, nothing else matters.
+ if (!stored)
+ {
+ return Field.TermVector.NO;
+ }
+
+ if (withOffsets)
+ {
+ if (withPositions)
+ {
+ return Field.TermVector.WITH_POSITIONS_OFFSETS;
+ }
+ return Field.TermVector.WITH_OFFSETS;
+ }
+
+ if (withPositions)
+ {
+ return Field.TermVector.WITH_POSITIONS;
+ }
+ return Field.TermVector.YES;
+ }
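+
+ // Illustrative mapping (editor's note, not part of the upstream source):
+ //   FieldExtensions.ToTermVector(false, true,  true)  == Field.TermVector.NO
+ //   FieldExtensions.ToTermVector(true,  false, true)  == Field.TermVector.WITH_POSITIONS
+ //   FieldExtensions.ToTermVector(true,  true,  false) == Field.TermVector.WITH_OFFSETS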
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/FieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/FieldSelector.cs
new file mode 100644
index 0000000000..f940f08500
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/FieldSelector.cs
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Documents
+{
+ /// Similar to a
+ /// java.io.FileFilter , the FieldSelector allows one to make decisions about
+ /// what Fields get loaded on a by
+ ///
+ public interface FieldSelector
+ {
+
+ ///
+ /// the field to accept or reject
+ ///
+ /// an instance of
+ /// if the named fieldName should be loaded.
+ ///
+ FieldSelectorResult Accept(System.String fieldName);
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/FieldSelectorResult.cs b/external/Lucene.Net.Light/src/core/Document/FieldSelectorResult.cs
new file mode 100644
index 0000000000..7d3a889cb2
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/FieldSelectorResult.cs
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using System.Runtime.InteropServices;
+
+namespace Lucene.Net.Documents
+{
+ /// Provides information about what should be done with this Field
+ public enum FieldSelectorResult
+ {
+ ///
+ ///
+ ///
+ INVALID, // TODO: This is kinda a kludgy workaround for the fact enums can't be null
+
+ /// Load this every time the is loaded, reading in the data as it is encountered.
+ /// and should not return null.
+ ///
+ /// should be called by the Reader.
+ ///
+ LOAD,
+
+ /// Lazily load this . This means the is valid, but it may not actually contain its data until
+ /// invoked. SHOULD NOT BE USED. is safe to use and should
+ /// return a valid instance of a .
+ ///
+ /// should be called by the Reader.
+ ///
+ LAZY_LOAD,
+
+ /// Do not load the . and should return null.
+ /// is not called.
+ ///
+ /// should not be called by the Reader.
+ ///
+ NO_LOAD,
+
+ /// Load this field as in the case, but immediately return from loading for the . Thus, the
+ /// Document may not have its complete set of Fields. and should
+ /// both be valid for this
+ ///
+ /// should be called by the Reader.
+ ///
+ LOAD_AND_BREAK,
+
+ /// Expert: Load the size of this rather than its value.
+ /// Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
+ /// The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
+ ///
+ SIZE,
+
+ /// Expert: Like but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded
+ SIZE_AND_BREAK
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/Fieldable.cs b/external/Lucene.Net.Light/src/core/Document/Fieldable.cs
new file mode 100644
index 0000000000..89d37d1c6a
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/Fieldable.cs
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using FieldInvertState = Lucene.Net.Index.FieldInvertState;
+
+namespace Lucene.Net.Documents
+{
+ /// Synonymous with .
+ ///
+ ///
+ /// WARNING: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
+ /// This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
+ /// compatibility promises remain intact. For example, Lucene can still
+ /// read and write indices created within the same major version.
+ ///
+ ///
+ ///
+ ///
+ public interface IFieldable
+ {
+ /// Gets or sets the boost factor for hits for this field. This value will be
+ /// multiplied into the score of all hits on this field of this
+ /// document.
+ ///
+ ///
+ /// The boost is multiplied by of the document
+ /// containing this field. If a document has multiple fields with the same
+ /// name, all such values are multiplied together. This product is then
+ /// used to compute the norm factor for the field. By
+ /// default, in the
+ /// method, the boost value is multiplied
+ /// by the
+ /// and then rounded by before it is stored in the
+ /// index. One should attempt to ensure that this product does not overflow
+ /// the range of that encoding.
+ ///
+ ///
+ /// The default value is 1.0.
+ ///
+ ///
+ /// Note: this value is not stored directly with the document in the index.
+ /// Documents returned from and
+ /// may thus not have the same value present as when
+ /// this field was indexed.
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ float Boost { get; set; }
+
+ /// Returns the name of the field as an interned string.
+ /// For example "date", "title", "body", ...
+ ///
+ string Name { get; }
+
+ /// The value of the field as a String, or null.
+ ///
+ /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value
+ /// unless isBinary()==true, in which case GetBinaryValue() will be used.
+ ///
+ /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
+ /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
+ /// else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
+ ///
+ string StringValue { get; }
+
+ /// The value of the field as a Reader, which can be used at index time to generate indexed tokens.
+ ///
+ ///
+ TextReader ReaderValue { get; }
+
+ /// The TokenStream for this field to be used when indexing, or null.
+ ///
+ ///
+ TokenStream TokenStreamValue { get; }
+
+ /// True if the value of the field is to be stored in the index for return
+ /// with search hits.
+ ///
+ bool IsStored { get; }
+
+ /// True if the value of the field is to be indexed, so that it may be
+ /// searched on.
+ ///
+ bool IsIndexed { get; }
+
+ /// True if the value of the field should be tokenized as text prior to
+ /// indexing. Un-tokenized fields are indexed as a single word and may not be
+ /// Reader-valued.
+ ///
+ bool IsTokenized { get; }
+
+ /// True if the term or terms used to index this field are stored as a term
+ /// vector, available from .
+ /// These methods do not provide access to the original content of the field,
+ /// only to terms used to index it. If the original content must be
+ /// preserved, use the stored attribute instead.
+ ///
+ ///
+ ///
+ ///
+ bool IsTermVectorStored { get; }
+
+ /// True if terms are stored as term vector together with their offsets
+ /// (start and end position in source text).
+ ///
+ bool IsStoreOffsetWithTermVector { get; }
+
+ /// True if terms are stored as term vector together with their token positions.
+ bool IsStorePositionWithTermVector { get; }
+
+ /// True if the value of the field is stored as binary
+ bool IsBinary { get; }
+
+ ///
+ /// True if norms are omitted for this indexed field.
+ ///
+ /// Expert:
+ /// If set, omit normalization factors associated with this indexed field.
+ /// This effectively disables indexing boosts and length normalization for this field.
+ ///
+ ///
+ bool OmitNorms { get; set; }
+
+
+ /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
+ /// its values via or is only valid as long as the that
+ /// retrieved the is still open.
+ ///
+ ///
+ /// true if this field can be loaded lazily
+ bool IsLazy { get; }
+
+ /// Returns offset into byte[] segment that is used as value, if Field is not binary
+ /// returned value is undefined
+ ///
+ /// index of the first character in byte[] segment that represents this Field value
+ int BinaryOffset { get; }
+
+ /// Returns length of byte[] segment that is used as value, if Field is not binary
+ /// returned value is undefined
+ ///
+ /// length of byte[] segment that represents this Field value
+ int BinaryLength { get; }
+
+ /// Return the raw byte[] for the binary field. Note that
+ /// you must also call and
+ /// to know which range of bytes in this
+ /// returned array belong to the field.
+ ///
+ /// reference to the Field value as byte[].
+ byte[] GetBinaryValue();
+
+ /// Return the raw byte[] for the binary field. Note that
+ /// you must also call and
+ /// to know which range of bytes in this
+ /// returned array belong to the field.
+ /// About reuse: if you pass in the result byte[] and it is
+ /// used, likely the underlying implementation will hold
+ /// onto this byte[] and return it in future calls to
+ /// or .
+ /// So if you subsequently re-use the same byte[] elsewhere
+ /// it will alter this Fieldable's value.
+ ///
+ /// User defined buffer that will be used if
+ /// possible. If this is null or not large enough, a new
+ /// buffer is allocated
+ ///
+ /// reference to the Field value as byte[].
+ ///
+ byte[] GetBinaryValue(byte[] result);
+
+ /// Expert:
+ ///
+ /// If set, omit term freq, positions and payloads from
+ /// postings for this field.
+ ///
+ ///
+ /// NOTE : While this option reduces storage space
+ /// required in the index, it also means any query
+ /// requiring positional information, such as
+ /// or
+ ///
+ /// subclasses will silently fail to find results.
+ ///
+ bool OmitTermFreqAndPositions { set; get; }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/LoadFirstFieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/LoadFirstFieldSelector.cs
new file mode 100644
index 0000000000..4f353f6f8d
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/LoadFirstFieldSelector.cs
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+namespace Lucene.Net.Documents
+{
+
+ /// Load the First field and break.
+ ///
+ /// See
+ ///
+ [Serializable]
+ public class LoadFirstFieldSelector : FieldSelector
+ {
+
+ public virtual FieldSelectorResult Accept(System.String fieldName)
+ {
+ return FieldSelectorResult.LOAD_AND_BREAK;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/MapFieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/MapFieldSelector.cs
new file mode 100644
index 0000000000..92a8959abe
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/MapFieldSelector.cs
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Linq;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Documents
+{
+ /// A based on a Map of field names to s
+ [Serializable]
+ public class MapFieldSelector : FieldSelector
+ {
+ internal IDictionary<string, FieldSelectorResult> fieldSelections;
+
+ /// Create a MapFieldSelector
+ /// maps from field names (String) to s
+ ///
+ public MapFieldSelector(IDictionary<string, FieldSelectorResult> fieldSelections)
+ {
+ this.fieldSelections = fieldSelections;
+ }
+
+ /// Create a MapFieldSelector
+ /// fields to LOAD. List of Strings. All other fields are NO_LOAD.
+ ///
+ public MapFieldSelector(IList<string> fields)
+ {
+ fieldSelections = new HashMap<string, FieldSelectorResult>(fields.Count * 5 / 3);
+ foreach(var field in fields)
+ fieldSelections[field] = FieldSelectorResult.LOAD;
+ }
+
+ /// Create a MapFieldSelector
+ /// fields to LOAD. All other fields are NO_LOAD.
+ ///
+ public MapFieldSelector(params System.String[] fields)
+ : this(fields.ToList()) // TODO: this is slow
+ {
+ }
+
+ /// Load field according to its associated value in fieldSelections
+ /// a field name
+ ///
+ /// the fieldSelections value that field maps to or NO_LOAD if none.
+ ///
+ public virtual FieldSelectorResult Accept(System.String field)
+ {
+ FieldSelectorResult selection = fieldSelections[field];
+ return selection != FieldSelectorResult.INVALID ? selection : FieldSelectorResult.NO_LOAD; // TODO: See FieldSelectorResult
+ }
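+
+ // Illustrative usage (editor's note, not part of the upstream source):
+ //
+ //   var selector = new MapFieldSelector("title", "date");
+ //   selector.Accept("title"); // FieldSelectorResult.LOAD
+ //   selector.Accept("body");  // FieldSelectorResult.NO_LOAD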
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/NumberTools.cs b/external/Lucene.Net.Light/src/core/Document/NumberTools.cs
new file mode 100644
index 0000000000..f877120841
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/NumberTools.cs
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Search;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+
+namespace Lucene.Net.Documents
+{
+
+ // do not remove this class in 3.0, it may be needed to decode old indexes!
+
+ /// Provides support for converting longs to Strings, and back again. The strings
+ /// are structured so that lexicographic sorting order is preserved.
+ ///
+ ///
+ /// That is, if l1 is less than l2 for any two longs l1 and l2, then
+ /// NumberTools.longToString(l1) is lexicographically less than
+ /// NumberTools.longToString(l2). (Similarly for "greater than" and "equals".)
+ ///
+ ///
+ /// This class handles all long values (unlike
+ /// ).
+ ///
+ ///
+ /// For new indexes use instead, which
+ /// provides a sortable binary representation (prefix encoded) of numeric
+ /// values.
+ /// To index and efficiently query numeric values use
+ /// and .
+ /// This class is included for use with existing
+ /// indices and will be removed in a future release (possibly Lucene 4.0).
+ ///
+ [Obsolete("For new indexes use NumericUtils instead, which provides a sortable binary representation (prefix encoded) of numeric values. To index and efficiently query numeric values use NumericField and NumericRangeQuery. This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")]
+ public class NumberTools
+ {
+
+ private const int RADIX = 36;
+
+ private const char NEGATIVE_PREFIX = '-';
+
+ // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
+ private const char POSITIVE_PREFIX = '0';
+
+ //NB: this must be less than
+ /// Equivalent to longToString(Long.MIN_VALUE)
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000";
+#else
+ public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000000";
+#endif
+
+ /// Equivalent to longToString(Long.MAX_VALUE)
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7";
+#else
+ public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "7fffffffffffffff";
+#endif
+
+ /// The length of (all) strings returned by
+ public static readonly int STR_SIZE = MIN_STRING_VALUE.Length;
+
+ /// Converts a long to a String suitable for indexing.
+ public static System.String LongToString(long l)
+ {
+
+ if (l == System.Int64.MinValue)
+ {
+ // special case, because long is not symmetric around zero
+ return MIN_STRING_VALUE;
+ }
+
+ System.Text.StringBuilder buf = new System.Text.StringBuilder(STR_SIZE);
+
+ if (l < 0)
+ {
+ buf.Append(NEGATIVE_PREFIX);
+ l = System.Int64.MaxValue + l + 1;
+ }
+ else
+ {
+ buf.Append(POSITIVE_PREFIX);
+ }
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ System.String num = ToString(l);
+#else
+ System.String num = System.Convert.ToString(l, RADIX);
+#endif
+
+ int padLen = STR_SIZE - num.Length - buf.Length;
+ while (padLen-- > 0)
+ {
+ buf.Append('0');
+ }
+ buf.Append(num);
+
+ return buf.ToString();
+ }
+
+ /// Converts a String that was returned by back to a
+ /// long.
+ ///
+ ///
+ /// IllegalArgumentException
+ /// if the input is null
+ ///
+ /// NumberFormatException
+ /// if the input does not parse (it was not a String returned by
+ /// longToString()).
+ ///
+ public static long StringToLong(System.String str)
+ {
+ if (str == null)
+ {
+ throw new System.NullReferenceException("string cannot be null");
+ }
+ if (str.Length != STR_SIZE)
+ {
+ throw new System.FormatException("string is the wrong size");
+ }
+
+ if (str.Equals(MIN_STRING_VALUE))
+ {
+ return System.Int64.MinValue;
+ }
+
+ char prefix = str[0];
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ long l = ToLong(str.Substring(1));
+#else
+ long l = System.Convert.ToInt64(str.Substring(1), RADIX);
+#endif
+
+ if (prefix == POSITIVE_PREFIX)
+ {
+ // nop
+ }
+ else if (prefix == NEGATIVE_PREFIX)
+ {
+ l = l - System.Int64.MaxValue - 1;
+ }
+ else
+ {
+ throw new System.FormatException("string does not begin with the correct prefix");
+ }
+
+ return l;
+ }
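+
+ // Illustrative round trip (editor's note, not part of the upstream source):
+ //
+ //   string a = NumberTools.LongToString(-5);
+ //   string b = NumberTools.LongToString(42);
+ //   // a and b have the same fixed length (STR_SIZE), a sorts lexicographically
+ //   // before b, and StringToLong(a) == -5, StringToLong(b) == 42.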
+
+#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE
+ #region BASE36 OPS
+ static System.String digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+ static long[] powersOf36 =
+ {
+ 1L,
+ 36L,
+ 36L*36L,
+ 36L*36L*36L,
+ 36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L,
+ 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L
+ };
+
+ public static System.String ToString(long lval)
+ {
+ if (lval == 0)
+ {
+ return "0";
+ }
+
+ int maxStrLen = powersOf36.Length;
+ long curval = lval;
+
+ char[] tb = new char[maxStrLen];
+ int outpos = 0;
+ for (int i = 0; i < maxStrLen; i++)
+ {
+ long pval = powersOf36[maxStrLen - i - 1];
+ int pos = (int)(curval / pval);
+ tb[outpos++] = digits.Substring(pos, 1).ToCharArray()[0];
+ curval = curval % pval;
+ }
+ if (outpos == 0)
+ tb[outpos++] = '0';
+ return new System.String(tb, 0, outpos).TrimStart('0');
+ }
+
+ public static long ToLong(System.String t)
+ {
+ long ival = 0;
+ char[] tb = t.ToCharArray();
+ for (int i = 0; i < tb.Length; i++)
+ {
+ ival += powersOf36[i] * digits.IndexOf(tb[tb.Length - i - 1]);
+ }
+ return ival;
+ }
+ #endregion
+#endif
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/NumericField.cs b/external/Lucene.Net.Light/src/core/Document/NumericField.cs
new file mode 100644
index 0000000000..e77dee47d5
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/NumericField.cs
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.IO;
+using Lucene.Net.Search;
+using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+using NumericUtils = Lucene.Net.Util.NumericUtils;
+using FieldCache = Lucene.Net.Search.FieldCache;
+using SortField = Lucene.Net.Search.SortField;
+
+namespace Lucene.Net.Documents
+{
+ // javadocs
+
+ ///
+ /// This class provides a that enables indexing
+ /// of numeric values for efficient range filtering and
+ /// sorting. Here's an example usage, adding an int value:
+ ///
+ /// document.add(new NumericField(name).setIntValue(value));
+ ///
+ ///
+ /// For optimal performance, re-use the
+ /// NumericField and instance for more than
+ /// one document:
+ ///
+ ///
+ /// NumericField field = new NumericField(name);
+ /// Document document = new Document();
+ /// document.add(field);
+ ///
+ /// for(all documents) {
+ /// ...
+ /// field.setIntValue(value)
+ /// writer.addDocument(document);
+ /// ...
+ /// }
+ ///
+ ///
+ ///
+ /// The .Net native types int , long ,
+ /// float and double are
+ /// directly supported. However, any value that can be
+ /// converted into these native types can also be indexed.
+ /// For example, date/time values represented by a
+ /// can be translated into a long
+ /// value using the java.util.Date.getTime method. If you
+ /// don't need millisecond precision, you can quantize the
+ /// value, either by dividing the result of
+ /// java.util.Date.getTime or using the separate getters
+ /// (for year, month, etc.) to construct an int or
+ /// long value.
+ ///
+ ///
+ /// To perform range querying or filtering against a
+ /// NumericField , use or
+ ///. To sort according to a
+ /// NumericField , use the normal numeric sort types, eg
+ /// NumericField values
+ /// can also be loaded directly from .
+ ///
+ ///
+ /// By default, a NumericField 's value is not stored but
+ /// is indexed for range filtering and sorting. You can use
+ /// the
+ /// constructor if you need to change these defaults.
+ ///
+ ///
+ /// You may add the same field name as a NumericField to
+ /// the same document more than once. Range querying and
+ /// filtering will be the logical OR of all values; so a range query
+ /// will hit all documents that have at least one value in
+ /// the range. However sort behavior is not defined. If you need to sort,
+ /// you should separately index a single-valued NumericField .
+ ///
+ ///
+ /// A NumericField will consume somewhat more disk space
+ /// in the index than an ordinary single-valued field.
+ /// However, for a typical index that includes substantial
+ /// textual content per document, this increase will likely
+ /// be in the noise.
+ ///
+ ///
+ /// Within Lucene, each numeric value is indexed as a
+ /// trie structure, where each term is logically
+ /// assigned to larger and larger pre-defined brackets (which
+ /// are simply lower-precision representations of the value).
+ /// The step size between each successive bracket is called the
+ /// precisionStep , measured in bits. Smaller
+ /// precisionStep values result in larger number
+ /// of brackets, which consumes more disk space in the index
+ /// but may result in faster range search performance. The
+ /// default value, 4, was selected for a reasonable tradeoff
+ /// of disk space consumption versus performance. You can
+ /// use the expert constructor
+ /// if you'd
+ /// like to change the value. Note that you must also
+ /// specify a congruent value when creating
+ /// or .
+ /// For low cardinality fields larger precision steps are good.
+ /// If the cardinality is < 100, it is fair
+ /// to use , which produces one
+ /// term per value.
+ ///
+ ///
+ /// For more information on the internals of numeric trie
+ /// indexing, including the precisionStep
+ /// configuration, see . The format of
+ /// indexed values is described in .
+ ///
+ ///
+ /// If you only need to sort by numeric value, and never
+ /// run range querying/filtering, you can index using a
+ /// precisionStep of .
+ /// This will minimize disk space consumed.
+ ///
+ ///
+ /// More advanced users can instead use
+ /// directly, when indexing numbers. This
+ /// class is a wrapper around this token stream type for
+ /// easier, more intuitive usage.
+ ///
+ ///
+ /// NOTE: This class is only used during
+ /// indexing. When retrieving the stored field value from a
+ /// instance after search, you will get a
+ /// conventional instance where the numeric
+ /// values are returned as s (according to
+ /// toString(value) of the used data type).
+ ///
+ ///
+ /// NOTE: This API is
+ /// experimental and might change in incompatible ways in the
+ /// next release.
+ ///
+ ///
+ /// 2.9
+ ///
+ [Serializable]
+ public sealed class NumericField:AbstractField
+ {
+
+ new private readonly NumericTokenStream tokenStream;
+
+ /// Creates a field for numeric values using the default precisionStep
+ /// (4). The instance is not yet initialized with
+ /// a numeric value, before indexing a document containing this field,
+ /// set a value using the various set??? Value() methods.
+ /// This constructor creates an indexed, but not stored field.
+ ///
+ /// the field name
+ ///
+ public NumericField(System.String name):this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true)
+ {
+ }
+
+ /// Creates a field for numeric values using the default precisionStep
+ /// (4). The instance is not yet initialized with
+ /// a numeric value, before indexing a document containing this field,
+ /// set a value using the various set??? Value() methods.
+ ///
+ /// the field name
+ ///
+ /// if the field should be stored in plain text form
+ /// (according to toString(value) of the used data type)
+ ///
+ /// if the field should be indexed using
+ ///
+ public NumericField(System.String name, Field.Store store, bool index):this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index)
+ {
+ }
+
+ /// Creates a field for numeric values with the specified
+ /// precisionStep . The instance is not yet initialized with
+ /// a numeric value, before indexing a document containing this field,
+ /// set a value using the various set??? Value() methods.
+ /// This constructor creates an indexed, but not stored field.
+ ///
+ /// the field name
+ ///
+ /// the used precision step
+ ///
+ public NumericField(System.String name, int precisionStep):this(name, precisionStep, Field.Store.NO, true)
+ {
+ }
+
+ /// Creates a field for numeric values with the specified
+ /// precisionStep . The instance is not yet initialized with
+ /// a numeric value, before indexing a document containing this field,
+ /// set a value using the various set??? Value() methods.
+ ///
+ /// the field name
+ ///
+ /// the used precision step
+ ///
+ /// if the field should be stored in plain text form
+ /// (according to toString(value) of the used data type)
+ ///
+ /// if the field should be indexed using
+ ///
+ public NumericField(System.String name, int precisionStep, Field.Store store, bool index):base(name, store, index?Field.Index.ANALYZED_NO_NORMS:Field.Index.NO, Field.TermVector.NO)
+ {
+ OmitTermFreqAndPositions = true;
+ tokenStream = new NumericTokenStream(precisionStep);
+ }
+
+ /// Returns a for indexing the numeric value.
+ public override TokenStream TokenStreamValue
+ {
+ get { return IsIndexed ? tokenStream : null; }
+ }
+
+ /// Returns always null for numeric fields
+ public override byte[] GetBinaryValue(byte[] result)
+ {
+ return null;
+ }
+
+ /// Returns always null for numeric fields
+ public override TextReader ReaderValue
+ {
+ get { return null; }
+ }
+
+ /// Returns the numeric value as a string (how it is stored, when is chosen).
+ public override string StringValue
+ {
+ get { return (fieldsData == null) ? null : fieldsData.ToString(); }
+ }
+
+ /// Returns the current numeric value as a subclass of , null if not yet initialized.
+ public ValueType NumericValue
+ {
+ get { return (System.ValueType) fieldsData; }
+ }
+
+ /// Initializes the field with the supplied long value.
+ /// the numeric value
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// document.add(new NumericField(name, precisionStep).SetLongValue(value))
+ ///
+ public NumericField SetLongValue(long value_Renamed)
+ {
+ tokenStream.SetLongValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
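+
+ // Illustrative sketch (editor's note, not part of the upstream source): indexing a
+ // DateTime quantized to whole seconds, as the class documentation above suggests;
+ // 'someDateTime' and 'document' are hypothetical local variables:
+ //
+ //   long seconds = someDateTime.Ticks / TimeSpan.TicksPerSecond;
+ //   document.Add(new NumericField("timestamp").SetLongValue(seconds));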
+
+ /// Initializes the field with the supplied int value.
+ /// the numeric value
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// document.add(new NumericField(name, precisionStep).setIntValue(value))
+ ///
+ public NumericField SetIntValue(int value_Renamed)
+ {
+ tokenStream.SetIntValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+
+ /// Initializes the field with the supplied double value.
+ /// the numeric value
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// document.add(new NumericField(name, precisionStep).setDoubleValue(value))
+ ///
+ public NumericField SetDoubleValue(double value_Renamed)
+ {
+ tokenStream.SetDoubleValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+
+ /// Initializes the field with the supplied float value.
+ /// the numeric value
+ ///
+ /// this instance, because of this you can use it the following way:
+ /// document.add(new NumericField(name, precisionStep).setFloatValue(value))
+ ///
+ public NumericField SetFloatValue(float value_Renamed)
+ {
+ tokenStream.SetFloatValue(value_Renamed);
+ fieldsData = value_Renamed;
+ return this;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Document/SetBasedFieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/SetBasedFieldSelector.cs
new file mode 100644
index 0000000000..14e3e02cd8
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Document/SetBasedFieldSelector.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Documents
+{
+ /// Declare what fields to load normally and what fields to load lazily
+ ///
+ ///
+ ///
+ [Serializable]
+ public class SetBasedFieldSelector : FieldSelector
+ {
+ private ISet<string> fieldsToLoad;
+ private ISet<string> lazyFieldsToLoad;
+
+ /// Pass in the Set of names to load and the Set of names to load lazily. If both are null, the
+ /// Document will not have any on it.
+ ///
+ /// A Set of field names to load. May be empty, but not null
+ ///
+ /// A Set of field names to load lazily. May be empty, but not null
+ ///
+ public SetBasedFieldSelector(ISet<string> fieldsToLoad, ISet<string> lazyFieldsToLoad)
+ {
+ this.fieldsToLoad = fieldsToLoad;
+ this.lazyFieldsToLoad = lazyFieldsToLoad;
+ }
+
+ /// Indicate whether to load the field with the given name or not. If the is not in either of the
+ /// initializing Sets, then is returned. If a Field name
+ /// is in both fieldsToLoad and lazyFieldsToLoad , lazy has precedence.
+ ///
+ ///
+ /// The name to check
+ ///
+ /// The
+ ///
+ public virtual FieldSelectorResult Accept(System.String fieldName)
+ {
+ FieldSelectorResult result = FieldSelectorResult.NO_LOAD;
+ if (fieldsToLoad.Contains(fieldName) == true)
+ {
+ result = FieldSelectorResult.LOAD;
+ }
+ if (lazyFieldsToLoad.Contains(fieldName) == true)
+ {
+ result = FieldSelectorResult.LAZY_LOAD;
+ }
+ return result;
+ }
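+
+ // Illustrative usage (editor's note, not part of the upstream source): lazy takes
+ // precedence when a field name appears in both sets:
+ //
+ //   var eager = new HashSet<string> { "title" };
+ //   var lazy  = new HashSet<string> { "title", "body" };
+ //   var sel   = new SetBasedFieldSelector(eager, lazy);
+ //   sel.Accept("title"); // FieldSelectorResult.LAZY_LOAD
+ //   sel.Accept("other"); // FieldSelectorResult.NO_LOAD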
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/AbstractAllTermDocs.cs b/external/Lucene.Net.Light/src/core/Index/AbstractAllTermDocs.cs
new file mode 100644
index 0000000000..935b7faad7
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/AbstractAllTermDocs.cs
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+ ///
+ /// Base class for enumerating all but deleted docs.
+ ///
+ ///
+ /// NOTE: this class is meant only to be used internally
+ /// by Lucene; it's only public so it can be shared across
+ /// packages. This means the API is freely subject to
+ /// change, and, the class could be removed entirely, in any
+ /// Lucene release. Use directly at your own risk!
+ ///
+ public abstract class AbstractAllTermDocs : TermDocs
+ {
+ protected int maxDoc;
+ protected int internalDoc = -1;
+
+ protected AbstractAllTermDocs(int maxDoc)
+ {
+ this.maxDoc = maxDoc;
+ }
+
+ public void Seek(Term term)
+ {
+ if (term == null)
+ {
+ internalDoc = -1;
+ }
+ else
+ {
+ throw new NotSupportedException();
+ }
+ }
+
+ public void Seek(TermEnum termEnum)
+ {
+ throw new NotSupportedException();
+ }
+
+ public int Doc
+ {
+ get { return internalDoc; }
+ }
+
+ public int Freq
+ {
+ get { return 1; }
+ }
+
+ public bool Next()
+ {
+ return SkipTo(internalDoc + 1);
+ }
+
+ public int Read(int[] docs, int[] freqs)
+ {
+ int length = docs.Length;
+ int i = 0;
+ while (i < length && internalDoc < maxDoc)
+ {
+ if (!IsDeleted(internalDoc))
+ {
+ docs[i] = internalDoc;
+ freqs[i] = 1;
+ ++i;
+ }
+ internalDoc++;
+ }
+ return i;
+ }
+
+ public bool SkipTo(int target)
+ {
+ internalDoc = target;
+ while (internalDoc < maxDoc)
+ {
+ if (!IsDeleted(internalDoc))
+ {
+ return true;
+ }
+ internalDoc++;
+ }
+ return false;
+ }
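+
+ // Illustrative iteration pattern (editor's note, not part of the upstream source),
+ // assuming 'termDocs' is an instance of a concrete subclass:
+ //
+ //   while (termDocs.Next())
+ //   {
+ //       int docId = termDocs.Doc;  // every non-deleted document, in increasing order
+ //       int freq  = termDocs.Freq; // always 1 for this all-docs enumerator
+ //   }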
+
+ public void Close()
+ {
+ Dispose();
+ }
+
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected abstract void Dispose(bool disposing);
+
+ public abstract bool IsDeleted(int doc);
+ }
+}
diff --git a/external/Lucene.Net.Light/src/core/Index/AllTermDocs.cs b/external/Lucene.Net.Light/src/core/Index/AllTermDocs.cs
new file mode 100644
index 0000000000..da5f16dd56
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/AllTermDocs.cs
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using BitVector = Lucene.Net.Util.BitVector;
+
+namespace Lucene.Net.Index
+{
+
+ class AllTermDocs : AbstractAllTermDocs
+ {
+ protected internal BitVector deletedDocs;
+
+ protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc)
+ {
+ lock (parent)
+ {
+ this.deletedDocs = parent.deletedDocs;
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing.
+ }
+
+ public override bool IsDeleted(int doc)
+ {
+ return deletedDocs != null && deletedDocs.Get(doc);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/BufferedDeletes.cs b/external/Lucene.Net.Light/src/core/Index/BufferedDeletes.cs
new file mode 100644
index 0000000000..52ef1dfdd1
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/BufferedDeletes.cs
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// Holds buffered deletes, by docID, term or query. We
+ /// hold two instances of this class: one for the deletes
+ /// prior to the last flush, the other for deletes after
+ /// the last flush. This is so if we need to abort
+ /// (discard all buffered docs) we can also discard the
+ /// buffered deletes yet keep the deletes done during
+ /// previously flushed segments.
+ ///
+ class BufferedDeletes
+ {
+ internal int numTerms;
+ internal IDictionary<Term, Num> terms = null;
+ internal IDictionary<Query, int> queries = new HashMap<Query, int>();
+ internal List<int> docIDs = new List<int>();
+ internal long bytesUsed;
+ internal bool doTermSort;
+
+ public BufferedDeletes(bool doTermSort)
+ {
+ this.doTermSort = doTermSort;
+ if (doTermSort)
+ {
+ //TODO: Used in place of TreeMap
+ terms = new SortedDictionary<Term, Num>();
+ }
+ else
+ {
+ terms = new HashMap<Term, Num>();
+ }
+ }
+
+
+ // Number of documents a delete term applies to.
+ internal sealed class Num
+ {
+ internal int num;
+
+ internal Num(int num)
+ {
+ this.num = num;
+ }
+
+ internal int GetNum()
+ {
+ return num;
+ }
+
+ internal void SetNum(int num)
+ {
+ // Only record the new number if it's greater than the
+ // current one. This is important because if multiple
+ // threads are replacing the same doc at nearly the
+ // same time, it's possible that one thread that got a
+ // higher docID is scheduled before the other
+ // threads.
+ if (num > this.num)
+ this.num = num;
+ }
+ }
+
+ internal virtual int Size()
+ {
+ // We use numTerms not terms.size() intentionally, so
+ // that deletes by the same term multiple times "count",
+ // ie if you ask to flush every 1000 deletes then even
+ // dup'd terms are counted towards that 1000
+ return numTerms + queries.Count + docIDs.Count;
+ }
+
+ internal virtual void Update(BufferedDeletes @in)
+ {
+ numTerms += @in.numTerms;
+ bytesUsed += @in.bytesUsed;
+ foreach (KeyValuePair<Term, Num> term in @in.terms)
+ {
+ terms[term.Key] = term.Value;
+ }
+ foreach (KeyValuePair<Query, int> term in @in.queries)
+ {
+ queries[term.Key] = term.Value;
+ }
+
+ docIDs.AddRange(@in.docIDs);
+ @in.Clear();
+ }
+
+ internal virtual void Clear()
+ {
+ terms.Clear();
+ queries.Clear();
+ docIDs.Clear();
+ numTerms = 0;
+ bytesUsed = 0;
+ }
+
+ internal virtual void AddBytesUsed(long b)
+ {
+ bytesUsed += b;
+ }
+
+ internal virtual bool Any()
+ {
+ return terms.Count > 0 || docIDs.Count > 0 || queries.Count > 0;
+ }
+
+ // Remaps all buffered deletes based on a completed
+ // merge
+ internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
+ {
+ lock (this)
+ {
+ IDictionary<Term, Num> newDeleteTerms;
+
+ // Remap delete-by-term
+ if (terms.Count > 0)
+ {
+ if (doTermSort)
+ {
+ newDeleteTerms = new SortedDictionary<Term, Num>();
+ }
+ else
+ {
+ newDeleteTerms = new HashMap<Term, Num>();
+ }
+ foreach(var entry in terms)
+ {
+ Num num = entry.Value;
+ newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum()));
+ }
+ }
+ else
+ newDeleteTerms = null;
+
+ // Remap delete-by-docID
+ List<int> newDeleteDocIDs;
+
+ if (docIDs.Count > 0)
+ {
+ newDeleteDocIDs = new List<int>(docIDs.Count);
+ foreach(int num in docIDs)
+ {
+ newDeleteDocIDs.Add(mapper.Remap(num));
+ }
+ }
+ else
+ newDeleteDocIDs = null;
+
+ // Remap delete-by-query
+ HashMap<Query, int> newDeleteQueries;
+
+ if (queries.Count > 0)
+ {
+ newDeleteQueries = new HashMap<Query, int>(queries.Count);
+ foreach(var entry in queries)
+ {
+ int num = entry.Value;
+ newDeleteQueries[entry.Key] = mapper.Remap(num);
+ }
+ }
+ else
+ newDeleteQueries = null;
+
+ if (newDeleteTerms != null)
+ terms = newDeleteTerms;
+ if (newDeleteDocIDs != null)
+ docIDs = newDeleteDocIDs;
+ if (newDeleteQueries != null)
+ queries = newDeleteQueries;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/ByteBlockPool.cs b/external/Lucene.Net.Light/src/core/Index/ByteBlockPool.cs
new file mode 100644
index 0000000000..041c756fbc
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/ByteBlockPool.cs
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/* Class that Posting and PostingVector use to write byte
+* streams into shared fixed-size byte[] arrays. The idea
+* is to allocate slices of increasing lengths. For
+* example, the first slice is 5 bytes, the next slice is
+* 14, etc. We start by writing our bytes into the first
+* 5 bytes. When we hit the end of the slice, we allocate
+* the next slice and then write the address of the new
+* slice into the last 4 bytes of the previous slice (the
+* "forwarding address").
+*
+* Each slice is filled with 0's initially, and we mark
+* the end with a non-zero byte. This way the methods
+* that are writing into the slice don't need to record
+* its length and instead allocate a new slice once they
+* hit a non-zero byte. */
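+
+/* Editor's worked example (not in the original source): a term's postings start in a
+* level-0 slice of 5 bytes, whose last byte is pre-set to a non-zero end marker, so
+* four data bytes fit. Writing the fifth byte hits the marker; AllocSlice below then
+* grabs a level-1 slice of 14 bytes, copies the last 3 data bytes into it, and
+* overwrites those positions (plus the marker byte) in the old slice with the 4-byte
+* forwarding address of the new slice, which ByteSliceReader later follows. */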
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ sealed public class ByteBlockPool
+ {
+ private void InitBlock()
+ {
+ byteUpto = DocumentsWriter.BYTE_BLOCK_SIZE;
+ }
+
+ public /*internal*/ abstract class Allocator
+ {
+ public /*internal*/ abstract void RecycleByteBlocks(byte[][] blocks, int start, int end);
+ public /*internal*/ abstract void RecycleByteBlocks(IList<byte[]> blocks);
+ public /*internal*/ abstract byte[] GetByteBlock(bool trackAllocations);
+ }
+
+ public byte[][] buffers = new byte[10][];
+
+ internal int bufferUpto = - 1; // Which buffer we are upto
+ public int byteUpto; // Where we are in head buffer
+
+ public byte[] buffer; // Current head buffer
+ public int byteOffset = - DocumentsWriter.BYTE_BLOCK_SIZE; // Current head offset
+
+ private readonly bool trackAllocations;
+ private readonly Allocator allocator;
+
+ public ByteBlockPool(Allocator allocator, bool trackAllocations)
+ {
+ InitBlock();
+ this.allocator = allocator;
+ this.trackAllocations = trackAllocations;
+ }
+
+ public void Reset()
+ {
+ if (bufferUpto != - 1)
+ {
+ // We allocated at least one buffer
+
+ for (int i = 0; i < bufferUpto; i++)
+ // Fully zero fill buffers that we fully used
+ System.Array.Clear(buffers[i], 0, buffers[i].Length);
+
+ // Partial zero fill the final buffer
+ System.Array.Clear(buffers[bufferUpto], 0, byteUpto);
+
+ if (bufferUpto > 0)
+ // Recycle all but the first buffer
+ allocator.RecycleByteBlocks(buffers, 1, 1 + bufferUpto);
+
+ // Re-use the first buffer
+ bufferUpto = 0;
+ byteUpto = 0;
+ byteOffset = 0;
+ buffer = buffers[0];
+ }
+ }
+
+ public void NextBuffer()
+ {
+ if (1 + bufferUpto == buffers.Length)
+ {
+ var newBuffers = new byte[(int) (buffers.Length * 1.5)][];
+ Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
+ buffers = newBuffers;
+ }
+ buffer = buffers[1 + bufferUpto] = allocator.GetByteBlock(trackAllocations);
+ bufferUpto++;
+
+ byteUpto = 0;
+ byteOffset += DocumentsWriter.BYTE_BLOCK_SIZE;
+ }
+
+ public int NewSlice(int size)
+ {
+ if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE - size)
+ NextBuffer();
+ int upto = byteUpto;
+ byteUpto += size;
+ buffer[byteUpto - 1] = 16;
+ return upto;
+ }
+
+ // Size of each slice. These arrays should be at most 16
+ // elements (index is encoded with 4 bits). First array
+ // is just a compact way to encode X+1 with a max. Second
+ // array is the length of each slice, ie first slice is 5
+ // bytes, next slice is 14 bytes, etc.
+ internal static readonly int[] nextLevelArray = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
+ internal static readonly int[] levelSizeArray = new int[]{5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
+ internal static readonly int FIRST_LEVEL_SIZE = levelSizeArray[0];
+ public readonly static int FIRST_LEVEL_SIZE_For_NUnit_Test = levelSizeArray[0];
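+
+ // Editor's note (not in the original source): a slice's final byte stores (16 | level),
+ // so AllocSlice can recover the level with (slice[upto] & 15) and look up the next
+ // level and size. For example, a level-1 slice ends with byte 17; its successor is
+ // level nextLevelArray[1] == 2 with levelSizeArray[2] == 20 bytes. Levels cap at 9,
+ // so slices grow to at most 200 bytes.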
+
+ public int AllocSlice(byte[] slice, int upto)
+ {
+
+ int level = slice[upto] & 15;
+ int newLevel = nextLevelArray[level];
+ int newSize = levelSizeArray[newLevel];
+
+ // Maybe allocate another block
+ if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE - newSize)
+ NextBuffer();
+
+ int newUpto = byteUpto;
+ int offset = newUpto + byteOffset;
+ byteUpto += newSize;
+
+ // Copy forward the past 3 bytes (which we are about
+ // to overwrite with the forwarding address):
+ buffer[newUpto] = slice[upto - 3];
+ buffer[newUpto + 1] = slice[upto - 2];
+ buffer[newUpto + 2] = slice[upto - 1];
+
+ // Write forwarding address at end of last slice:
+ slice[upto - 3] = (byte) (Number.URShift(offset, 24));
+ slice[upto - 2] = (byte) (Number.URShift(offset, 16));
+ slice[upto - 1] = (byte) (Number.URShift(offset, 8));
+ slice[upto] = (byte) offset;
+
+ // Write new level:
+ buffer[byteUpto - 1] = (byte) (16 | newLevel);
+
+ return newUpto + 3;
+ }
+
+ public static int FIRST_LEVEL_SIZE_ForNUnit
+ {
+ get { return FIRST_LEVEL_SIZE; }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/ByteSliceReader.cs b/external/Lucene.Net.Light/src/core/Index/ByteSliceReader.cs
new file mode 100644
index 0000000000..8b672fe37f
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/ByteSliceReader.cs
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+ /* IndexInput that knows how to read the byte slices written
+ * by Posting and PostingVector. We read the bytes in
+ * each slice until we hit the end of that slice at which
+ * point we read the forwarding address of the next slice
+ * and then jump to it.*/
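+
+ /* Editor's usage sketch (not part of the original source): internal callers
+ * Init(pool, start, end) with the byte addresses recorded by the writer, then
+ * issue ReadByte()/ReadVInt() calls until Eof(); NextSlice() is entered
+ * automatically whenever the current slice's limit is reached. */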
+ public sealed class ByteSliceReader : IndexInput
+ {
+ internal ByteBlockPool pool;
+ internal int bufferUpto;
+ internal byte[] buffer;
+ public int upto;
+ internal int limit;
+ internal int level;
+ public int bufferOffset;
+
+ public int endIndex;
+
+ public void Init(ByteBlockPool pool, int startIndex, int endIndex)
+ {
+
+ System.Diagnostics.Debug.Assert(endIndex - startIndex >= 0);
+ System.Diagnostics.Debug.Assert(startIndex >= 0);
+ System.Diagnostics.Debug.Assert(endIndex >= 0);
+
+ this.pool = pool;
+ this.endIndex = endIndex;
+
+ level = 0;
+ bufferUpto = startIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
+ bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
+ buffer = pool.buffers[bufferUpto];
+ upto = startIndex & DocumentsWriter.BYTE_BLOCK_MASK;
+
+ int firstSize = ByteBlockPool.levelSizeArray[0];
+
+ if (startIndex + firstSize >= endIndex)
+ {
+ // There is only this one slice to read
+ limit = endIndex & DocumentsWriter.BYTE_BLOCK_MASK;
+ }
+ else
+ limit = upto + firstSize - 4;
+ }
+
+ public bool Eof()
+ {
+ System.Diagnostics.Debug.Assert(upto + bufferOffset <= endIndex);
+ return upto + bufferOffset == endIndex;
+ }
+
+ public override byte ReadByte()
+ {
+ System.Diagnostics.Debug.Assert(!Eof());
+ System.Diagnostics.Debug.Assert(upto <= limit);
+ if (upto == limit)
+ NextSlice();
+ return buffer[upto++];
+ }
+
+ public long WriteTo(IndexOutput @out)
+ {
+ long size = 0;
+ while (true)
+ {
+ if (limit + bufferOffset == endIndex)
+ {
+ System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto);
+ @out.WriteBytes(buffer, upto, limit - upto);
+ size += limit - upto;
+ break;
+ }
+ else
+ {
+ @out.WriteBytes(buffer, upto, limit - upto);
+ size += limit - upto;
+ NextSlice();
+ }
+ }
+
+ return size;
+ }
+
+ public void NextSlice()
+ {
+
+ // Skip to our next slice
+ int nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + ((buffer[2 + limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff);
+
+ level = ByteBlockPool.nextLevelArray[level];
+ int newSize = ByteBlockPool.levelSizeArray[level];
+
+ bufferUpto = nextIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
+ bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
+
+ buffer = pool.buffers[bufferUpto];
+ upto = nextIndex & DocumentsWriter.BYTE_BLOCK_MASK;
+
+ if (nextIndex + newSize >= endIndex)
+ {
+ // We are advancing to the final slice
+ System.Diagnostics.Debug.Assert(endIndex - nextIndex > 0);
+ limit = endIndex - bufferOffset;
+ }
+ else
+ {
+ // This is not the final slice (subtract 4 for the
+ // forwarding address at the end of this new slice)
+ limit = upto + newSize - 4;
+ }
+ }
+
+ public override void ReadBytes(byte[] b, int offset, int len)
+ {
+ while (len > 0)
+ {
+ int numLeft = limit - upto;
+ if (numLeft < len)
+ {
+ // Read entire slice
+ Array.Copy(buffer, upto, b, offset, numLeft);
+ offset += numLeft;
+ len -= numLeft;
+ NextSlice();
+ }
+ else
+ {
+ // This slice is the last one
+ Array.Copy(buffer, upto, b, offset, len);
+ upto += len;
+ break;
+ }
+ }
+ }
+
+ public override long FilePointer
+ {
+ get { throw new NotImplementedException(); }
+ }
+
+ public override long Length()
+ {
+ throw new NotImplementedException();
+ }
+ public override void Seek(long pos)
+ {
+ throw new NotImplementedException();
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ // Do nothing...
+ }
+
+ override public Object Clone()
+ {
+ System.Diagnostics.Debug.Fail("Port issue:", "Let see if we need this ByteSliceReader.Clone()"); // {{Aroush-2.9}}
+ return null;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/ByteSliceWriter.cs b/external/Lucene.Net.Light/src/core/Index/ByteSliceWriter.cs
new file mode 100644
index 0000000000..86bbca0e73
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/ByteSliceWriter.cs
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+ /// Class to write byte streams into slices of shared
+ /// byte[]. This is used by DocumentsWriter to hold the
+ /// posting list for many terms in RAM.
+ ///
+ public sealed class ByteSliceWriter
+ {
+ private byte[] slice;
+ private int upto;
+ private readonly ByteBlockPool pool;
+
+ internal int offset0;
+
+ public ByteSliceWriter(ByteBlockPool pool)
+ {
+ this.pool = pool;
+ }
+
+ /// Set up the writer to write at address.
+ public void Init(int address)
+ {
+ slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT];
+ System.Diagnostics.Debug.Assert(slice != null);
+ upto = address & DocumentsWriter.BYTE_BLOCK_MASK;
+ offset0 = address;
+ System.Diagnostics.Debug.Assert(upto < slice.Length);
+ }
+
+ /// Write byte into byte slice stream
+ public void WriteByte(byte b)
+ {
+ System.Diagnostics.Debug.Assert(slice != null);
+ if (slice[upto] != 0)
+ {
+ upto = pool.AllocSlice(slice, upto);
+ slice = pool.buffer;
+ offset0 = pool.byteOffset;
+ System.Diagnostics.Debug.Assert(slice != null);
+ }
+ slice[upto++] = b;
+ System.Diagnostics.Debug.Assert(upto != slice.Length);
+ }
+
+ public void WriteBytes(byte[] b, int offset, int len)
+ {
+ int offsetEnd = offset + len;
+ while (offset < offsetEnd)
+ {
+ if (slice[upto] != 0)
+ {
+ // End marker
+ upto = pool.AllocSlice(slice, upto);
+ slice = pool.buffer;
+ offset0 = pool.byteOffset;
+ }
+
+ slice[upto++] = b[offset++];
+ System.Diagnostics.Debug.Assert(upto != slice.Length);
+ }
+ }
+
+ public int Address
+ {
+ get { return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); }
+ }
+
+ public void WriteVInt(int i)
+ {
+ while ((i & ~ 0x7F) != 0)
+ {
+ WriteByte((byte) ((i & 0x7f) | 0x80));
+ i = Number.URShift(i, 7);
+ }
+ WriteByte((byte) i);
+ }
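+
+ // Editor's example (not in the original source): WriteVInt emits 7 bits per byte,
+ // low-order group first, setting the high bit on every byte except the last.
+ // For i = 300 (binary 100101100) it writes 0xAC (0x2C | 0x80) followed by 0x02,
+ // the same variable-length integer encoding Lucene uses elsewhere in the index.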
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/CharBlockPool.cs b/external/Lucene.Net.Light/src/core/Index/CharBlockPool.cs
new file mode 100644
index 0000000000..0631fe0e92
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/CharBlockPool.cs
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class CharBlockPool
+ {
+ private void InitBlock()
+ {
+ charUpto = DocumentsWriter.CHAR_BLOCK_SIZE;
+ }
+
+ public char[][] buffers = new char[10][];
+ internal int numBuffer;
+
+ internal int bufferUpto = - 1; // Which buffer we are upto
+ public int charUpto; // Where we are in head buffer
+
+ public char[] buffer; // Current head buffer
+ public int charOffset = - DocumentsWriter.CHAR_BLOCK_SIZE; // Current head offset
+ private readonly DocumentsWriter docWriter;
+
+ public CharBlockPool(DocumentsWriter docWriter)
+ {
+ InitBlock();
+ this.docWriter = docWriter;
+ }
+
+ public void Reset()
+ {
+ docWriter.RecycleCharBlocks(buffers, 1 + bufferUpto);
+ bufferUpto = - 1;
+ charUpto = DocumentsWriter.CHAR_BLOCK_SIZE;
+ charOffset = - DocumentsWriter.CHAR_BLOCK_SIZE;
+ }
+
+ public void NextBuffer()
+ {
+ if (1 + bufferUpto == buffers.Length)
+ {
+ var newBuffers = new char[(int) (buffers.Length * 1.5)][];
+ Array.Copy(buffers, 0, newBuffers, 0, buffers.Length);
+ buffers = newBuffers;
+ }
+ buffer = buffers[1 + bufferUpto] = docWriter.GetCharBlock();
+ bufferUpto++;
+
+ charUpto = 0;
+ charOffset += DocumentsWriter.CHAR_BLOCK_SIZE;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/CheckIndex.cs b/external/Lucene.Net.Light/src/core/Index/CheckIndex.cs
new file mode 100644
index 0000000000..89179036a6
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/CheckIndex.cs
@@ -0,0 +1,1017 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Support;
+using AbstractField = Lucene.Net.Documents.AbstractField;
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
+using FSDirectory = Lucene.Net.Store.FSDirectory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ /// Basic tool and API to check the health of an index and
+ /// write a new segments file that removes reference to
+ /// problematic segments.
+ ///
+ ///
+ /// As this tool checks every byte in the index, on a large
+ /// index it can take quite a long time to run.
+ ///
+ ///
+ /// WARNING: this tool and API is new and
+ /// experimental and is subject to suddenly change in the
+ /// next release. Please make a complete backup of your
+ /// index before using this to fix your index!
+ ///
+ public class CheckIndex
+ {
+ private StreamWriter infoStream;
+ private readonly Directory dir;
+
+ /// Returned from CheckIndex_Renamed_Method, detailing the health and status of the index.
+ ///
+ ///
+ /// WARNING: this API is new and experimental and is
+ /// subject to suddenly change in the next release.
+ ///
+ ///
+
+ public class Status
+ {
+
+ /// True if no problems were found with the index.
+ public bool clean;
+
+ /// True if we were unable to locate and load the segments_N file.
+ public bool missingSegments;
+
+ /// True if we were unable to open the segments_N file.
+ public bool cantOpenSegments;
+
+ /// True if we were unable to read the version number from segments_N file.
+ public bool missingSegmentVersion;
+
+ /// Name of latest segments_N file in the index.
+ public System.String segmentsFileName;
+
+ /// Number of segments in the index.
+ public int numSegments;
+
+ /// String description of the version of the index.
+ public System.String segmentFormat;
+
+ /// Empty unless you passed specific segments list to check as optional 3rd argument.
+ ///
+ /// CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList)
+ ///
+ public List<string> segmentsChecked = new List<string>();
+
+ /// True if the index was created with a newer version of Lucene than the CheckIndex tool.
+ public bool toolOutOfDate;
+
+ /// List of SegmentInfoStatus instances, detailing status of each segment.
+ public IList<SegmentInfoStatus> segmentInfos = new List<SegmentInfoStatus>();
+
+ /// Directory index is in.
+ public Directory dir;
+
+ /// SegmentInfos instance containing only segments that
+ /// had no problems (this is used with the FixIndex
+ /// method to repair the index).
+ ///
+ internal SegmentInfos newSegments;
+
+ /// How many documents will be lost to bad segments.
+ public int totLoseDocCount;
+
+ /// How many bad segments were found.
+ public int numBadSegments;
+
+ /// True if we checked only specific segments (CheckIndex_Renamed_Method
+ /// was called with a non-null
+ /// onlySegments argument).
+ ///
+ public bool partial;
+
+ /// Holds the userData of the last commit in the index
+ public IDictionary<string, string> userData;
+
+ /// Holds the status of each segment in the index.
+ /// See segmentInfos.
+ ///
+ ///
+ /// WARNING: this API is new and experimental and is
+ /// subject to suddenly change in the next release.
+ ///
+ public class SegmentInfoStatus
+ {
+ /// Name of the segment.
+ public System.String name;
+
+ /// Document count (does not take deletions into account).
+ public int docCount;
+
+ /// True if segment is compound file format.
+ public bool compound;
+
+ /// Number of files referenced by this segment.
+ public int numFiles;
+
+ /// Net size (MB) of the files referenced by this
+ /// segment.
+ ///
+ public double sizeMB;
+
+ /// Doc store offset, if this segment shares the doc
+ /// store files (stored fields and term vectors) with
+ /// other segments. This is -1 if it does not share.
+ ///
+ public int docStoreOffset = - 1;
+
+ /// String of the shared doc store segment, or null if
+ /// this segment does not share the doc store files.
+ ///
+ public System.String docStoreSegment;
+
+ /// True if the shared doc store files are compound file
+ /// format.
+ ///
+ public bool docStoreCompoundFile;
+
+ /// True if this segment has pending deletions.
+ public bool hasDeletions;
+
+ /// Name of the current deletions file name.
+ public System.String deletionsFileName;
+
+ /// Number of deleted documents.
+ public int numDeleted;
+
+ /// True if we were able to open a SegmentReader on this
+ /// segment.
+ ///
+ public bool openReaderPassed;
+
+ /// Number of fields in this segment.
+ internal int numFields;
+
+ /// True if at least one of the fields in this segment
+ /// does not omitTermFreqAndPositions.
+ ///
+ ///
+ ///
+ public bool hasProx;
+
+ /// Map<String, String> that includes certain
+ /// debugging details that IndexWriter records into
+ /// each segment it creates
+ ///
+ public IDictionary<string, string> diagnostics;
+
+ /// Status for testing of field norms (null if field norms could not be tested).
+ public FieldNormStatus fieldNormStatus;
+
+ /// Status for testing of indexed terms (null if indexed terms could not be tested).
+ public TermIndexStatus termIndexStatus;
+
+ /// Status for testing of stored fields (null if stored fields could not be tested).
+ public StoredFieldStatus storedFieldStatus;
+
+ /// Status for testing of term vectors (null if term vectors could not be tested).
+ public TermVectorStatus termVectorStatus;
+ }
+
+ /// Status from testing field norms.
+ public sealed class FieldNormStatus
+ {
+ /// Number of fields successfully tested
+ public long totFields = 0L;
+
+ /// Exception thrown during term index test (null on success)
+ public System.Exception error = null;
+ }
+
+ /// Status from testing term index.
+ public sealed class TermIndexStatus
+ {
+ /// Total term count
+ public long termCount = 0L;
+
+ /// Total frequency across all terms.
+ public long totFreq = 0L;
+
+ /// Total number of positions.
+ public long totPos = 0L;
+
+ /// Exception thrown during term index test (null on success)
+ public System.Exception error = null;
+ }
+
+ /// Status from testing stored fields.
+ public sealed class StoredFieldStatus
+ {
+
+ /// Number of documents tested.
+ public int docCount = 0;
+
+ /// Total number of stored fields tested.
+ public long totFields = 0;
+
+ /// Exception thrown during stored fields test (null on success)
+ public System.Exception error = null;
+ }
+
+ /// Status from testing term vectors.
+ public sealed class TermVectorStatus
+ {
+
+ /// Number of documents tested.
+ public int docCount = 0;
+
+ /// Total number of term vectors tested.
+ public long totVectors = 0;
+
+ /// Exception thrown during term vector test (null on success)
+ public System.Exception error = null;
+ }
+ }
+
+ /// Create a new CheckIndex on the directory.
+ public CheckIndex(Directory dir)
+ {
+ this.dir = dir;
+ infoStream = null;
+ }
+
+ /// Set infoStream where messages should go. If null, no
+ /// messages are printed
+ ///
+ public virtual void SetInfoStream(StreamWriter @out)
+ {
+ infoStream = @out;
+ }
+
+ private void Msg(System.String msg)
+ {
+ if (infoStream != null)
+ infoStream.WriteLine(msg);
+ }
+
+ private class MySegmentTermDocs:SegmentTermDocs
+ {
+
+ internal int delCount;
+
+ internal MySegmentTermDocs(SegmentReader p):base(p)
+ {
+ }
+
+ public override void Seek(Term term)
+ {
+ base.Seek(term);
+ delCount = 0;
+ }
+
+ protected internal override void SkippingDoc()
+ {
+ delCount++;
+ }
+ }
+
+ /// Returns a Status instance detailing
+ /// the state of the index.
+ ///
+ ///
+ /// As this method checks every byte in the index, on a large
+ /// index it can take quite a long time to run.
+ ///
+ ///
+ /// WARNING: make sure
+ /// you only call this when the index is not opened by any
+ /// writer.
+ ///
+ public virtual Status CheckIndex_Renamed_Method()
+ {
+ return CheckIndex_Renamed_Method(null);
+ }
+
+ /// Returns a Status instance detailing
+ /// the state of the index.
+ ///
+ ///
+ /// list of specific segment names to check
+ ///
+ ///
+ /// As this method checks every byte in the specified
+ /// segments, on a large index it can take quite a long
+ /// time to run.
+ ///
+ ///
+ /// WARNING: make sure
+ /// you only call this when the index is not opened by any
+ /// writer.
+ ///
+ public virtual Status CheckIndex_Renamed_Method(List<string> onlySegments)
+ {
+ System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
+ SegmentInfos sis = new SegmentInfos();
+ Status result = new Status();
+ result.dir = dir;
+ try
+ {
+ sis.Read(dir);
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not read any segments file in directory");
+ result.missingSegments = true;
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ return result;
+ }
+
+ int numSegments = sis.Count;
+ var segmentsFileName = sis.GetCurrentSegmentFileName();
+ IndexInput input = null;
+ try
+ {
+ input = dir.OpenInput(segmentsFileName);
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not open segments file in directory");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ result.cantOpenSegments = true;
+ return result;
+ }
+ int format = 0;
+ try
+ {
+ format = input.ReadInt();
+ }
+ catch (System.Exception t)
+ {
+ Msg("ERROR: could not read segment file version in directory");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ result.missingSegmentVersion = true;
+ return result;
+ }
+ finally
+ {
+ if (input != null)
+ input.Close();
+ }
+
+ System.String sFormat = "";
+ bool skip = false;
+
+ if (format == SegmentInfos.FORMAT)
+ sFormat = "FORMAT [Lucene Pre-2.1]";
+ if (format == SegmentInfos.FORMAT_LOCKLESS)
+ sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
+ else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
+ sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
+ else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
+ sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
+ else
+ {
+ if (format == SegmentInfos.FORMAT_CHECKSUM)
+ sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_DEL_COUNT)
+ sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_HAS_PROX)
+ sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
+ else if (format == SegmentInfos.FORMAT_USER_DATA)
+ sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
+ else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
+ sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
+ else if (format < SegmentInfos.CURRENT_FORMAT)
+ {
+ sFormat = "int=" + format + " [newer version of Lucene than this tool]";
+ skip = true;
+ }
+ else
+ {
+ sFormat = format + " [Lucene 1.3 or prior]";
+ }
+ }
+
+ result.segmentsFileName = segmentsFileName;
+ result.numSegments = numSegments;
+ result.segmentFormat = sFormat;
+ result.userData = sis.UserData;
+ System.String userDataString;
+ if (sis.UserData.Count > 0)
+ {
+ userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData);
+ }
+ else
+ {
+ userDataString = "";
+ }
+
+ Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
+
+ if (onlySegments != null)
+ {
+ result.partial = true;
+ if (infoStream != null)
+ infoStream.Write("\nChecking only these segments:");
+ foreach(string s in onlySegments)
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" " + s);
+ }
+ }
+ result.segmentsChecked.AddRange(onlySegments);
+ Msg(":");
+ }
+
+ if (skip)
+ {
+ Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
+ result.toolOutOfDate = true;
+ return result;
+ }
+
+
+ result.newSegments = (SegmentInfos) sis.Clone();
+ result.newSegments.Clear();
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ SegmentInfo info = sis.Info(i);
+ if (onlySegments != null && !onlySegments.Contains(info.name))
+ continue;
+ var segInfoStat = new Status.SegmentInfoStatus();
+ result.segmentInfos.Add(segInfoStat);
+ Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
+ segInfoStat.name = info.name;
+ segInfoStat.docCount = info.docCount;
+
+ int toLoseDocCount = info.docCount;
+
+ SegmentReader reader = null;
+
+ try
+ {
+ Msg(" compound=" + info.GetUseCompoundFile());
+ segInfoStat.compound = info.GetUseCompoundFile();
+ Msg(" hasProx=" + info.HasProx);
+ segInfoStat.hasProx = info.HasProx;
+ Msg(" numFiles=" + info.Files().Count);
+ segInfoStat.numFiles = info.Files().Count;
+ Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) }));
+ segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0);
+ IDictionary<string, string> diagnostics = info.Diagnostics;
+ segInfoStat.diagnostics = diagnostics;
+ if (diagnostics.Count > 0)
+ {
+ Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics));
+ }
+
+ int docStoreOffset = info.DocStoreOffset;
+ if (docStoreOffset != - 1)
+ {
+ Msg(" docStoreOffset=" + docStoreOffset);
+ segInfoStat.docStoreOffset = docStoreOffset;
+ Msg(" docStoreSegment=" + info.DocStoreSegment);
+ segInfoStat.docStoreSegment = info.DocStoreSegment;
+ Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile);
+ segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile;
+ }
+ System.String delFileName = info.GetDelFileName();
+ if (delFileName == null)
+ {
+ Msg(" no deletions");
+ segInfoStat.hasDeletions = false;
+ }
+ else
+ {
+ Msg(" has deletions [delFileName=" + delFileName + "]");
+ segInfoStat.hasDeletions = true;
+ segInfoStat.deletionsFileName = delFileName;
+ }
+ if (infoStream != null)
+ infoStream.Write(" test: open reader.........");
+ reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
+
+ segInfoStat.openReaderPassed = true;
+
+ int numDocs = reader.NumDocs();
+ toLoseDocCount = numDocs;
+ if (reader.HasDeletions)
+ {
+ if (reader.deletedDocs.Count() != info.GetDelCount())
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
+ }
+ if (reader.deletedDocs.Count() > reader.MaxDoc)
+ {
+ throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count());
+ }
+ if (info.docCount - numDocs != info.GetDelCount())
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ segInfoStat.numDeleted = info.docCount - numDocs;
+ Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
+ }
+ else
+ {
+ if (info.GetDelCount() != 0)
+ {
+ throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs));
+ }
+ Msg("OK");
+ }
+ if (reader.MaxDoc != info.docCount)
+ throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount);
+
+ // Test getFieldNames()
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: fields..............");
+ }
+ ICollection<string> fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL);
+ Msg("OK [" + fieldNames.Count + " fields]");
+ segInfoStat.numFields = fieldNames.Count;
+
+ // Test Field Norms
+ segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader);
+
+ // Test the Term Index
+ segInfoStat.termIndexStatus = TestTermIndex(info, reader);
+
+ // Test Stored Fields
+ segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf);
+
+ // Test Term Vectors
+ segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf);
+
+ // Rethrow the first exception we encountered
+ // This will cause stats for failed segments to be incremented properly
+ if (segInfoStat.fieldNormStatus.error != null)
+ {
+ throw new SystemException("Field Norm test failed");
+ }
+ else if (segInfoStat.termIndexStatus.error != null)
+ {
+ throw new SystemException("Term Index test failed");
+ }
+ else if (segInfoStat.storedFieldStatus.error != null)
+ {
+ throw new SystemException("Stored Field test failed");
+ }
+ else if (segInfoStat.termVectorStatus.error != null)
+ {
+ throw new System.SystemException("Term Vector test failed");
+ }
+
+ Msg("");
+ }
+ catch (System.Exception t)
+ {
+ Msg("FAILED");
+ const string comment = "fixIndex() would remove reference to this segment";
+ Msg(" WARNING: " + comment + "; full exception:");
+ if (infoStream != null)
+ infoStream.WriteLine(t.StackTrace);
+ Msg("");
+ result.totLoseDocCount += toLoseDocCount;
+ result.numBadSegments++;
+ continue;
+ }
+ finally
+ {
+ if (reader != null)
+ reader.Close();
+ }
+
+ // Keeper
+ result.newSegments.Add((SegmentInfo)info.Clone());
+ }
+
+ if (0 == result.numBadSegments)
+ {
+ result.clean = true;
+ Msg("No problems were detected with this index.\n");
+ }
+ else
+ Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
+
+ return result;
+ }
+
+ /// Test field norms.
+ private Status.FieldNormStatus TestFieldNorms(IEnumerable<string> fieldNames, SegmentReader reader)
+ {
+ var status = new Status.FieldNormStatus();
+
+ try
+ {
+ // Test Field Norms
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: field norms.........");
+ }
+
+ var b = new byte[reader.MaxDoc];
+ foreach(string fieldName in fieldNames)
+ {
+ if (reader.HasNorms(fieldName))
+ {
+ reader.Norms(fieldName, b, 0);
+ ++status.totFields;
+ }
+ }
+
+ Msg("OK [" + status.totFields + " fields]");
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// Test the term index.
+ private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader)
+ {
+ var status = new Status.TermIndexStatus();
+
+ try
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: terms, freq, prox...");
+ }
+
+ TermEnum termEnum = reader.Terms();
+ TermPositions termPositions = reader.TermPositions();
+
+ // Used only to count up # deleted docs for this term
+ var myTermDocs = new MySegmentTermDocs(reader);
+
+ int maxDoc = reader.MaxDoc;
+
+ while (termEnum.Next())
+ {
+ status.termCount++;
+ Term term = termEnum.Term;
+ int docFreq = termEnum.DocFreq();
+ termPositions.Seek(term);
+ int lastDoc = - 1;
+ int freq0 = 0;
+ status.totFreq += docFreq;
+ while (termPositions.Next())
+ {
+ freq0++;
+ int doc = termPositions.Doc;
+ int freq = termPositions.Freq;
+ if (doc <= lastDoc)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
+ }
+ if (doc >= maxDoc)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
+ }
+
+ lastDoc = doc;
+ if (freq <= 0)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
+ }
+
+ int lastPos = - 1;
+ status.totPos += freq;
+ for (int j = 0; j < freq; j++)
+ {
+ int pos = termPositions.NextPosition();
+ if (pos < - 1)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
+ }
+ if (pos < lastPos)
+ {
+ throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
+ }
+ lastPos = pos;
+ }
+ }
+
+ // Now count how many deleted docs occurred in
+ // this term:
+ int delCount;
+ if (reader.HasDeletions)
+ {
+ myTermDocs.Seek(term);
+ while (myTermDocs.Next())
+ {
+ }
+ delCount = myTermDocs.delCount;
+ }
+ else
+ {
+ delCount = 0;
+ }
+
+ if (freq0 + delCount != docFreq)
+ {
+ throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
+ }
+ }
+
+ Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// Test stored fields for a segment.
+ private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
+ {
+ var status = new Status.StoredFieldStatus();
+
+ try
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: stored fields.......");
+ }
+
+ // Scan stored fields for all documents
+ for (int j = 0; j < info.docCount; ++j)
+ {
+ if (!reader.IsDeleted(j))
+ {
+ status.docCount++;
+ Document doc = reader.Document(j);
+ status.totFields += doc.GetFields().Count;
+ }
+ }
+
+ // Validate docCount
+ if (status.docCount != reader.NumDocs())
+ {
+ throw new System.SystemException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
+ }
+
+ Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) }));
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// Test term vectors for a segment.
+ private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format)
+ {
+ var status = new Status.TermVectorStatus();
+
+ try
+ {
+ if (infoStream != null)
+ {
+ infoStream.Write(" test: term vectors........");
+ }
+
+ for (int j = 0; j < info.docCount; ++j)
+ {
+ if (!reader.IsDeleted(j))
+ {
+ status.docCount++;
+ ITermFreqVector[] tfv = reader.GetTermFreqVectors(j);
+ if (tfv != null)
+ {
+ status.totVectors += tfv.Length;
+ }
+ }
+ }
+
+ Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) }));
+ }
+ catch (System.Exception e)
+ {
+ Msg("ERROR [" + System.Convert.ToString(e.Message) + "]");
+ status.error = e;
+ if (infoStream != null)
+ {
+ infoStream.WriteLine(e.StackTrace);
+ }
+ }
+
+ return status;
+ }
+
+ /// Repairs the index using previously returned result
+ /// from CheckIndex_Renamed_Method. Note that this does not
+ /// remove any of the unreferenced files after it's done;
+ /// you must separately open an IndexWriter, which
+ /// deletes unreferenced files when it's created.
+ ///
+ ///
+ /// WARNING: this writes a
+ /// new segments file into the index, effectively removing
+ /// all documents in broken segments from the index.
+ /// BE CAREFUL.
+ ///
+ ///
+ /// WARNING: Make sure you only call this when the
+ /// index is not opened by any writer.
+ ///
+ public virtual void FixIndex(Status result)
+ {
+ if (result.partial)
+ throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
+ result.newSegments.Commit(result.dir);
+ }
+
+ private static bool assertsOn;
+
+ private static bool TestAsserts()
+ {
+ assertsOn = true;
+ return true;
+ }
+
+ private static bool AssertsOn()
+ {
+ System.Diagnostics.Debug.Assert(TestAsserts());
+ return assertsOn;
+ }
+
+ /// Command-line interface to check and fix an index.
+ ///
+ /// Run it like this:
+ ///
+ /// java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+ ///
+ ///
+ /// -fix : actually write a new segments_N file, removing any problematic segments
+ /// -segment X : only check the specified
+ /// segment(s). This can be specified multiple times,
+ /// to check more than one segment, eg -segment _2
+ /// -segment _a . You can't use this with the -fix
+ /// option.
+ ///
+ ///
+ /// WARNING: -fix should only be used on an emergency basis as it will cause
+ /// documents (perhaps many) to be permanently removed from the index. Always make
+ /// a backup copy of your index before running this! Do not run this tool on an index
+ /// that is actively being written to. You have been warned!
+ ///
+ /// Run without -fix, this tool will open the index, report version information
+ /// and report any exceptions it hits and what action it would take if -fix were
+ /// specified. With -fix, this tool will remove any segments that have issues and
+ /// write a new segments_N file. This means all documents contained in the affected
+ /// segments will be removed.
+ ///
+ /// This tool exits with exit code 1 if the index cannot be opened or has any
+ /// corruption, else 0.
+ ///
+ [STAThread]
+ public static void Main(System.String[] args)
+ {
+
+ bool doFix = false;
+ var onlySegments = new List<string>();
+ System.String indexPath = null;
+ int i = 0;
+ while (i < args.Length)
+ {
+ if (args[i].Equals("-fix"))
+ {
+ doFix = true;
+ i++;
+ }
+ else if (args[i].Equals("-segment"))
+ {
+ if (i == args.Length - 1)
+ {
+ System.Console.Out.WriteLine("ERROR: missing name for -segment option");
+ System.Environment.Exit(1);
+ }
+ onlySegments.Add(args[i + 1]);
+ i += 2;
+ }
+ else
+ {
+ if (indexPath != null)
+ {
+ System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'");
+ System.Environment.Exit(1);
+ }
+ indexPath = args[i];
+ i++;
+ }
+ }
+
+ if (indexPath == null)
+ {
+ System.Console.Out.WriteLine("\nERROR: index path not specified");
+ System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n");
+ System.Environment.Exit(1);
+ }
+
+ if (!AssertsOn())
+ System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled");
+
+ if (onlySegments.Count == 0)
+ onlySegments = null;
+ else if (doFix)
+ {
+ System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment");
+ System.Environment.Exit(1);
+ }
+
+ System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n");
+ Directory dir = null;
+ try
+ {
+ dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath));
+ }
+ catch (Exception t)
+ {
+ Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting");
+ Console.Out.WriteLine(t.StackTrace);
+ Environment.Exit(1);
+ }
+
+ var checker = new CheckIndex(dir);
+ var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding)
+ {AutoFlush = true};
+ checker.SetInfoStream(tempWriter);
+
+ Status result = checker.CheckIndex_Renamed_Method(onlySegments);
+ if (result.missingSegments)
+ {
+ System.Environment.Exit(1);
+ }
+
+ if (!result.clean)
+ {
+ if (!doFix)
+ {
+ System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
+ }
+ else
+ {
+ Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
+ Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
+ for (var s = 0; s < 5; s++)
+ {
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000));
+ System.Console.Out.WriteLine(" " + (5 - s) + "...");
+ }
+ Console.Out.WriteLine("Writing...");
+ checker.FixIndex(result);
+ Console.Out.WriteLine("OK");
+ Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\"");
+ }
+ }
+ System.Console.Out.WriteLine("");
+
+ int exitCode;
+ if (result != null && result.clean == true)
+ exitCode = 0;
+ else
+ exitCode = 1;
+ System.Environment.Exit(exitCode);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/CompoundFileReader.cs b/external/Lucene.Net.Light/src/core/Index/CompoundFileReader.cs
new file mode 100644
index 0000000000..74f4fb4d2e
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/CompoundFileReader.cs
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Linq;
+using Lucene.Net.Support;
+using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using Lock = Lucene.Net.Store.Lock;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// Class for accessing a compound stream.
+ /// This class implements a directory, but is limited to only read operations.
+ /// Directory methods that would normally modify data throw an exception.
+ ///
+ public class CompoundFileReader : Directory
+ {
+
+ private readonly int readBufferSize;
+
+ private sealed class FileEntry
+ {
+ internal long offset;
+ internal long length;
+ }
+
+ private bool isDisposed;
+
+ // Base info
+ private readonly Directory directory;
+ private readonly System.String fileName;
+
+ private IndexInput stream;
+ private HashMap<string, FileEntry> entries = new HashMap<string, FileEntry>();
+
+
+ public CompoundFileReader(Directory dir, System.String name):this(dir, name, BufferedIndexInput.BUFFER_SIZE)
+ {
+ }
+
+ public CompoundFileReader(Directory dir, System.String name, int readBufferSize)
+ {
+ directory = dir;
+ fileName = name;
+ this.readBufferSize = readBufferSize;
+
+ bool success = false;
+
+ try
+ {
+ stream = dir.OpenInput(name, readBufferSize);
+
+ // read the directory and init files
+ int count = stream.ReadVInt();
+ FileEntry entry = null;
+ for (int i = 0; i < count; i++)
+ {
+ long offset = stream.ReadLong();
+ System.String id = stream.ReadString();
+
+ if (entry != null)
+ {
+ // set length of the previous entry
+ entry.length = offset - entry.offset;
+ }
+
+ entry = new FileEntry {offset = offset};
+ entries[id] = entry;
+ }
+
+ // set the length of the final entry
+ if (entry != null)
+ {
+ entry.length = stream.Length() - entry.offset;
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success && (stream != null))
+ {
+ try
+ {
+ stream.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ }
+ }
+ }
+ }
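+
+ // Editor's note (not in the original source): as the constructor above shows, the
+ // compound stream begins with a VInt entry count followed by (offset, name) pairs;
+ // each entry's length is derived from the next entry's offset (or the total stream
+ // length for the last entry), so every sub-file can later be served as a CSIndexInput
+ // view over the single underlying IndexInput.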
+
+ public virtual Directory Directory
+ {
+ get { return directory; }
+ }
+
+ public virtual string Name
+ {
+ get { return fileName; }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ lock (this)
+ {
+ if (isDisposed) return;
+ if (disposing)
+ {
+ if (entries != null)
+ {
+ entries.Clear();
+ }
+ if (stream != null)
+ {
+ stream.Close();
+ }
+ }
+
+ entries = null;
+ stream = null;
+ isDisposed = true;
+ }
+ }
+
+ public override IndexInput OpenInput(System.String id)
+ {
+ lock (this)
+ {
+ // Default to readBufferSize passed in when we were opened
+ return OpenInput(id, readBufferSize);
+ }
+ }
+
+ public override IndexInput OpenInput(System.String id, int readBufferSize)
+ {
+ lock (this)
+ {
+ if (stream == null)
+ throw new System.IO.IOException("Stream closed");
+
+ FileEntry entry = entries[id];
+ if (entry == null)
+ throw new System.IO.IOException("No sub-file with id " + id + " found");
+
+ return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
+ }
+ }
+
+ /// Returns an array of strings, one for each file in the directory.
+ public override System.String[] ListAll()
+ {
+ return entries.Keys.ToArray();
+ }
+
+ /// Returns true iff a file with the given name exists.
+ public override bool FileExists(System.String name)
+ {
+ return entries.ContainsKey(name);
+ }
+
+ /// Returns the time the compound file was last modified.
+ public override long FileModified(System.String name)
+ {
+ return directory.FileModified(fileName);
+ }
+
+ /// Set the modified time of the compound file to now.
+ public override void TouchFile(System.String name)
+ {
+ directory.TouchFile(fileName);
+ }
+
+ /// Not implemented.
+ /// Always throws NotSupportedException.
+ public override void DeleteFile(System.String name)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// Not implemented.
+ /// Always throws NotSupportedException.
+ public void RenameFile(System.String from, System.String to)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// Returns the length of a file in the directory.
+ /// Throws IOException if the file does not exist.
+ public override long FileLength(System.String name)
+ {
+ FileEntry e = entries[name];
+ if (e == null)
+ throw new System.IO.IOException("File " + name + " does not exist");
+ return e.length;
+ }
+
+ /// Not implemented.
+ /// Always throws NotSupportedException.
+ public override IndexOutput CreateOutput(System.String name)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// Not implemented.
+ /// Always throws NotSupportedException.
+ public override Lock MakeLock(System.String name)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// Implementation of an IndexInput that reads from a portion of the
+ /// compound file. The visibility is left as "package" *only* because
+ /// this helps with testing since JUnit test cases in a different class
+ /// can then access package fields of this class.
+ ///
+ public /*internal*/ sealed class CSIndexInput : BufferedIndexInput
+ {
+ internal IndexInput base_Renamed;
+ internal long fileOffset;
+ internal long length;
+
+ private bool isDisposed;
+
+ internal CSIndexInput(IndexInput @base, long fileOffset, long length):this(@base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE)
+ {
+ }
+
+ internal CSIndexInput(IndexInput @base, long fileOffset, long length, int readBufferSize):base(readBufferSize)
+ {
+ this.base_Renamed = (IndexInput) @base.Clone();
+ this.fileOffset = fileOffset;
+ this.length = length;
+ }
+
+ public override System.Object Clone()
+ {
+ var clone = (CSIndexInput) base.Clone();
+ clone.base_Renamed = (IndexInput) base_Renamed.Clone();
+ clone.fileOffset = fileOffset;
+ clone.length = length;
+ return clone;
+ }
+
+ /// Expert: implements buffer refill. Reads bytes from the current
+ /// position in the input.
+ ///
+ /// b: the array to read bytes into
+ /// offset: the offset in the array to start storing bytes
+ /// len: the number of bytes to read
+ ///
+ public override void ReadInternal(byte[] b, int offset, int len)
+ {
+ long start = FilePointer;
+ if (start + len > length)
+ throw new System.IO.IOException("read past EOF");
+ base_Renamed.Seek(fileOffset + start);
+ base_Renamed.ReadBytes(b, offset, len, false);
+ }
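+ // Worked example (illustrative): for a sub-file stored at fileOffset=100 with
+ // length=50, reading 10 bytes while FilePointer is 20 seeks the shared stream to
+ // 100+20=120 and copies 10 bytes; a request that would move past local offset 50
+ // throws "read past EOF".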
+
+ /// Expert: implements seek. Sets the current position in this file, where
+ /// the next ReadInternal(byte[], int, int) will occur.
+ ///
+ public override void SeekInternal(long pos)
+ {
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (isDisposed) return;
+
+ if (disposing)
+ {
+ if (base_Renamed != null)
+ {
+ base_Renamed.Close();
+ }
+ }
+
+ isDisposed = true;
+ }
+
+ public override long Length()
+ {
+ return length;
+ }
+
+ public IndexInput base_Renamed_ForNUnit
+ {
+ get { return base_Renamed; }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/CompoundFileWriter.cs b/external/Lucene.Net.Light/src/core/Index/CompoundFileWriter.cs
new file mode 100644
index 0000000000..e2905e121b
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/CompoundFileWriter.cs
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// Combines multiple files into a single compound file.
+ /// The file format:
+ ///
+ /// - VInt fileCount
+ /// - {Directory}: fileCount entries with the following structure:
+ ///     - long dataOffset
+ ///     - String fileName
+ /// - {File Data}: fileCount entries with the raw data of the corresponding file
+ ///
+ /// The fileCount integer indicates how many files are contained in this compound
+ /// file. The {directory} that follows has that many entries. Each directory entry
+ /// contains a long pointer to the start of this file's data section, and a String
+ /// with that file's name.
+ ///
+ public sealed class CompoundFileWriter : IDisposable
+ {
+
+ private sealed class FileEntry
+ {
+ /// source file
+ internal System.String file;
+
+ /// temporary holder for the start of directory entry for this file
+ internal long directoryOffset;
+
+ /// temporary holder for the start of this file's data section
+ internal long dataOffset;
+ }
+
+
+ private readonly Directory directory;
+ private readonly String fileName;
+ private readonly HashSet<string> ids;
+ private readonly LinkedList<FileEntry> entries;
+ private bool merged = false;
+ private readonly SegmentMerger.CheckAbort checkAbort;
+
+ /// Create the compound stream in the specified file. The file name is the
+ /// entire name (no extensions are added).
+ ///
+ /// Throws ArgumentNullException if dir or name is null.
+ public CompoundFileWriter(Directory dir, System.String name):this(dir, name, null)
+ {
+ }
+
+ internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort)
+ {
+ if (dir == null)
+ throw new ArgumentNullException("dir");
+ if (name == null)
+ throw new ArgumentNullException("name");
+ this.checkAbort = checkAbort;
+ directory = dir;
+ fileName = name;
+ ids = new HashSet<string>();
+ entries = new LinkedList<FileEntry>();
+ }
+
+ /// Returns the directory of the compound file.
+ public Directory Directory
+ {
+ get { return directory; }
+ }
+
+ /// Returns the name of the compound file.
+ public string Name
+ {
+ get { return fileName; }
+ }
+
+ /// Add a source stream. file is the string by which the
+ /// sub-stream will be known in the compound stream.
+ ///
+ ///
+ /// IllegalStateException if this writer is closed
+ /// NullPointerException if file is null
+ /// IllegalArgumentException if a file with the same name
+ /// has been added already
+ ///
+ public void AddFile(String file)
+ {
+ if (merged)
+ throw new InvalidOperationException("Can't add extensions after merge has been called");
+
+ if (file == null)
+ throw new ArgumentNullException("file");
+
+ // HashSet<T>.Add does not throw on duplicates; it returns false, so test the
+ // return value instead of relying on an exception.
+ if (!ids.Add(file))
+ {
+ throw new ArgumentException("File " + file + " already added");
+ }
+
+ var entry = new FileEntry {file = file};
+ entries.AddLast(entry);
+ }
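+ // A minimal usage sketch (assuming "dir" is an existing Store.Directory and the
+ // listed sub-files already exist in it; all names are illustrative):
+ //
+ //   var writer = new CompoundFileWriter(dir, "_1.cfs");
+ //   writer.AddFile("_1.fnm");
+ //   writer.AddFile("_1.fdx");
+ //   writer.AddFile("_1.fdt");
+ //   writer.Dispose();   // performs the actual merge into "_1.cfs"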
+
+ [Obsolete("Use Dispose() instead")]
+ public void Close()
+ {
+ Dispose();
+ }
+
+ /// Merge files with the extensions added up to now.
+ /// All files with these extensions are combined sequentially into the
+ /// compound stream. After successful merge, the source files
+ /// are deleted.
+ ///
+ /// IllegalStateException if close() had been called before or
+ /// if no file has been added to this object
+ ///
+ public void Dispose()
+ {
+ // Extract into protected method if class ever becomes unsealed
+
+ // TODO: Dispose shouldn't throw exceptions!
+ if (merged)
+ throw new SystemException("Merge already performed");
+
+ if ((entries.Count == 0))
+ throw new SystemException("No entries to merge have been defined");
+
+ merged = true;
+
+ // open the compound stream
+ IndexOutput os = null;
+ try
+ {
+ os = directory.CreateOutput(fileName);
+
+ // Write the number of entries
+ os.WriteVInt(entries.Count);
+
+ // Write the directory with all offsets at 0.
+ // Remember the positions of directory entries so that we can
+ // adjust the offsets later
+ long totalSize = 0;
+ foreach (FileEntry fe in entries)
+ {
+ fe.directoryOffset = os.FilePointer;
+ os.WriteLong(0); // for now
+ os.WriteString(fe.file);
+ totalSize += directory.FileLength(fe.file);
+ }
+
+ // Pre-allocate size of file as optimization --
+ // this can potentially help IO performance as
+ // we write the file and also later during
+ // searching. It also uncovers a disk-full
+ // situation earlier and hopefully without
+ // actually filling disk to 100%:
+ long finalLength = totalSize + os.FilePointer;
+ os.SetLength(finalLength);
+
+ // Open the files and copy their data into the stream.
+ // Remember the locations of each file's data section.
+ var buffer = new byte[16384];
+ foreach (FileEntry fe in entries)
+ {
+ fe.dataOffset = os.FilePointer;
+ CopyFile(fe, os, buffer);
+ }
+
+ // Write the data offsets into the directory of the compound stream
+ foreach (FileEntry fe in entries)
+ {
+ os.Seek(fe.directoryOffset);
+ os.WriteLong(fe.dataOffset);
+ }
+
+ System.Diagnostics.Debug.Assert(finalLength == os.Length);
+
+ // Close the output stream. Set the os to null before trying to
+ // close so that if an exception occurs during the close, the
+ // finally clause below will not attempt to close the stream
+ // the second time.
+ IndexOutput tmp = os;
+ os = null;
+ tmp.Close();
+ }
+ finally
+ {
+ if (os != null)
+ try
+ {
+ os.Close();
+ }
+ catch (System.IO.IOException)
+ {
+ }
+ }
+ }
+
+
+ /// Copy the contents of the file with specified extension into the
+ /// provided output stream. Use the provided buffer for moving data
+ /// to reduce memory allocation.
+ ///
+ private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer)
+ {
+ IndexInput isRenamed = null;
+ try
+ {
+ long startPtr = os.FilePointer;
+
+ isRenamed = directory.OpenInput(source.file);
+ long length = isRenamed.Length();
+ long remainder = length;
+ int chunk = buffer.Length;
+
+ while (remainder > 0)
+ {
+ var len = (int) Math.Min(chunk, remainder);
+ isRenamed.ReadBytes(buffer, 0, len, false);
+ os.WriteBytes(buffer, len);
+ remainder -= len;
+ if (checkAbort != null)
+ // Roughly every 2 MB we will check if
+ // it's time to abort
+ checkAbort.Work(80);
+ }
+
+ // Verify that remainder is 0
+ if (remainder != 0)
+ throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
+
+ // Verify that the output length diff is equal to original file
+ long endPtr = os.FilePointer;
+ long diff = endPtr - startPtr;
+ if (diff != length)
+ throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
+ }
+ finally
+ {
+ if (isRenamed != null)
+ isRenamed.Close();
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/ConcurrentMergeScheduler.cs b/external/Lucene.Net.Light/src/core/Index/ConcurrentMergeScheduler.cs
new file mode 100644
index 0000000000..8b8a300820
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/ConcurrentMergeScheduler.cs
@@ -0,0 +1,504 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using Directory = Lucene.Net.Store.Directory;
+
+namespace Lucene.Net.Index
+{
+
+ /// A MergeScheduler that runs each merge using a
+ /// separate thread, up until a maximum number of threads
+ /// (MaxThreadCount), at which point, when a merge is
+ /// needed, the thread(s) that are updating the index will
+ /// pause until one or more merges completes. This is a
+ /// simple way to use concurrency in the indexing process
+ /// without having to create and manage application level
+ /// threads.
+ ///
+
+ public class ConcurrentMergeScheduler:MergeScheduler
+ {
+
+ private int mergeThreadPriority = - 1;
+
+ protected internal IList<MergeThread> mergeThreads = new List<MergeThread>();
+
+ // Max number of threads allowed to be merging at once
+ private int _maxThreadCount = 1;
+
+ protected internal Directory dir;
+
+ private bool closed;
+ protected internal IndexWriter writer;
+ protected internal int mergeThreadCount;
+
+ public ConcurrentMergeScheduler()
+ {
+ if (allInstances != null)
+ {
+ // Only for testing
+ AddMyself();
+ }
+ }
+
+ /// Gets or sets the max # simultaneous threads that may be
+ /// running. If a merge is necessary yet we already have
+ /// this many threads running, the incoming thread (that
+ /// is calling add/updateDocument) will block until
+ /// a merge thread has completed.
+ ///
+ public virtual int MaxThreadCount
+ {
+ set
+ {
+ if (value < 1)
+ throw new System.ArgumentException("count should be at least 1");
+ _maxThreadCount = value;
+ }
+ get { return _maxThreadCount; }
+ }
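+ // A minimal configuration sketch (assuming an existing IndexWriter named
+ // "indexWriter" and that IndexWriter.SetMergeScheduler is available, as in the
+ // full Lucene.Net API; the thread count of 3 is illustrative):
+ //
+ //   var cms = new ConcurrentMergeScheduler { MaxThreadCount = 3 };
+ //   indexWriter.SetMergeScheduler(cms);   // merges now run on up to three background threads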
+
+ /// Return the priority that merge threads run at. By
+ /// default the priority is 1 plus the priority of (ie,
+ /// slightly higher priority than) the first thread that
+ /// calls merge.
+ ///
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")]
+ public virtual int GetMergeThreadPriority()
+ {
+ lock (this)
+ {
+ InitMergeThreadPriority();
+ return mergeThreadPriority;
+ }
+ }
+
+ /// Set the priority that merge threads run at.
+ public virtual void SetMergeThreadPriority(int pri)
+ {
+ lock (this)
+ {
+ if (pri > (int) System.Threading.ThreadPriority.Highest || pri < (int) System.Threading.ThreadPriority.Lowest)
+ throw new System.ArgumentException("priority must be in range " + (int) System.Threading.ThreadPriority.Lowest + " .. " + (int) System.Threading.ThreadPriority.Highest + " inclusive");
+ mergeThreadPriority = pri;
+
+ int numThreads = MergeThreadCount();
+ for (int i = 0; i < numThreads; i++)
+ {
+ MergeThread merge = mergeThreads[i];
+ merge.SetThreadPriority(pri);
+ }
+ }
+ }
+
+ private bool Verbose()
+ {
+ return writer != null && writer.Verbose;
+ }
+
+ private void Message(System.String message)
+ {
+ if (Verbose())
+ writer.Message("CMS: " + message);
+ }
+
+ private void InitMergeThreadPriority()
+ {
+ lock (this)
+ {
+ if (mergeThreadPriority == - 1)
+ {
+ // Default to slightly higher priority than our
+ // calling thread
+ mergeThreadPriority = 1 + (System.Int32) ThreadClass.Current().Priority;
+ if (mergeThreadPriority > (int) System.Threading.ThreadPriority.Highest)
+ mergeThreadPriority = (int) System.Threading.ThreadPriority.Highest;
+ }
+ }
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ //if (disposing)
+ //{
+ closed = true;
+ //}
+ }
+
+ public virtual void Sync()
+ {
+ lock (this)
+ {
+ while (MergeThreadCount() > 0)
+ {
+ if (Verbose())
+ Message("now wait for threads; currently " + mergeThreads.Count + " still running");
+ int count = mergeThreads.Count;
+ if (Verbose())
+ {
+ for (int i = 0; i < count; i++)
+ Message(" " + i + ": " + mergeThreads[i]);
+ }
+
+ System.Threading.Monitor.Wait(this);
+
+ }
+ }
+ }
+
+ private int MergeThreadCount()
+ {
+ lock (this)
+ {
+ int count = 0;
+ int numThreads = mergeThreads.Count;
+ for (int i = 0; i < numThreads; i++)
+ {
+ if (mergeThreads[i].IsAlive)
+ {
+ count++;
+ }
+ }
+ return count;
+ }
+ }
+
+ public override void Merge(IndexWriter writer)
+ {
+ // TODO: .NET doesn't support this
+ // assert !Thread.holdsLock(writer);
+
+ this.writer = writer;
+
+ InitMergeThreadPriority();
+
+ dir = writer.Directory;
+
+ // First, quickly run through the newly proposed merges
+ // and add any orthogonal merges (ie a merge not
+ // involving segments already pending to be merged) to
+ // the queue. If we are way behind on merging, many of
+ // these newly proposed merges will likely already be
+ // registered.
+
+ if (Verbose())
+ {
+ Message("now merge");
+ Message(" index: " + writer.SegString());
+ }
+
+ // Iterate, pulling from the IndexWriter's queue of
+ // pending merges, until it's empty:
+ while (true)
+ {
+ // TODO: we could be careful about which merges to do in
+ // the BG (eg maybe the "biggest" ones) vs FG, which
+ // merges to do first (the easiest ones?), etc.
+
+ MergePolicy.OneMerge merge = writer.GetNextMerge();
+ if (merge == null)
+ {
+ if (Verbose())
+ Message(" no more merges pending; now return");
+ return ;
+ }
+
+ // We do this w/ the primary thread to keep
+ // deterministic assignment of segment names
+ writer.MergeInit(merge);
+
+ bool success = false;
+ try
+ {
+ lock (this)
+ {
+ while (MergeThreadCount() >= _maxThreadCount)
+ {
+ if (Verbose())
+ Message(" too many merge threads running; stalling...");
+
+ System.Threading.Monitor.Wait(this);
+
+
+ }
+
+ if (Verbose())
+ Message(" consider merge " + merge.SegString(dir));
+
+ System.Diagnostics.Debug.Assert(MergeThreadCount() < _maxThreadCount);
+
+ // OK to spawn a new merge thread to handle this
+ // merge:
+ MergeThread merger = GetMergeThread(writer, merge);
+ mergeThreads.Add(merger);
+ if (Verbose())
+ Message(" launch new thread [" + merger.Name + "]");
+
+ merger.Start();
+ success = true;
+ }
+ }
+ finally
+ {
+ if (!success)
+ {
+ writer.MergeFinish(merge);
+ }
+ }
+ }
+ }
+
+ /// Does the actual merge, by calling IndexWriter.Merge(MergePolicy.OneMerge).
+ protected internal virtual void DoMerge(MergePolicy.OneMerge merge)
+ {
+ writer.Merge(merge);
+ }
+
+ /// Create and return a new MergeThread
+ protected internal virtual MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ var thread = new MergeThread(this, writer, merge);
+ thread.SetThreadPriority(mergeThreadPriority);
+ thread.IsBackground = true;
+ thread.Name = "Lucene Merge Thread #" + mergeThreadCount++;
+ return thread;
+ }
+ }
+
+ public /*protected internal*/ class MergeThread:ThreadClass
+ {
+ private void InitBlock(ConcurrentMergeScheduler enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private ConcurrentMergeScheduler enclosingInstance;
+ public ConcurrentMergeScheduler Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal IndexWriter writer;
+ internal MergePolicy.OneMerge startMerge;
+ internal MergePolicy.OneMerge runningMerge;
+
+ public MergeThread(ConcurrentMergeScheduler enclosingInstance, IndexWriter writer, MergePolicy.OneMerge startMerge)
+ {
+ InitBlock(enclosingInstance);
+ this.writer = writer;
+ this.startMerge = startMerge;
+ }
+
+ public virtual void SetRunningMerge(MergePolicy.OneMerge merge)
+ {
+ lock (this)
+ {
+ runningMerge = merge;
+ }
+ }
+
+ public virtual MergePolicy.OneMerge RunningMerge
+ {
+ get
+ {
+ lock (this)
+ {
+ return runningMerge;
+ }
+ }
+ }
+
+ public virtual void SetThreadPriority(int pri)
+ {
+ try
+ {
+ Priority = (System.Threading.ThreadPriority) pri;
+ }
+ catch (System.NullReferenceException)
+ {
+ // Strangely, Sun's JDK 1.5 on Linux sometimes
+ // throws NPE out of here...
+ }
+ catch (System.Security.SecurityException)
+ {
+ // Ignore this because we will still run fine with
+ // normal thread priority
+ }
+ }
+
+ override public void Run()
+ {
+
+ // First time through the while loop we do the merge
+ // that we were started with:
+ MergePolicy.OneMerge merge = this.startMerge;
+
+ try
+ {
+
+ if (Enclosing_Instance.Verbose())
+ Enclosing_Instance.Message(" merge thread: start");
+
+ while (true)
+ {
+ SetRunningMerge(merge);
+ Enclosing_Instance.DoMerge(merge);
+
+ // Subsequent times through the loop we do any new
+ // merge that writer says is necessary:
+ merge = writer.GetNextMerge();
+ if (merge != null)
+ {
+ writer.MergeInit(merge);
+ if (Enclosing_Instance.Verbose())
+ Enclosing_Instance.Message(" merge thread: do another merge " + merge.SegString(Enclosing_Instance.dir));
+ }
+ else
+ break;
+ }
+
+ if (Enclosing_Instance.Verbose())
+ Enclosing_Instance.Message(" merge thread: done");
+ }
+ catch (System.Exception exc)
+ {
+ // Ignore the exception if it was due to abort:
+ if (!(exc is MergePolicy.MergeAbortedException))
+ {
+ if (!Enclosing_Instance.suppressExceptions)
+ {
+ // suppressExceptions is normally only set during
+ // testing.
+ Lucene.Net.Index.ConcurrentMergeScheduler.anyExceptions = true;
+ Enclosing_Instance.HandleMergeException(exc);
+ }
+ }
+ }
+ finally
+ {
+ lock (Enclosing_Instance)
+ {
+ System.Threading.Monitor.PulseAll(Enclosing_Instance);
+ Enclosing_Instance.mergeThreads.Remove(this);
+ bool removed = !Enclosing_Instance.mergeThreads.Contains(this);
+ System.Diagnostics.Debug.Assert(removed);
+ }
+ }
+ }
+
+ public override System.String ToString()
+ {
+ MergePolicy.OneMerge merge = RunningMerge ?? startMerge;
+ return "merge thread: " + merge.SegString(Enclosing_Instance.dir);
+ }
+ }
+
+ /// Called when an exception is hit in a background merge
+ /// thread
+ ///
+ protected internal virtual void HandleMergeException(System.Exception exc)
+ {
+ // When an exception is hit during merge, IndexWriter
+ // removes any partial files and then allows another
+ // merge to run. If whatever caused the error is not
+ // transient then the exception will keep happening,
+ // so, we sleep here to avoid saturating CPU in such
+ // cases:
+ System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 250));
+
+ throw new MergePolicy.MergeException(exc, dir);
+ }
+
+ internal static bool anyExceptions = false;
+
+ /// Used for testing
+ public static bool AnyUnhandledExceptions()
+ {
+ if (allInstances == null)
+ {
+ throw new System.SystemException("setTestMode() was not called; often this is because your test case's setUp method fails to call super.setUp in LuceneTestCase");
+ }
+ lock (allInstances)
+ {
+ int count = allInstances.Count;
+ // Make sure all outstanding threads are done so we see
+ // any exceptions they may produce:
+ for (int i = 0; i < count; i++)
+ allInstances[i].Sync();
+ bool v = anyExceptions;
+ anyExceptions = false;
+ return v;
+ }
+ }
+
+ public static void ClearUnhandledExceptions()
+ {
+ lock (allInstances)
+ {
+ anyExceptions = false;
+ }
+ }
+
+ /// Used for testing
+ private void AddMyself()
+ {
+ lock (allInstances)
+ {
+ int size = allInstances.Count;
+ int upto = 0;
+ for (int i = 0; i < size; i++)
+ {
+ ConcurrentMergeScheduler other = allInstances[i];
+ if (!(other.closed && 0 == other.MergeThreadCount()))
+ // Keep this one for now: it still has threads or
+ // may spawn new threads
+ allInstances[upto++] = other;
+ }
+ allInstances.RemoveRange(upto, allInstances.Count - upto);
+ allInstances.Add(this);
+ }
+ }
+
+ private bool suppressExceptions;
+
+ /// Used for testing
+ public /*internal*/ virtual void SetSuppressExceptions()
+ {
+ suppressExceptions = true;
+ }
+
+ /// Used for testing
+ public /*internal*/ virtual void ClearSuppressExceptions()
+ {
+ suppressExceptions = false;
+ }
+
+ /// Used for testing
+ private static List<ConcurrentMergeScheduler> allInstances;
+ public static void SetTestMode()
+ {
+ allInstances = new List<ConcurrentMergeScheduler>();
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/CorruptIndexException.cs b/external/Lucene.Net.Light/src/core/Index/CorruptIndexException.cs
new file mode 100644
index 0000000000..d846cb38ff
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/CorruptIndexException.cs
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// This exception is thrown when Lucene detects
+ /// an inconsistency in the index.
+ ///
+ [Serializable]
+ public class CorruptIndexException:System.IO.IOException
+ {
+ public CorruptIndexException(String message):base(message)
+ {
+ }
+ public CorruptIndexException(String message, Exception exp):base(message, exp)
+ {
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DefaultSkipListReader.cs b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListReader.cs
new file mode 100644
index 0000000000..a1cdddec36
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListReader.cs
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using Lucene.Net.Support;
+using IndexInput = Lucene.Net.Store.IndexInput;
+
+namespace Lucene.Net.Index
+{
+
+ /// Implements the skip list reader for the default posting list format
+ /// that stores positions and payloads.
+ ///
+ ///
+ class DefaultSkipListReader:MultiLevelSkipListReader
+ {
+ private bool currentFieldStoresPayloads;
+ private readonly long[] freqPointer;
+ private readonly long[] proxPointer;
+ private readonly int[] payloadLength;
+
+ private long lastFreqPointer;
+ private long lastProxPointer;
+ private int lastPayloadLength;
+
+
+ internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval):base(skipStream, maxSkipLevels, skipInterval)
+ {
+ freqPointer = new long[maxSkipLevels];
+ proxPointer = new long[maxSkipLevels];
+ payloadLength = new int[maxSkipLevels];
+ }
+
+ internal virtual void Init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, bool storesPayloads)
+ {
+ base.Init(skipPointer, df);
+ this.currentFieldStoresPayloads = storesPayloads;
+ lastFreqPointer = freqBasePointer;
+ lastProxPointer = proxBasePointer;
+
+ for (int i = 0; i < freqPointer.Length; i++) freqPointer[i] = freqBasePointer;
+ for (int i = 0; i < proxPointer.Length; i++) proxPointer[i] = proxBasePointer;
+ for (int i = 0; i < payloadLength.Length; i++) payloadLength[i] = 0;
+ }
+
+ /// Returns the freq pointer of the doc to which the last call of
+ /// SkipTo(int) has skipped.
+ ///
+ internal virtual long GetFreqPointer()
+ {
+ return lastFreqPointer;
+ }
+
+ /// Returns the prox pointer of the doc to which the last call of
+ /// SkipTo(int) has skipped.
+ ///
+ internal virtual long GetProxPointer()
+ {
+ return lastProxPointer;
+ }
+
+ /// Returns the payload length of the payload stored just before
+ /// the doc to which the last call of SkipTo(int)
+ /// has skipped.
+ ///
+ internal virtual int GetPayloadLength()
+ {
+ return lastPayloadLength;
+ }
+
+ protected internal override void SeekChild(int level)
+ {
+ base.SeekChild(level);
+ freqPointer[level] = lastFreqPointer;
+ proxPointer[level] = lastProxPointer;
+ payloadLength[level] = lastPayloadLength;
+ }
+
+ protected internal override void SetLastSkipData(int level)
+ {
+ base.SetLastSkipData(level);
+ lastFreqPointer = freqPointer[level];
+ lastProxPointer = proxPointer[level];
+ lastPayloadLength = payloadLength[level];
+ }
+
+
+ protected internal override int ReadSkipData(int level, IndexInput skipStream)
+ {
+ int delta;
+ if (currentFieldStoresPayloads)
+ {
+ // the current field stores payloads.
+ // if the doc delta is odd then we have
+ // to read the current payload length
+ // because it differs from the length of the
+ // previous payload
+ delta = skipStream.ReadVInt();
+ if ((delta & 1) != 0)
+ {
+ payloadLength[level] = skipStream.ReadVInt();
+ }
+ delta = Number.URShift(delta, 1);
+ }
+ else
+ {
+ delta = skipStream.ReadVInt();
+ }
+ freqPointer[level] += skipStream.ReadVInt();
+ proxPointer[level] += skipStream.ReadVInt();
+
+ return delta;
+ }
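+ // Worked example (illustrative): with payloads enabled, reading VInt 11 means the
+ // low bit is set, so a new payload length is read next (say 7) and the doc delta is
+ // 11 >>> 1 = 5; reading VInt 10 keeps the previous payload length and yields delta 5.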
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DefaultSkipListWriter.cs b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListWriter.cs
new file mode 100644
index 0000000000..77412af81d
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListWriter.cs
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+
+namespace Lucene.Net.Index
+{
+
+
+ /// Implements the skip list writer for the default posting list format
+ /// that stores positions and payloads.
+ ///
+ ///
+ class DefaultSkipListWriter:MultiLevelSkipListWriter
+ {
+ private int[] lastSkipDoc;
+ private int[] lastSkipPayloadLength;
+ private long[] lastSkipFreqPointer;
+ private long[] lastSkipProxPointer;
+
+ private IndexOutput freqOutput;
+ private IndexOutput proxOutput;
+
+ private int curDoc;
+ private bool curStorePayloads;
+ private int curPayloadLength;
+ private long curFreqPointer;
+ private long curProxPointer;
+
+ internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput):base(skipInterval, numberOfSkipLevels, docCount)
+ {
+ this.freqOutput = freqOutput;
+ this.proxOutput = proxOutput;
+
+ lastSkipDoc = new int[numberOfSkipLevels];
+ lastSkipPayloadLength = new int[numberOfSkipLevels];
+ lastSkipFreqPointer = new long[numberOfSkipLevels];
+ lastSkipProxPointer = new long[numberOfSkipLevels];
+ }
+
+ internal virtual void SetFreqOutput(IndexOutput freqOutput)
+ {
+ this.freqOutput = freqOutput;
+ }
+
+ internal virtual void SetProxOutput(IndexOutput proxOutput)
+ {
+ this.proxOutput = proxOutput;
+ }
+
+ /// Sets the values for the current skip data.
+ internal virtual void SetSkipData(int doc, bool storePayloads, int payloadLength)
+ {
+ this.curDoc = doc;
+ this.curStorePayloads = storePayloads;
+ this.curPayloadLength = payloadLength;
+ this.curFreqPointer = freqOutput.FilePointer;
+ if (proxOutput != null)
+ this.curProxPointer = proxOutput.FilePointer;
+ }
+
+ protected internal override void ResetSkip()
+ {
+ base.ResetSkip();
+ for (int i = 0; i < lastSkipDoc.Length; i++) lastSkipDoc[i] = 0;
+ for (int i = 0; i < lastSkipPayloadLength.Length; i++) lastSkipPayloadLength[i] = -1; // we don't have to write the first length in the skip list
+ for (int i = 0; i < lastSkipFreqPointer.Length; i++) lastSkipFreqPointer[i] = freqOutput.FilePointer;
+ if (proxOutput != null)
+ for (int i = 0; i < lastSkipProxPointer.Length; i++) lastSkipProxPointer[i] = proxOutput.FilePointer;
+ }
+
+ protected internal override void WriteSkipData(int level, IndexOutput skipBuffer)
+ {
+ // To efficiently store payloads in the posting lists we do not store the length of
+ // every payload. Instead we omit the length for a payload if the previous payload had
+ // the same length.
+ // However, in order to support skipping the payload length at every skip point must be known.
+ // So we use the same length encoding that we use for the posting lists for the skip data as well:
+ // Case 1: current field does not store payloads
+ // SkipDatum --> DocSkip, FreqSkip, ProxSkip
+ // DocSkip,FreqSkip,ProxSkip --> VInt
+ // DocSkip records the document number before every SkipInterval th document in TermFreqs.
+ // Document numbers are represented as differences from the previous value in the sequence.
+ // Case 2: current field stores payloads
+ // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
+ // DocSkip,FreqSkip,ProxSkip --> VInt
+ // PayloadLength --> VInt
+ // In this case DocSkip/2 is the difference between
+ // the current and the previous value. If DocSkip
+ // is odd, then a PayloadLength encoded as VInt follows,
+ // if DocSkip is even, then it is assumed that the
+ // current payload length equals the length at the previous
+ // skip point
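+ // Worked example (illustrative): curDoc - lastSkipDoc = 5 with an unchanged payload
+ // length is written as VInt(5*2)=10; if the payload length changed to 7 it is written
+ // as VInt(5*2+1)=11 followed by VInt(7). Without payloads the raw delta 5 is written.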
+ if (curStorePayloads)
+ {
+ int delta = curDoc - lastSkipDoc[level];
+ if (curPayloadLength == lastSkipPayloadLength[level])
+ {
+ // the current payload length equals the length at the previous skip point,
+ // so we don't store the length again
+ skipBuffer.WriteVInt(delta * 2);
+ }
+ else
+ {
+ // the payload length is different from the previous one. We shift the DocSkip,
+ // set the lowest bit and store the current payload length as VInt.
+ skipBuffer.WriteVInt(delta * 2 + 1);
+ skipBuffer.WriteVInt(curPayloadLength);
+ lastSkipPayloadLength[level] = curPayloadLength;
+ }
+ }
+ else
+ {
+ // current field does not store payloads
+ skipBuffer.WriteVInt(curDoc - lastSkipDoc[level]);
+ }
+ skipBuffer.WriteVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
+ skipBuffer.WriteVInt((int) (curProxPointer - lastSkipProxPointer[level]));
+
+ lastSkipDoc[level] = curDoc;
+ //System.out.println("write doc at level " + level + ": " + curDoc);
+
+ lastSkipFreqPointer[level] = curFreqPointer;
+ lastSkipProxPointer[level] = curProxPointer;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DirectoryReader.cs b/external/Lucene.Net.Light/src/core/Index/DirectoryReader.cs
new file mode 100644
index 0000000000..574448ddf8
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DirectoryReader.cs
@@ -0,0 +1,1548 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using FieldSelector = Lucene.Net.Documents.FieldSelector;
+using Directory = Lucene.Net.Store.Directory;
+using Lock = Lucene.Net.Store.Lock;
+using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException;
+using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
+
+namespace Lucene.Net.Index
+{
+
+ /// An IndexReader which reads indexes with multiple segments.
+ public class DirectoryReader:IndexReader
+ {
+ /*new*/ private class AnonymousClassFindSegmentsFile:SegmentInfos.FindSegmentsFile
+ {
+ private void InitBlock(bool readOnly, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor)
+ {
+ this.readOnly = readOnly;
+ this.deletionPolicy = deletionPolicy;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+ }
+ private bool readOnly;
+ private IndexDeletionPolicy deletionPolicy;
+ private int termInfosIndexDivisor;
+ internal AnonymousClassFindSegmentsFile(bool readOnly, Lucene.Net.Index.IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor, Lucene.Net.Store.Directory Param1):base(Param1)
+ {
+ InitBlock(readOnly, deletionPolicy, termInfosIndexDivisor);
+ }
+ public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
+ {
+ var infos = new SegmentInfos();
+ infos.Read(directory, segmentFileName);
+ if (readOnly)
+ return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor);
+ else
+ return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor);
+ }
+ }
+ private class AnonymousClassFindSegmentsFile1:SegmentInfos.FindSegmentsFile
+ {
+ private void InitBlock(bool openReadOnly, DirectoryReader enclosingInstance)
+ {
+ this.openReadOnly = openReadOnly;
+ this.enclosingInstance = enclosingInstance;
+ }
+ private bool openReadOnly;
+ private DirectoryReader enclosingInstance;
+ public DirectoryReader Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+ internal AnonymousClassFindSegmentsFile1(bool openReadOnly, DirectoryReader enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1)
+ {
+ InitBlock(openReadOnly, enclosingInstance);
+ }
+ public /*protected internal*/ override System.Object DoBody(System.String segmentFileName)
+ {
+ var infos = new SegmentInfos();
+ infos.Read(directory, segmentFileName);
+ return Enclosing_Instance.DoReopen(infos, false, openReadOnly);
+ }
+ }
+ protected internal Directory internalDirectory;
+ protected internal bool readOnly;
+
+ internal IndexWriter writer;
+
+ private IndexDeletionPolicy deletionPolicy;
+ private readonly HashSet<string> synced = new HashSet<string>();
+ private Lock writeLock;
+ private readonly SegmentInfos segmentInfos;
+ private readonly SegmentInfos segmentInfosStart;
+ private bool stale;
+ private readonly int termInfosIndexDivisor;
+
+ private bool rollbackHasChanges;
+
+ private SegmentReader[] subReaders;
+ private int[] starts; // 1st docno for each segment
+ private System.Collections.Generic.IDictionary<string, byte[]> normsCache = new HashMap<string, byte[]>();
+ private int maxDoc = 0;
+ private int numDocs = - 1;
+ private bool hasDeletions = false;
+
+ // Max version in index as of when we opened; this can be
+ // > our current segmentInfos version in case we were
+ // opened on a past IndexCommit:
+ private long maxIndexVersion;
+
+ internal static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor)
+ {
+ return (IndexReader) new AnonymousClassFindSegmentsFile(readOnly, deletionPolicy, termInfosIndexDivisor, directory).Run(commit);
+ }
+
+ /// Construct reading the named set of readers.
+ internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor)
+ {
+ internalDirectory = directory;
+ this.readOnly = readOnly;
+ this.segmentInfos = sis;
+ this.deletionPolicy = deletionPolicy;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+
+ if (!readOnly)
+ {
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(sis.Files(directory, true));
+ }
+
+ // To reduce the chance of hitting FileNotFound
+ // (and having to retry), we open segments in
+ // reverse because IndexWriter merges & deletes
+ // the newest segments first.
+
+ var readers = new SegmentReader[sis.Count];
+ for (int i = sis.Count - 1; i >= 0; i--)
+ {
+ bool success = false;
+ try
+ {
+ readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Close all readers we had opened:
+ for (i++; i < sis.Count; i++)
+ {
+ try
+ {
+ readers[i].Close();
+ }
+ catch (System.Exception)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+
+ Initialize(readers);
+ }
+
+ // Used by near real-time search
+ internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor)
+ {
+ this.internalDirectory = writer.Directory;
+ this.readOnly = true;
+ segmentInfos = infos;
+ segmentInfosStart = (SegmentInfos) infos.Clone();
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+ if (!readOnly)
+ {
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(infos.Files(internalDirectory, true));
+ }
+
+ // IndexWriter synchronizes externally before calling
+ // us, which ensures infos will not change; so there's
+ // no need to process segments in reverse order
+ int numSegments = infos.Count;
+ var readers = new SegmentReader[numSegments];
+ Directory dir = writer.Directory;
+ int upto = 0;
+
+ for (int i = 0; i < numSegments; i++)
+ {
+ bool success = false;
+ try
+ {
+ SegmentInfo info = infos.Info(i);
+ if (info.dir == dir)
+ {
+ readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor);
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Close all readers we had opened:
+ for (upto--; upto >= 0; upto--)
+ {
+ try
+ {
+ readers[upto].Close();
+ }
+ catch (System.Exception)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+
+ this.writer = writer;
+
+ if (upto < readers.Length)
+ {
+ // This means some segments were in a foreign Directory
+ var newReaders = new SegmentReader[upto];
+ Array.Copy(readers, 0, newReaders, 0, upto);
+ readers = newReaders;
+ }
+
+ Initialize(readers);
+ }
+
+ /// This constructor is only used for Reopen().
+ internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
+ IEnumerable<KeyValuePair<string, byte[]>> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor)
+ {
+ this.internalDirectory = directory;
+ this.readOnly = readOnly;
+ this.segmentInfos = infos;
+ this.termInfosIndexDivisor = termInfosIndexDivisor;
+ if (!readOnly)
+ {
+ // We assume that this segments_N was previously
+ // properly sync'd:
+ synced.UnionWith(infos.Files(directory, true));
+ }
+
+ // we put the old SegmentReaders in a map, that allows us
+ // to lookup a reader using its segment name
+ IDictionary<string, int> segmentReaders = new HashMap<string, int>();
+
+ if (oldReaders != null)
+ {
+ // create a Map SegmentName->SegmentReader
+ for (int i = 0; i < oldReaders.Length; i++)
+ {
+ segmentReaders[oldReaders[i].SegmentName] = i;
+ }
+ }
+
+ var newReaders = new SegmentReader[infos.Count];
+
+ // remember which readers are shared between the old and the re-opened
+ // DirectoryReader - we have to incRef those readers
+ var readerShared = new bool[infos.Count];
+
+ for (int i = infos.Count - 1; i >= 0; i--)
+ {
+ // find SegmentReader for this segment
+ if (!segmentReaders.ContainsKey(infos.Info(i).name))
+ {
+ // this is a new segment, no old SegmentReader can be reused
+ newReaders[i] = null;
+ }
+ else
+ {
+ // there is an old reader for this segment - we'll try to reopen it
+ newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]];
+ }
+
+ bool success = false;
+ try
+ {
+ SegmentReader newReader;
+ if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile())
+ {
+
+ // We should never see a totally new segment during cloning
+ System.Diagnostics.Debug.Assert(!doClone);
+
+ // this is a new reader; in case we hit an exception we can close it safely
+ newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor);
+ }
+ else
+ {
+ newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly);
+ }
+ if (newReader == newReaders[i])
+ {
+ // this reader will be shared between the old and the new one,
+ // so we must incRef it
+ readerShared[i] = true;
+ newReader.IncRef();
+ }
+ else
+ {
+ readerShared[i] = false;
+ newReaders[i] = newReader;
+ }
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ for (i++; i < infos.Count; i++)
+ {
+ if (newReaders[i] != null)
+ {
+ try
+ {
+ if (!readerShared[i])
+ {
+ // this is a new subReader that is not used by the old one,
+ // we can close it
+ newReaders[i].Close();
+ }
+ else
+ {
+ // this subReader is also used by the old reader, so instead
+ // closing we must decRef it
+ newReaders[i].DecRef();
+ }
+ }
+ catch (System.IO.IOException)
+ {
+ // keep going - we want to clean up as much as possible
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // initialize the readers to calculate maxDoc before we try to reuse the old normsCache
+ Initialize(newReaders);
+
+ // try to copy unchanged norms from the old normsCache to the new one
+ if (oldNormsCache != null)
+ {
+ foreach(var entry in oldNormsCache)
+ {
+ String field = entry.Key;
+ if (!HasNorms(field))
+ {
+ continue;
+ }
+
+ byte[] oldBytes = entry.Value;
+
+ var bytes = new byte[MaxDoc];
+
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ int oldReaderIndex = segmentReaders[subReaders[i].SegmentName];
+
+ // this SegmentReader was not re-opened, we can copy all of its norms
+ if (segmentReaders.ContainsKey(subReaders[i].SegmentName) &&
+ (oldReaders[oldReaderIndex] == subReaders[i]
+ || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field]))
+ {
+ // we don't have to synchronize here: either this constructor is called from a SegmentReader,
+ // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
+ // which is synchronized
+ Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]);
+ }
+ else
+ {
+ subReaders[i].Norms(field, bytes, starts[i]);
+ }
+ }
+
+ normsCache[field] = bytes; // update cache
+ }
+ }
+ }
+
+ private void Initialize(SegmentReader[] subReaders)
+ {
+ this.subReaders = subReaders;
+ starts = new int[subReaders.Length + 1]; // build starts array
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ starts[i] = maxDoc;
+ maxDoc += subReaders[i].MaxDoc; // compute maxDocs
+
+ if (subReaders[i].HasDeletions)
+ hasDeletions = true;
+ }
+ starts[subReaders.Length] = maxDoc;
+
+ if (!readOnly)
+ {
+ maxIndexVersion = SegmentInfos.ReadCurrentVersion(internalDirectory);
+ }
+ }
+
+ public override Object Clone()
+ {
+ lock (this)
+ {
+ try
+ {
+ return Clone(readOnly); // Preserve current readOnly
+ }
+ catch (Exception ex)
+ {
+ throw new SystemException(ex.Message, ex); // TODO: why rethrow this way?
+ }
+ }
+ }
+
+ public override IndexReader Clone(bool openReadOnly)
+ {
+ lock (this)
+ {
+ DirectoryReader newReader = DoReopen((SegmentInfos) segmentInfos.Clone(), true, openReadOnly);
+
+ if (this != newReader)
+ {
+ newReader.deletionPolicy = deletionPolicy;
+ }
+ newReader.writer = writer;
+ // If we're cloning a non-readOnly reader, move the
+ // writeLock (if there is one) to the new reader:
+ if (!openReadOnly && writeLock != null)
+ {
+ // In near real-time search, reader is always readonly
+ System.Diagnostics.Debug.Assert(writer == null);
+ newReader.writeLock = writeLock;
+ newReader.hasChanges = hasChanges;
+ newReader.hasDeletions = hasDeletions;
+ writeLock = null;
+ hasChanges = false;
+ }
+
+ return newReader;
+ }
+ }
+
+ public override IndexReader Reopen()
+ {
+ // Preserve current readOnly
+ return DoReopen(readOnly, null);
+ }
+
+ public override IndexReader Reopen(bool openReadOnly)
+ {
+ return DoReopen(openReadOnly, null);
+ }
+
+ public override IndexReader Reopen(IndexCommit commit)
+ {
+ return DoReopen(true, commit);
+ }
+
+ private IndexReader DoReopenFromWriter(bool openReadOnly, IndexCommit commit)
+ {
+ System.Diagnostics.Debug.Assert(readOnly);
+
+ if (!openReadOnly)
+ {
+ throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)");
+ }
+
+ if (commit != null)
+ {
+ throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
+ }
+
+ // TODO: right now we *always* make a new reader; in
+ // the future we could have write make some effort to
+ // detect that no changes have occurred
+ return writer.GetReader();
+ }
+
+ internal virtual IndexReader DoReopen(bool openReadOnly, IndexCommit commit)
+ {
+ EnsureOpen();
+
+ System.Diagnostics.Debug.Assert(commit == null || openReadOnly);
+
+ // If we were obtained by writer.getReader(), re-ask the
+ // writer to get a new reader.
+ if (writer != null)
+ {
+ return DoReopenFromWriter(openReadOnly, commit);
+ }
+ else
+ {
+ return DoReopenNoWriter(openReadOnly, commit);
+ }
+ }
+
+ private IndexReader DoReopenNoWriter(bool openReadOnly, IndexCommit commit)
+ {
+ lock (this)
+ {
+ if (commit == null)
+ {
+ if (hasChanges)
+ {
+ // We have changes, which means we are not readOnly:
+ System.Diagnostics.Debug.Assert(readOnly == false);
+ // and we hold the write lock:
+ System.Diagnostics.Debug.Assert(writeLock != null);
+ // so no other writer holds the write lock, which
+ // means no changes could have been done to the index:
+ System.Diagnostics.Debug.Assert(IsCurrent());
+
+ if (openReadOnly)
+ {
+ return Clone(openReadOnly);
+ }
+ else
+ {
+ return this;
+ }
+ }
+ else if (IsCurrent())
+ {
+ if (openReadOnly != readOnly)
+ {
+ // Just fallback to clone
+ return Clone(openReadOnly);
+ }
+ else
+ {
+ return this;
+ }
+ }
+ }
+ else
+ {
+ if (internalDirectory != commit.Directory)
+ throw new System.IO.IOException("the specified commit does not match the specified Directory");
+ if (segmentInfos != null && commit.SegmentsFileName.Equals(segmentInfos.GetCurrentSegmentFileName()))
+ {
+ if (readOnly != openReadOnly)
+ {
+ // Just fallback to clone
+ return Clone(openReadOnly);
+ }
+ else
+ {
+ return this;
+ }
+ }
+ }
+
+ return (IndexReader)new AnonymousFindSegmentsFile(internalDirectory, openReadOnly, this).Run(commit);
+ }
+ }
+
+ class AnonymousFindSegmentsFile : SegmentInfos.FindSegmentsFile
+ {
+ readonly DirectoryReader enclosingInstance;
+ readonly bool openReadOnly;
+ readonly Directory dir;
+ public AnonymousFindSegmentsFile(Directory directory, bool openReadOnly, DirectoryReader dirReader) : base(directory)
+ {
+ this.dir = directory;
+ this.openReadOnly = openReadOnly;
+ enclosingInstance = dirReader;
+ }
+
+ public override object DoBody(string segmentFileName)
+ {
+ var infos = new SegmentInfos();
+ infos.Read(dir, segmentFileName);
+ return enclosingInstance.DoReopen(infos, false, openReadOnly);
+ }
+ }
+
+ private DirectoryReader DoReopen(SegmentInfos infos, bool doClone, bool openReadOnly)
+ {
+ lock (this)
+ {
+ DirectoryReader reader;
+ if (openReadOnly)
+ {
+ reader = new ReadOnlyDirectoryReader(internalDirectory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor);
+ }
+ else
+ {
+ reader = new DirectoryReader(internalDirectory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor);
+ }
+ return reader;
+ }
+ }
+
+
+ /// Version number when this IndexReader was opened.
+ public override long Version
+ {
+ get
+ {
+ EnsureOpen();
+ return segmentInfos.Version;
+ }
+ }
+
+ public override ITermFreqVector[] GetTermFreqVectors(int n)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment
+ }
+
+ public override ITermFreqVector GetTermFreqVector(int n, System.String field)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].GetTermFreqVector(n - starts[i], field);
+ }
+
+
+ public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper);
+ }
+
+ public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(docNumber); // find segment num
+ subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper);
+ }
+
+ /// Checks if the index is optimized (i.e. it has a single segment and no deletions).
+ /// <c>true</c> if the index is optimized; <c>false</c> otherwise
+ public override bool IsOptimized()
+ {
+ EnsureOpen();
+ return segmentInfos.Count == 1 && !HasDeletions;
+ }
+
+ public override int NumDocs()
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ // NOTE: multiple threads may wind up init'ing
+ // numDocs... but that's harmless
+ if (numDocs == - 1)
+ {
+ // check cache
+ int n = subReaders.Sum(t => t.NumDocs()); // cache miss--recompute
+ numDocs = n;
+ }
+ return numDocs;
+ }
+
+ public override int MaxDoc
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return maxDoc;
+ }
+ }
+
+ // inherit javadoc
+ public override Document Document(int n, FieldSelector fieldSelector)
+ {
+ EnsureOpen();
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader
+ }
+
+ public override bool IsDeleted(int n)
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ int i = ReaderIndex(n); // find segment num
+ return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader
+ }
+
+ public override bool HasDeletions
+ {
+ get
+ {
+ // Don't call ensureOpen() here (it could affect performance)
+ return hasDeletions;
+ }
+ }
+
+ protected internal override void DoDelete(int n)
+ {
+ numDocs = - 1; // invalidate cache
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader
+ hasDeletions = true;
+ }
+
+ protected internal override void DoUndeleteAll()
+ {
+ foreach (SegmentReader t in subReaders)
+ t.UndeleteAll();
+
+ hasDeletions = false;
+ numDocs = - 1; // invalidate cache
+ }
+
+ private int ReaderIndex(int n)
+ {
+ // find reader for doc n:
+ return ReaderIndex(n, this.starts, this.subReaders.Length);
+ }
+
+ internal static int ReaderIndex(int n, int[] starts, int numSubReaders)
+ {
+ // find reader for doc n:
+ int lo = 0; // search starts array
+ int hi = numSubReaders - 1; // for first element less
+
+ while (hi >= lo)
+ {
+ int mid = Number.URShift((lo + hi), 1);
+ int midValue = starts[mid];
+ if (n < midValue)
+ hi = mid - 1;
+ else if (n > midValue)
+ lo = mid + 1;
+ else
+ {
+ // found a match
+ while (mid + 1 < numSubReaders && starts[mid + 1] == midValue)
+ {
+ mid++; // scan to last match
+ }
+ return mid;
+ }
+ }
+ return hi;
+ }
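+ // Worked example (illustrative): with three sub-readers and starts = {0, 10, 25, 40},
+ // ReaderIndex(17, starts, 3) returns 1 because starts[1]=10 <= 17 < starts[2]=25,
+ // and the segment-local doc number is then 17 - starts[1] = 7.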
+
+ public override bool HasNorms(System.String field)
+ {
+ EnsureOpen();
+ return subReaders.Any(t => t.HasNorms(field));
+ }
+
+ public override byte[] Norms(System.String field)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = normsCache[field];
+ if (bytes != null)
+ return bytes; // cache hit
+ if (!HasNorms(field))
+ return null;
+
+ bytes = new byte[MaxDoc];
+ for (int i = 0; i < subReaders.Length; i++)
+ subReaders[i].Norms(field, bytes, starts[i]);
+ normsCache[field] = bytes; // update cache
+ return bytes;
+ }
+ }
+
+ public override void Norms(System.String field, byte[] result, int offset)
+ {
+ lock (this)
+ {
+ EnsureOpen();
+ byte[] bytes = normsCache[field];
+ if (bytes == null && !HasNorms(field))
+ {
+ byte val = DefaultSimilarity.EncodeNorm(1.0f);
+ for (int index = offset; index < result.Length; index++)
+ result.SetValue(val, index);
+ }
+ else if (bytes != null)
+ {
+ // cache hit
+ Array.Copy(bytes, 0, result, offset, MaxDoc);
+ }
+ else
+ {
+ for (int i = 0; i < subReaders.Length; i++)
+ {
+ // read from segments
+ subReaders[i].Norms(field, result, offset + starts[i]);
+ }
+ }
+ }
+ }
+
+ protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
+ {
+ lock (normsCache)
+ {
+ normsCache.Remove(field); // clear cache
+ }
+ int i = ReaderIndex(n); // find segment num
+ subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch
+ }
+
+ public override TermEnum Terms()
+ {
+ EnsureOpen();
+ return new MultiTermEnum(this, subReaders, starts, null);
+ }
+
+ public override TermEnum Terms(Term term)
+ {
+ EnsureOpen();
+ return new MultiTermEnum(this, subReaders, starts, term);
+ }
+
+ public override int DocFreq(Term t)
+ {
+ EnsureOpen();
+ int total = 0; // sum freqs in segments
+ for (int i = 0; i < subReaders.Length; i++)
+ total += subReaders[i].DocFreq(t);
+ return total;
+ }
+
+ public override TermDocs TermDocs()
+ {
+ EnsureOpen();
+ return new MultiTermDocs(this, subReaders, starts);
+ }
+
+ public override TermPositions TermPositions()
+ {
+ EnsureOpen();
+ return new MultiTermPositions(this, subReaders, starts);
+ }
+
+ /// Tries to acquire the WriteLock on this directory. This method is only valid if this IndexReader is the
+ /// directory owner.
+ ///
+ ///
+ /// StaleReaderException if the index has changed since this reader was opened
+ /// CorruptIndexException if the index is corrupt
+ /// Lucene.Net.Store.LockObtainFailedException
+ /// if another writer has this index open (write.lock could not be
+ /// obtained)
+ ///
+ /// IOException if there is a low-level IO error
+ protected internal override void AcquireWriteLock()
+ {
+
+ if (readOnly)
+ {
+ // NOTE: we should not reach this code w/ the core
+ // IndexReader classes; however, an external subclass
+ // of IndexReader could reach this.
+ ReadOnlySegmentReader.NoWrite();
+ }
+
+ if (segmentInfos != null)
+ {
+ EnsureOpen();
+ if (stale)
+ throw new StaleReaderException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
+
+ if (this.writeLock == null)
+ {
+ Lock writeLock = internalDirectory.MakeLock(IndexWriter.WRITE_LOCK_NAME);
+ if (!writeLock.Obtain(IndexWriter.WRITE_LOCK_TIMEOUT))
+ // obtain write lock
+ {
+ throw new LockObtainFailedException("Index locked for write: " + writeLock);
+ }
+ this.writeLock = writeLock;
+
+ // we have to check whether index has changed since this reader was opened.
+ // if so, this reader is no longer valid for
+ // deletion
+ if (SegmentInfos.ReadCurrentVersion(internalDirectory) > maxIndexVersion)
+ {
+ stale = true;
+ this.writeLock.Release();
+ this.writeLock = null;
+ throw new StaleReaderException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations");
+ }
+ }
+ }
+ }
+
+ /// Commit changes resulting from delete, undeleteAll, or setNorm operations
+ ///
+ /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional
+ /// semantics).
+ ///
+ ///
+ /// IOException if there is a low-level IO error
+ protected internal override void DoCommit(IDictionary<string, string> commitUserData)
+ {
+ if (hasChanges)
+ {
+ segmentInfos.UserData = commitUserData;
+ // Default deleter (for backwards compatibility) is
+ // KeepOnlyLastCommitDeleter:
+ var deleter = new IndexFileDeleter(internalDirectory, deletionPolicy ?? new KeepOnlyLastCommitDeletionPolicy(), segmentInfos, null, null, synced);
+
+ segmentInfos.UpdateGeneration(deleter.LastSegmentInfos);
+
+ // Checkpoint the state we are about to change, in
+ // case we have to roll back:
+ StartCommit();
+
+ bool success = false;
+ try
+ {
+ foreach (SegmentReader t in subReaders)
+ t.Commit();
+
+ // Sync all files we just wrote
+ foreach(string fileName in segmentInfos.Files(internalDirectory, false))
+ {
+ if(!synced.Contains(fileName))
+ {
+ System.Diagnostics.Debug.Assert(internalDirectory.FileExists(fileName));
+ internalDirectory.Sync(fileName);
+ synced.Add(fileName);
+ }
+ }
+
+ segmentInfos.Commit(internalDirectory);
+ success = true;
+ }
+ finally
+ {
+
+ if (!success)
+ {
+
+ // Rollback changes that were made to
+ // SegmentInfos but failed to get [fully]
+ // committed. This way this reader instance
+ // remains consistent (matched to what's
+ // actually in the index):
+ RollbackCommit();
+
+ // Recompute deletable files & remove them (so
+ // partially written .del files, etc, are
+ // removed):
+ deleter.Refresh();
+ }
+ }
+
+ // Have the deleter remove any now unreferenced
+ // files due to this commit:
+ deleter.Checkpoint(segmentInfos, true);
+ deleter.Dispose();
+
+ maxIndexVersion = segmentInfos.Version;
+
+ if (writeLock != null)
+ {
+ writeLock.Release(); // release write lock
+ writeLock = null;
+ }
+ }
+ hasChanges = false;
+ }
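+
+ // The commit above follows a checkpoint/rollback idiom. A minimal standalone sketch
+ // of the same pattern (the names below are illustrative, not from this file):
+ //
+ //   Checkpoint();                            // remember the state about to change
+ //   bool success = false;
+ //   try { WriteChanges(); success = true; }
+ //   finally { if (!success) Rollback(); }    // leave the reader consistent on failure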
+
+ internal virtual void StartCommit()
+ {
+ rollbackHasChanges = hasChanges;
+ foreach (SegmentReader t in subReaders)
+ {
+ t.StartCommit();
+ }
+ }
+
+ internal virtual void RollbackCommit()
+ {
+ hasChanges = rollbackHasChanges;
+ foreach (SegmentReader t in subReaders)
+ {
+ t.RollbackCommit();
+ }
+ }
+
+ public override IDictionary<string, string> CommitUserData
+ {
+ get
+ {
+ EnsureOpen();
+ return segmentInfos.UserData;
+ }
+ }
+
+ public override bool IsCurrent()
+ {
+ EnsureOpen();
+ if (writer == null || writer.IsClosed())
+ {
+ // we loaded SegmentInfos from the directory
+ return SegmentInfos.ReadCurrentVersion(internalDirectory) == segmentInfos.Version;
+ }
+ else
+ {
+ return writer.NrtIsCurrent(segmentInfosStart);
+ }
+ }
+
+ protected internal override void DoClose()
+ {
+ lock (this)
+ {
+ System.IO.IOException ioe = null;
+ normsCache = null;
+ foreach (SegmentReader t in subReaders)
+ {
+ // try to close each reader, even if an exception is thrown
+ try
+ {
+ t.DecRef();
+ }
+ catch (System.IO.IOException e)
+ {
+ if (ioe == null)
+ ioe = e;
+ }
+ }
+
+ // NOTE: only needed in case someone had asked for
+ // FieldCache for top-level reader (which is generally
+ // not a good idea):
+ Search.FieldCache_Fields.DEFAULT.Purge(this);
+
+ // throw the first exception
+ if (ioe != null)
+ throw ioe;
+ }
+ }
+
+ public override ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
+ {
+ EnsureOpen();
+ return GetFieldNames(fieldNames, this.subReaders);
+ }
+
+ internal static ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames, IndexReader[] subReaders)
+ {
+ // maintain a unique set of field names
+ ISet<string> fieldSet = Support.Compatibility.SetFactory.CreateHashSet<string>();
+ foreach (IndexReader reader in subReaders)
+ {
+ fieldSet.UnionWith(reader.GetFieldNames(fieldNames));
+ }
+ return fieldSet;
+ }
+
+ public override IndexReader[] GetSequentialSubReaders()
+ {
+ return subReaders;
+ }
+
+ /// Returns the directory this index resides in.
+ public override Directory Directory()
+ {
+ // Don't ensureOpen here -- in certain cases, when a
+ // cloned/reopened reader needs to commit, it may call
+ // this method on the closed original reader
+ return internalDirectory;
+ }
+
+ public override int TermInfosIndexDivisor
+ {
+ get { return termInfosIndexDivisor; }
+ }
+
+ /// Expert: return the IndexCommit that this reader has opened.
+ ///
+ ///
+ /// WARNING: this API is new and experimental and may suddenly change.
+ ///
+ public override IndexCommit IndexCommit
+ {
+ get { return new ReaderCommit(segmentInfos, internalDirectory); }
+ }
+
+ ///
+ ///
+ public static new ICollection<IndexCommit> ListCommits(Directory dir)
+ {
+ String[] files = dir.ListAll();
+
+ ICollection<IndexCommit> commits = new List<IndexCommit>();
+
+ var latest = new SegmentInfos();
+ latest.Read(dir);
+ long currentGen = latest.Generation;
+
+ commits.Add(new ReaderCommit(latest, dir));
+
+ foreach (string fileName in files)
+ {
+ if (fileName.StartsWith(IndexFileNames.SEGMENTS) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN) && SegmentInfos.GenerationFromSegmentsFileName(fileName) < currentGen)
+ {
+
+ var sis = new SegmentInfos();
+ try
+ {
+ // IOException allowed to throw there, in case
+ // segments_N is corrupt
+ sis.Read(dir, fileName);
+ }
+ catch (System.IO.FileNotFoundException)
+ {
+ // LUCENE-948: on NFS (and maybe others), if
+ // you have writers switching back and forth
+ // between machines, it's very likely that the
+ // dir listing will be stale and will claim a
+ // file segments_X exists when in fact it
+ // doesn't. So, we catch this and handle it
+ // as if the file does not exist
+ sis = null;
+ }
+
+ if (sis != null)
+ commits.Add(new ReaderCommit(sis, dir));
+ }
+ }
+
+ return commits;
+ }
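+
+ // Usage sketch (the directory path is illustrative): enumerate the commit points
+ // still present in an index directory via the method above.
+ //
+ //   Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/index"));
+ //   foreach (IndexCommit commit in IndexReader.ListCommits(dir))
+ //       System.Console.WriteLine(commit.SegmentsFileName + " gen=" + commit.Generation);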
+
+ private sealed class ReaderCommit:IndexCommit
+ {
+ private readonly String segmentsFileName;
+ private readonly ICollection<string> files;
+ private readonly Directory dir;
+ private readonly long generation;
+ private readonly long version;
+ private readonly bool isOptimized;
+ private readonly IDictionary<string, string> userData;
+
+ internal ReaderCommit(SegmentInfos infos, Directory dir)
+ {
+ segmentsFileName = infos.GetCurrentSegmentFileName();
+ this.dir = dir;
+ userData = infos.UserData;
+ files = infos.Files(dir, true);
+ version = infos.Version;
+ generation = infos.Generation;
+ isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions();
+ }
+ public override string ToString()
+ {
+ return "DirectoryReader.ReaderCommit(" + segmentsFileName + ")";
+ }
+
+ public override bool IsOptimized
+ {
+ get { return isOptimized; }
+ }
+
+ public override string SegmentsFileName
+ {
+ get { return segmentsFileName; }
+ }
+
+ public override ICollection<string> FileNames
+ {
+ get { return files; }
+ }
+
+ public override Directory Directory
+ {
+ get { return dir; }
+ }
+
+ public override long Version
+ {
+ get { return version; }
+ }
+
+ public override long Generation
+ {
+ get { return generation; }
+ }
+
+ public override bool IsDeleted
+ {
+ get { return false; }
+ }
+
+ public override IDictionary<string, string> UserData
+ {
+ get { return userData; }
+ }
+
+ public override void Delete()
+ {
+ throw new System.NotSupportedException("This IndexCommit does not support deletions");
+ }
+ }
+
+ internal class MultiTermEnum:TermEnum
+ {
+ internal IndexReader topReader; // used for matching TermEnum to TermDocs
+ private readonly SegmentMergeQueue queue;
+
+ private Term term;
+ private int docFreq;
+ internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
+
+ public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t)
+ {
+ this.topReader = topReader;
+ queue = new SegmentMergeQueue(readers.Length);
+ matchingSegments = new SegmentMergeInfo[readers.Length + 1];
+ for (int i = 0; i < readers.Length; i++)
+ {
+ IndexReader reader = readers[i];
+
+ TermEnum termEnum = t != null ? reader.Terms(t) : reader.Terms();
+
+ var smi = new SegmentMergeInfo(starts[i], termEnum, reader) {ord = i};
+ if (t == null ? smi.Next() : termEnum.Term != null)
+ queue.Add(smi); // initialize queue
+ else
+ smi.Dispose();
+ }
+
+ if (t != null && queue.Size() > 0)
+ {
+ Next();
+ }
+ }
+
+ public override bool Next()
+ {
+ foreach (SegmentMergeInfo smi in matchingSegments)
+ {
+ if (smi == null)
+ break;
+ if (smi.Next())
+ queue.Add(smi);
+ else
+ smi.Dispose(); // done with segment
+ }
+
+ int numMatchingSegments = 0;
+ matchingSegments[0] = null;
+
+ SegmentMergeInfo top = queue.Top();
+
+ if (top == null)
+ {
+ term = null;
+ return false;
+ }
+
+ term = top.term;
+ docFreq = 0;
+
+ while (top != null && term.CompareTo(top.term) == 0)
+ {
+ matchingSegments[numMatchingSegments++] = top;
+ queue.Pop();
+ docFreq += top.termEnum.DocFreq(); // increment freq
+ top = queue.Top();
+ }
+
+ matchingSegments[numMatchingSegments] = null;
+ return true;
+ }
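+
+ // Next() above is a k-way merge: the queue orders each segment by its current term,
+ // and every segment positioned on the smallest term is popped into matchingSegments
+ // while its docFreq is summed. A small walkthrough with illustrative terms:
+ //
+ //   segment 0: "apple"(df=2), "cherry"(df=1)
+ //   segment 1: "apple"(df=3), "banana"(df=4)
+ //   first Next():  term="apple",  docFreq=5, matchingSegments={seg0, seg1}
+ //   second Next(): term="banana", docFreq=4, matchingSegments={seg1}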
+
+ public override Term Term
+ {
+ get { return term; }
+ }
+
+ public override int DocFreq()
+ {
+ return docFreq;
+ }
+
+ protected override void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ queue.Dispose();
+ }
+ }
+ }
+
+ internal class MultiTermDocs : TermDocs
+ {
+ internal IndexReader topReader; // used for matching TermEnum to TermDocs
+ protected internal IndexReader[] readers;
+ protected internal int[] starts;
+ protected internal Term term;
+
+ protected internal int base_Renamed = 0;
+ protected internal int pointer = 0;
+
+ private readonly TermDocs[] readerTermDocs;
+ protected internal TermDocs current; // == readerTermDocs[pointer]
+
+ private MultiTermEnum tenum; // the term enum used for seeking... can be null
+ internal int matchingSegmentPos; // position into the matching segments from tenum
+ internal SegmentMergeInfo smi; // current segment mere info... can be null
+
+ public MultiTermDocs(IndexReader topReader, IndexReader[] r, int[] s)
+ {
+ this.topReader = topReader;
+ readers = r;
+ starts = s;
+
+ readerTermDocs = new TermDocs[r.Length];
+ }
+
+ public virtual int Doc
+ {
+ get { return base_Renamed + current.Doc; }
+ }
+
+ public virtual int Freq
+ {
+ get { return current.Freq; }
+ }
+
+ public virtual void Seek(Term term)
+ {
+ this.term = term;
+ this.base_Renamed = 0;
+ this.pointer = 0;
+ this.current = null;
+ this.tenum = null;
+ this.smi = null;
+ this.matchingSegmentPos = 0;
+ }
+
+ public virtual void Seek(TermEnum termEnum)
+ {
+ Seek(termEnum.Term);
+ var multiTermEnum = termEnum as MultiTermEnum;
+ if (multiTermEnum != null)
+ {
+ tenum = multiTermEnum;
+ if (topReader != tenum.topReader)
+ tenum = null;
+ }
+ }
+
+ public virtual bool Next()
+ {
+ for (; ; )
+ {
+ if (current != null && current.Next())
+ {
+ return true;
+ }
+ else if (pointer < readers.Length)
+ {
+ if (tenum != null)
+ {
+ smi = tenum.matchingSegments[matchingSegmentPos++];
+ if (smi == null)
+ {
+ pointer = readers.Length;
+ return false;
+ }
+ pointer = smi.ord;
+ }
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ /// Optimized implementation.
+ public virtual int Read(int[] docs, int[] freqs)
+ {
+ while (true)
+ {
+ while (current == null)
+ {
+ if (pointer < readers.Length)
+ {
+ // try next segment
+ if (tenum != null)
+ {
+ smi = tenum.matchingSegments[matchingSegmentPos++];
+ if (smi == null)
+ {
+ pointer = readers.Length;
+ return 0;
+ }
+ pointer = smi.ord;
+ }
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ int end = current.Read(docs, freqs);
+ if (end == 0)
+ {
+ // none left in segment
+ current = null;
+ }
+ else
+ {
+ // got some
+ int b = base_Renamed; // adjust doc numbers
+ for (int i = 0; i < end; i++)
+ docs[i] += b;
+ return end;
+ }
+ }
+ }
+
+ /* A Possible future optimization could skip entire segments */
+ public virtual bool SkipTo(int target)
+ {
+ for (; ; )
+ {
+ if (current != null && current.SkipTo(target - base_Renamed))
+ {
+ return true;
+ }
+ else if (pointer < readers.Length)
+ {
+ if (tenum != null)
+ {
+ SegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++];
+ if (smi == null)
+ {
+ pointer = readers.Length;
+ return false;
+ }
+ pointer = smi.ord;
+ }
+ base_Renamed = starts[pointer];
+ current = TermDocs(pointer++);
+ }
+ else
+ return false;
+ }
+ }
+
+ private TermDocs TermDocs(int i)
+ {
+ TermDocs result = readerTermDocs[i] ?? (readerTermDocs[i] = TermDocs(readers[i]));
+ if (smi != null)
+ {
+ System.Diagnostics.Debug.Assert((smi.ord == i));
+ System.Diagnostics.Debug.Assert((smi.termEnum.Term.Equals(term)));
+ result.Seek(smi.termEnum);
+ }
+ else
+ {
+ result.Seek(term);
+ }
+ return result;
+ }
+
+ protected internal virtual TermDocs TermDocs(IndexReader reader)
+ {
+ return term == null ? reader.TermDocs(null) : reader.TermDocs();
+ }
+
+ public virtual void Close()
+ {
+ Dispose();
+ }
+
+ public virtual void Dispose()
+ {
+ Dispose(true);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ foreach (TermDocs t in readerTermDocs)
+ {
+ if (t != null)
+ t.Close();
+ }
+ }
+ }
+ }
+
+ internal class MultiTermPositions:MultiTermDocs, TermPositions
+ {
+ public MultiTermPositions(IndexReader topReader, IndexReader[] r, int[] s):base(topReader, r, s)
+ {
+ }
+
+ protected internal override TermDocs TermDocs(IndexReader reader)
+ {
+ return reader.TermPositions();
+ }
+
+ public virtual int NextPosition()
+ {
+ return ((TermPositions) current).NextPosition();
+ }
+
+ public virtual int PayloadLength
+ {
+ get { return ((TermPositions) current).PayloadLength; }
+ }
+
+ public virtual byte[] GetPayload(byte[] data, int offset)
+ {
+ return ((TermPositions) current).GetPayload(data, offset);
+ }
+
+
+ // TODO: Remove warning after API has been finalized
+
+ public virtual bool IsPayloadAvailable
+ {
+ get { return ((TermPositions) current).IsPayloadAvailable; }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocConsumer.cs b/external/Lucene.Net.Light/src/core/Index/DocConsumer.cs
new file mode 100644
index 0000000000..238e38c5fc
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocConsumer.cs
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocConsumer
+ {
+ public abstract DocConsumerPerThread AddThread(DocumentsWriterThreadState perThread);
+ public abstract void Flush(System.Collections.Generic.ICollection<DocConsumerPerThread> threads, SegmentWriteState state);
+ public abstract void CloseDocStore(SegmentWriteState state);
+ public abstract void Abort();
+ public abstract bool FreeRAM();
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocConsumerPerThread.cs
new file mode 100644
index 0000000000..7c7ed025d8
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocConsumerPerThread.cs
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocConsumerPerThread
+ {
+
+ /// Process the document. If there is
+ /// something for this document to be done in docID order,
+ /// you should encapsulate that as a
+ /// DocumentsWriter.DocWriter and return it.
+ /// DocumentsWriter then calls finish() on this object
+ /// when it's its turn.
+ ///
+ public abstract DocumentsWriter.DocWriter ProcessDocument();
+
+ public abstract void Abort();
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumer.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumer.cs
new file mode 100644
index 0000000000..7fc59da4e5
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumer.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocFieldConsumer
+ {
+
+ internal FieldInfos fieldInfos;
+
+ /// Called when DocumentsWriter decides to create a new
+ /// segment
+ ///
+ public abstract void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state);
+
+ /// Called when DocumentsWriter decides to close the doc
+ /// stores
+ ///
+ public abstract void CloseDocStore(SegmentWriteState state);
+
+ /// Called when an aborting exception is hit
+ public abstract void Abort();
+
+ /// Add a new thread
+ public abstract DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread);
+
+ /// Called when DocumentsWriter is using too much RAM.
+ /// The consumer should free RAM, if possible, returning
+ /// true if any RAM was in fact freed.
+ ///
+ public abstract bool FreeRAM();
+
+ internal virtual void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ this.fieldInfos = fieldInfos;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerField.cs
new file mode 100644
index 0000000000..27636e2033
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerField.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocFieldConsumerPerField
+ {
+ /// Processes all occurrences of a single field
+ public abstract void ProcessFields(IFieldable[] fields, int count);
+ public abstract void Abort();
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerThread.cs
new file mode 100644
index 0000000000..8f533ac657
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerThread.cs
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ abstract class DocFieldConsumerPerThread
+ {
+ public abstract void StartDocument();
+ public abstract DocumentsWriter.DocWriter FinishDocument();
+ public abstract DocFieldConsumerPerField AddField(FieldInfo fi);
+ public abstract void Abort();
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumers.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumers.cs
new file mode 100644
index 0000000000..61b9b1dacb
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumers.cs
@@ -0,0 +1,221 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// This is just a "splitter" class: it lets you wrap two
+ /// DocFieldConsumer instances as a single consumer.
+ ///
+
+ sealed class DocFieldConsumers : DocFieldConsumer
+ {
+ private void InitBlock()
+ {
+ docFreeList = new PerDoc[1];
+ }
+ internal DocFieldConsumer one;
+ internal DocFieldConsumer two;
+
+ public DocFieldConsumers(DocFieldConsumer one, DocFieldConsumer two)
+ {
+ InitBlock();
+ this.one = one;
+ this.two = two;
+ }
+
+ internal override void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ base.SetFieldInfos(fieldInfos);
+ one.SetFieldInfos(fieldInfos);
+ two.SetFieldInfos(fieldInfos);
+ }
+
+ public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ var oneThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
+ var twoThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
+
+ foreach(var entry in threadsAndFields)
+ {
+ DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key;
+ ICollection<DocFieldConsumerPerField> fields = entry.Value;
+
+ IEnumerator<DocFieldConsumerPerField> fieldsIt = fields.GetEnumerator();
+ ICollection<DocFieldConsumerPerField> oneFields = new HashSet<DocFieldConsumerPerField>();
+ ICollection<DocFieldConsumerPerField> twoFields = new HashSet<DocFieldConsumerPerField>();
+ while (fieldsIt.MoveNext())
+ {
+ DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current;
+ oneFields.Add(perField.one);
+ twoFields.Add(perField.two);
+ }
+
+ oneThreadsAndFields[perThread.one] = oneFields;
+ twoThreadsAndFields[perThread.two] = twoFields;
+ }
+
+
+ one.Flush(oneThreadsAndFields, state);
+ two.Flush(twoThreadsAndFields, state);
+ }
+
+ public override void CloseDocStore(SegmentWriteState state)
+ {
+ try
+ {
+ one.CloseDocStore(state);
+ }
+ finally
+ {
+ two.CloseDocStore(state);
+ }
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+
+ public override bool FreeRAM()
+ {
+ bool any = one.FreeRAM();
+ any |= two.FreeRAM();
+ return any;
+ }
+
+ public override DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread)
+ {
+ return new DocFieldConsumersPerThread(docFieldProcessorPerThread, this, one.AddThread(docFieldProcessorPerThread), two.AddThread(docFieldProcessorPerThread));
+ }
+
+ internal PerDoc[] docFreeList;
+ internal int freeCount;
+ internal int allocCount;
+
+ internal PerDoc GetPerDoc()
+ {
+ lock (this)
+ {
+ if (freeCount == 0)
+ {
+ allocCount++;
+ if (allocCount > docFreeList.Length)
+ {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+ docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+ }
+ return new PerDoc(this);
+ }
+ else
+ return docFreeList[--freeCount];
+ }
+ }
+
+ internal void FreePerDoc(PerDoc perDoc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+ docFreeList[freeCount++] = perDoc;
+ }
+ }
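+
+ // GetPerDoc/FreePerDoc above implement a simple free-list pool: finished PerDoc
+ // instances are pushed back into docFreeList and handed out again before any new
+ // allocation. A minimal sketch of the idiom (usage only, names as in this class):
+ //
+ //   PerDoc doc = GetPerDoc();   // reuses docFreeList[--freeCount] when available
+ //   ... fill doc.one / doc.two ...
+ //   doc.Finish();               // calls FreePerDoc(doc), returning it to the pool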
+
+ internal class PerDoc:DocumentsWriter.DocWriter
+ {
+ public PerDoc(DocFieldConsumers enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(DocFieldConsumers enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocFieldConsumers enclosingInstance;
+ public DocFieldConsumers Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal DocumentsWriter.DocWriter one;
+ internal DocumentsWriter.DocWriter two;
+
+ public override long SizeInBytes()
+ {
+ return one.SizeInBytes() + two.SizeInBytes();
+ }
+
+ public override void Finish()
+ {
+ try
+ {
+ try
+ {
+ one.Finish();
+ }
+ finally
+ {
+ two.Finish();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerField.cs
new file mode 100644
index 0000000000..71e96e0eaa
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerField.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class DocFieldConsumersPerField:DocFieldConsumerPerField
+ {
+
+ internal DocFieldConsumerPerField one;
+ internal DocFieldConsumerPerField two;
+ internal DocFieldConsumersPerThread perThread;
+
+ public DocFieldConsumersPerField(DocFieldConsumersPerThread perThread, DocFieldConsumerPerField one, DocFieldConsumerPerField two)
+ {
+ this.perThread = perThread;
+ this.one = one;
+ this.two = two;
+ }
+
+ public override void ProcessFields(IFieldable[] fields, int count)
+ {
+ one.ProcessFields(fields, count);
+ two.ProcessFields(fields, count);
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerThread.cs
new file mode 100644
index 0000000000..7098966e14
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerThread.cs
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ sealed class DocFieldConsumersPerThread:DocFieldConsumerPerThread
+ {
+
+ internal DocFieldConsumerPerThread one;
+ internal DocFieldConsumerPerThread two;
+ internal DocFieldConsumers parent;
+ internal DocumentsWriter.DocState docState;
+
+ public DocFieldConsumersPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocFieldConsumers parent, DocFieldConsumerPerThread one, DocFieldConsumerPerThread two)
+ {
+ this.parent = parent;
+ this.one = one;
+ this.two = two;
+ docState = docFieldProcessorPerThread.docState;
+ }
+
+ public override void StartDocument()
+ {
+ one.StartDocument();
+ two.StartDocument();
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ DocumentsWriter.DocWriter oneDoc = one.FinishDocument();
+ DocumentsWriter.DocWriter twoDoc = two.FinishDocument();
+ if (oneDoc == null)
+ return twoDoc;
+ else if (twoDoc == null)
+ return oneDoc;
+ else
+ {
+ DocFieldConsumers.PerDoc both = parent.GetPerDoc();
+ both.docID = docState.docID;
+ System.Diagnostics.Debug.Assert(oneDoc.docID == docState.docID);
+ System.Diagnostics.Debug.Assert(twoDoc.docID == docState.docID);
+ both.one = oneDoc;
+ both.two = twoDoc;
+ return both;
+ }
+ }
+
+ public override DocFieldConsumerPerField AddField(FieldInfo fi)
+ {
+ return new DocFieldConsumersPerField(this, one.AddField(fi), two.AddField(fi));
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldProcessor.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessor.cs
new file mode 100644
index 0000000000..42891185ce
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessor.cs
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// This is a DocConsumer that gathers all fields under the
+ /// same name, and calls per-field consumers to process field
+ /// by field. This class doesn't do any "real" work
+ /// of its own: it just forwards the fields to a
+ /// DocFieldConsumer.
+ ///
+
+ sealed class DocFieldProcessor : DocConsumer
+ {
+
+ internal DocumentsWriter docWriter;
+ internal FieldInfos fieldInfos = new FieldInfos();
+ internal DocFieldConsumer consumer;
+ internal StoredFieldsWriter fieldsWriter;
+
+ public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer)
+ {
+ this.docWriter = docWriter;
+ this.consumer = consumer;
+ consumer.SetFieldInfos(fieldInfos);
+ fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos);
+ }
+
+ public override void CloseDocStore(SegmentWriteState state)
+ {
+ consumer.CloseDocStore(state);
+ fieldsWriter.CloseDocStore(state);
+ }
+
+ public override void Flush(ICollection<DocConsumerPerThread> threads, SegmentWriteState state)
+ {
+ var childThreadsAndFields = new HashMap<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>>();
+ foreach(DocConsumerPerThread thread in threads)
+ {
+ DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)thread;
+ childThreadsAndFields[perThread.consumer] = perThread.Fields();
+ perThread.TrimFields(state);
+ }
+ fieldsWriter.Flush(state);
+ consumer.Flush(childThreadsAndFields, state);
+
+ // Important to save after asking consumer to flush so
+ // consumer can alter the FieldInfo* if necessary. EG,
+ // FreqProxTermsWriter does this with
+ // FieldInfo.storePayload.
+ System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION);
+ fieldInfos.Write(state.directory, fileName);
+ state.flushedFiles.Add(fileName);
+ }
+
+ public override void Abort()
+ {
+ fieldsWriter.Abort();
+ consumer.Abort();
+ }
+
+ public override bool FreeRAM()
+ {
+ return consumer.FreeRAM();
+ }
+
+ public override DocConsumerPerThread AddThread(DocumentsWriterThreadState threadState)
+ {
+ return new DocFieldProcessorPerThread(threadState, this);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerField.cs
new file mode 100644
index 0000000000..1078988705
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerField.cs
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+
+namespace Lucene.Net.Index
+{
+
+ /// Holds all per thread, per field state.
+
+ sealed class DocFieldProcessorPerField
+ {
+
+ internal DocFieldConsumerPerField consumer;
+ internal FieldInfo fieldInfo;
+
+ internal DocFieldProcessorPerField next;
+ internal int lastGen = - 1;
+
+ internal int fieldCount;
+ internal IFieldable[] fields = new IFieldable[1];
+
+ public DocFieldProcessorPerField(DocFieldProcessorPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.consumer = perThread.consumer.AddField(fieldInfo);
+ this.fieldInfo = fieldInfo;
+ }
+
+ public void Abort()
+ {
+ consumer.Abort();
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerThread.cs
new file mode 100644
index 0000000000..d108116b58
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerThread.cs
@@ -0,0 +1,478 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+
+namespace Lucene.Net.Index
+{
+
+ /// Gathers all Fieldables for a document under the same
+ /// name, updates FieldInfos, and calls per-field consumers
+ /// to process field by field.
+ ///
+ /// Currently, only a single thread visits the fields,
+ /// sequentially, for processing.
+ ///
+
+ sealed class DocFieldProcessorPerThread:DocConsumerPerThread
+ {
+ private void InitBlock()
+ {
+ docFreeList = new PerDoc[1];
+ }
+
+ internal float docBoost;
+ internal int fieldGen;
+ internal DocFieldProcessor docFieldProcessor;
+ internal FieldInfos fieldInfos;
+ internal DocFieldConsumerPerThread consumer;
+
+ // Holds all fields seen in current doc
+ internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
+ internal int fieldCount;
+
+ // Hash table for all fields ever seen
+ internal DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
+ internal int hashMask = 1;
+ internal int totalFieldCount;
+
+ internal StoredFieldsWriterPerThread fieldsWriter;
+
+ internal DocumentsWriter.DocState docState;
+
+ public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor)
+ {
+ InitBlock();
+ this.docState = threadState.docState;
+ this.docFieldProcessor = docFieldProcessor;
+ this.fieldInfos = docFieldProcessor.fieldInfos;
+ this.consumer = docFieldProcessor.consumer.AddThread(this);
+ fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState);
+ }
+
+ public override void Abort()
+ {
+ for (int i = 0; i < fieldHash.Length; i++)
+ {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while (field != null)
+ {
+ DocFieldProcessorPerField next = field.next;
+ field.Abort();
+ field = next;
+ }
+ }
+ fieldsWriter.Abort();
+ consumer.Abort();
+ }
+
+ public System.Collections.Generic.ICollection<DocFieldConsumerPerField> Fields()
+ {
+ System.Collections.Generic.ICollection<DocFieldConsumerPerField> fields =
+ new System.Collections.Generic.HashSet<DocFieldConsumerPerField>();
+ for (int i = 0; i < fieldHash.Length; i++)
+ {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while (field != null)
+ {
+ fields.Add(field.consumer);
+ field = field.next;
+ }
+ }
+ System.Diagnostics.Debug.Assert(fields.Count == totalFieldCount);
+ return fields;
+ }
+
+ /// If there are fields we've seen but did not see again
+ /// in the last run, then free them up.
+ ///
+
+ internal void TrimFields(SegmentWriteState state)
+ {
+
+ for (int i = 0; i < fieldHash.Length; i++)
+ {
+ DocFieldProcessorPerField perField = fieldHash[i];
+ DocFieldProcessorPerField lastPerField = null;
+
+ while (perField != null)
+ {
+
+ if (perField.lastGen == - 1)
+ {
+
+ // This field was not seen since the previous
+ // flush, so, free up its resources now
+
+ // Unhash
+ if (lastPerField == null)
+ fieldHash[i] = perField.next;
+ else
+ lastPerField.next = perField.next;
+
+ if (state.docWriter.infoStream != null)
+ state.docWriter.infoStream.WriteLine(" purge field=" + perField.fieldInfo.name);
+
+ totalFieldCount--;
+ }
+ else
+ {
+ // Reset
+ perField.lastGen = - 1;
+ lastPerField = perField;
+ }
+
+ perField = perField.next;
+ }
+ }
+ }
+
+ private void Rehash()
+ {
+ int newHashSize = (fieldHash.Length * 2);
+ System.Diagnostics.Debug.Assert(newHashSize > fieldHash.Length);
+
+ DocFieldProcessorPerField[] newHashArray = new DocFieldProcessorPerField[newHashSize];
+
+ // Rehash
+ int newHashMask = newHashSize - 1;
+ for (int j = 0; j < fieldHash.Length; j++)
+ {
+ DocFieldProcessorPerField fp0 = fieldHash[j];
+ while (fp0 != null)
+ {
+ int hashPos2 = fp0.fieldInfo.name.GetHashCode() & newHashMask;
+ DocFieldProcessorPerField nextFP0 = fp0.next;
+ fp0.next = newHashArray[hashPos2];
+ newHashArray[hashPos2] = fp0;
+ fp0 = nextFP0;
+ }
+ }
+
+ fieldHash = newHashArray;
+ hashMask = newHashMask;
+ }
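+
+ // Rehash() above keeps the table size a power of two so that "hashCode & hashMask"
+ // can replace a modulo when picking a bucket. Illustrative values (not from this file):
+ //
+ //   fieldHash.Length == 8  =>  hashMask == 7 (binary 111)
+ //   "title".GetHashCode() & 7   // bucket index in [0, 7]
+ //
+ // Chains are relinked head-first, so insertion order within a bucket may reverse.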
+
+ public override DocumentsWriter.DocWriter ProcessDocument()
+ {
+
+ consumer.StartDocument();
+ fieldsWriter.StartDocument();
+
+ Document doc = docState.doc;
+
+ System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));
+
+ fieldCount = 0;
+
+ int thisFieldGen = fieldGen++;
+
+ System.Collections.Generic.IList<IFieldable> docFields = doc.GetFields();
+ int numDocFields = docFields.Count;
+
+ // Absorb any new fields first seen in this document.
+ // Also absorb any changes to fields we had already
+ // seen before (eg suddenly turning on norms or
+ // vectors, etc.):
+
+ for (int i = 0; i < numDocFields; i++)
+ {
+ IFieldable field = docFields[i];
+ System.String fieldName = field.Name;
+
+ // Make sure we have a PerField allocated
+ int hashPos = fieldName.GetHashCode() & hashMask;
+ DocFieldProcessorPerField fp = fieldHash[hashPos];
+ while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
+ fp = fp.next;
+
+ if (fp == null)
+ {
+
+ // TODO FI: we need to genericize the "flags" that a
+ // field holds, and, how these flags are merged; it
+ // needs to be more "pluggable" such that if I want
+ // to have a new "thing" my Fields can do, I can
+ // easily add it
+ FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
+ field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
+ field.OmitNorms, false, field.OmitTermFreqAndPositions);
+
+ fp = new DocFieldProcessorPerField(this, fi);
+ fp.next = fieldHash[hashPos];
+ fieldHash[hashPos] = fp;
+ totalFieldCount++;
+
+ if (totalFieldCount >= fieldHash.Length / 2)
+ Rehash();
+ }
+ else
+ {
+ fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
+ field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
+ field.OmitNorms, false, field.OmitTermFreqAndPositions);
+ }
+
+ if (thisFieldGen != fp.lastGen)
+ {
+
+ // First time we're seeing this field for this doc
+ fp.fieldCount = 0;
+
+ if (fieldCount == fields.Length)
+ {
+ int newSize = fields.Length * 2;
+ DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
+ Array.Copy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = fp;
+ fp.lastGen = thisFieldGen;
+ }
+
+ if (fp.fieldCount == fp.fields.Length)
+ {
+ IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
+ Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
+ fp.fields = newArray;
+ }
+
+ fp.fields[fp.fieldCount++] = field;
+ if (field.IsStored)
+ {
+ fieldsWriter.AddField(field, fp.fieldInfo);
+ }
+ }
+
+ // If we are writing vectors then we must visit
+ // fields in sorted order so they are written in
+ // sorted order. TODO: we actually only need to
+ // sort the subset of fields that have vectors
+ // enabled; we could save [small amount of] CPU
+ // here.
+ QuickSort(fields, 0, fieldCount - 1);
+
+ for (int i = 0; i < fieldCount; i++)
+ fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);
+
+ if (docState.maxTermPrefix != null && docState.infoStream != null)
+ {
+ docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
+ docState.maxTermPrefix = null;
+ }
+
+ DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
+ DocumentsWriter.DocWriter two = consumer.FinishDocument();
+ if (one == null)
+ {
+ return two;
+ }
+ else if (two == null)
+ {
+ return one;
+ }
+ else
+ {
+ PerDoc both = GetPerDoc();
+ both.docID = docState.docID;
+ System.Diagnostics.Debug.Assert(one.docID == docState.docID);
+ System.Diagnostics.Debug.Assert(two.docID == docState.docID);
+ both.one = one;
+ both.two = two;
+ return both;
+ }
+ }
+
+ internal void QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
+ {
+ if (lo >= hi)
+ return ;
+ else if (hi == 1 + lo)
+ {
+ if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp = array[lo];
+ array[lo] = array[hi];
+ array[hi] = tmp;
+ }
+ return ;
+ }
+
+ int mid = Number.URShift((lo + hi), 1);
+
+ if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp = array[lo];
+ array[lo] = array[mid];
+ array[mid] = tmp;
+ }
+
+ if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp = array[mid];
+ array[mid] = array[hi];
+ array[hi] = tmp;
+
+ if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
+ {
+ DocFieldProcessorPerField tmp2 = array[lo];
+ array[lo] = array[mid];
+ array[mid] = tmp2;
+ }
+ }
+
+ int left = lo + 1;
+ int right = hi - 1;
+
+ if (left >= right)
+ return ;
+
+ DocFieldProcessorPerField partition = array[mid];
+
+ for (; ; )
+ {
+ while (String.CompareOrdinal(array[right].fieldInfo.name, partition.fieldInfo.name) > 0)
+ --right;
+
+ while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, partition.fieldInfo.name) <= 0)
+ ++left;
+
+ if (left < right)
+ {
+ DocFieldProcessorPerField tmp = array[left];
+ array[left] = array[right];
+ array[right] = tmp;
+ --right;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ QuickSort(array, lo, left);
+ QuickSort(array, left + 1, hi);
+ }
+
+ internal PerDoc[] docFreeList;
+ internal int freeCount;
+ internal int allocCount;
+
+ internal PerDoc GetPerDoc()
+ {
+ lock (this)
+ {
+ if (freeCount == 0)
+ {
+ allocCount++;
+ if (allocCount > docFreeList.Length)
+ {
+ // Grow our free list up front to make sure we have
+ // enough space to recycle all outstanding PerDoc
+ // instances
+ System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
+ docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
+ }
+ return new PerDoc(this);
+ }
+ else
+ return docFreeList[--freeCount];
+ }
+ }
+
+ internal void FreePerDoc(PerDoc perDoc)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
+ docFreeList[freeCount++] = perDoc;
+ }
+ }
+
+ internal class PerDoc:DocumentsWriter.DocWriter
+ {
+ public PerDoc(DocFieldProcessorPerThread enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(DocFieldProcessorPerThread enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocFieldProcessorPerThread enclosingInstance;
+ public DocFieldProcessorPerThread Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ internal DocumentsWriter.DocWriter one;
+ internal DocumentsWriter.DocWriter two;
+
+ public override long SizeInBytes()
+ {
+ return one.SizeInBytes() + two.SizeInBytes();
+ }
+
+ public override void Finish()
+ {
+ try
+ {
+ try
+ {
+ one.Finish();
+ }
+ finally
+ {
+ two.Finish();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ try
+ {
+ one.Abort();
+ }
+ finally
+ {
+ two.Abort();
+ }
+ }
+ finally
+ {
+ Enclosing_Instance.FreePerDoc(this);
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocInverter.cs b/external/Lucene.Net.Light/src/core/Index/DocInverter.cs
new file mode 100644
index 0000000000..4153465405
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocInverter.cs
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System.Collections.Generic;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Index
+{
+
+ /// This is a DocFieldConsumer that inverts each field,
+ /// separately, from a Document, and accepts an
+ /// InvertedTermsConsumer to process those terms.
+ ///
+
+ sealed class DocInverter : DocFieldConsumer
+ {
+
+ internal InvertedDocConsumer consumer;
+ internal InvertedDocEndConsumer endConsumer;
+
+ public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer)
+ {
+ this.consumer = consumer;
+ this.endConsumer = endConsumer;
+ }
+
+ internal override void SetFieldInfos(FieldInfos fieldInfos)
+ {
+ base.SetFieldInfos(fieldInfos);
+ consumer.SetFieldInfos(fieldInfos);
+ endConsumer.SetFieldInfos(fieldInfos);
+ }
+
+ public override void Flush(IDictionary<DocFieldConsumerPerThread, ICollection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state)
+ {
+
+ var childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread, ICollection<InvertedDocConsumerPerField>>();
+ var endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread, ICollection<InvertedDocEndConsumerPerField>>();
+
+ foreach (var entry in threadsAndFields)
+ {
+ var perThread = (DocInverterPerThread) entry.Key;
+
+ ICollection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
+ ICollection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
+ foreach(DocFieldConsumerPerField field in entry.Value)
+ {
+ var perField = (DocInverterPerField)field;
+ childFields.Add(perField.consumer);
+ endChildFields.Add(perField.endConsumer);
+ }
+
+ childThreadsAndFields[perThread.consumer] = childFields;
+ endChildThreadsAndFields[perThread.endConsumer] = endChildFields;
+ }
+
+ consumer.Flush(childThreadsAndFields, state);
+ endConsumer.Flush(endChildThreadsAndFields, state);
+ }
+
+ public override void CloseDocStore(SegmentWriteState state)
+ {
+ consumer.CloseDocStore(state);
+ endConsumer.CloseDocStore(state);
+ }
+
+ public override void Abort()
+ {
+ consumer.Abort();
+ endConsumer.Abort();
+ }
+
+ public override bool FreeRAM()
+ {
+ return consumer.FreeRAM();
+ }
+
+ public override DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread)
+ {
+ return new DocInverterPerThread(docFieldProcessorPerThread, this);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocInverterPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocInverterPerField.cs
new file mode 100644
index 0000000000..8cd7c0aea6
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocInverterPerField.cs
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Documents;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Index
+{
+
+ /// Holds state for inverting all occurrences of a single
+ /// field in the document. This class doesn't do anything
+ /// itself; instead, it forwards the tokens produced by
+ /// analysis to its own consumer
+ /// (InvertedDocConsumerPerField). It also interacts with an
+ /// endConsumer (InvertedDocEndConsumerPerField).
+ ///
+
+ sealed class DocInverterPerField:DocFieldConsumerPerField
+ {
+
+ private DocInverterPerThread perThread;
+ private FieldInfo fieldInfo;
+ internal InvertedDocConsumerPerField consumer;
+ internal InvertedDocEndConsumerPerField endConsumer;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+
+ public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.perThread = perThread;
+ this.fieldInfo = fieldInfo;
+ docState = perThread.docState;
+ fieldState = perThread.fieldState;
+ this.consumer = perThread.consumer.AddField(this, fieldInfo);
+ this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
+ }
+
+ public override void Abort()
+ {
+ consumer.Abort();
+ endConsumer.Abort();
+ }
+
+ public override void ProcessFields(IFieldable[] fields, int count)
+ {
+
+ fieldState.Reset(docState.doc.Boost);
+
+ int maxFieldLength = docState.maxFieldLength;
+
+ bool doInvert = consumer.Start(fields, count);
+
+ for (int i = 0; i < count; i++)
+ {
+
+ IFieldable field = fields[i];
+
+ // TODO FI: this should be "genericized" to querying
+ // consumer if it wants to see this particular field
+ // tokenized.
+ if (field.IsIndexed && doInvert)
+ {
+
+ bool anyToken;
+
+ if (fieldState.length > 0)
+ fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
+
+ if (!field.IsTokenized)
+ {
+ // un-tokenized field
+ System.String stringValue = field.StringValue;
+ int valueLength = stringValue.Length;
+ perThread.singleToken.Reinit(stringValue, 0, valueLength);
+ fieldState.attributeSource = perThread.singleToken;
+ consumer.Start(field);
+
+ bool success = false;
+ try
+ {
+ consumer.Add();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ docState.docWriter.SetAborting();
+ }
+ fieldState.offset += valueLength;
+ fieldState.length++;
+ fieldState.position++;
+ anyToken = valueLength > 0;
+ }
+ else
+ {
+ // tokenized field
+ TokenStream stream;
+ TokenStream streamValue = field.TokenStreamValue;
+
+ if (streamValue != null)
+ stream = streamValue;
+ else
+ {
+ // the field does not have a TokenStream,
+ // so we have to obtain one from the analyzer
+ System.IO.TextReader reader; // find or make Reader
+ System.IO.TextReader readerValue = field.ReaderValue;
+
+ if (readerValue != null)
+ reader = readerValue;
+ else
+ {
+ System.String stringValue = field.StringValue;
+ if (stringValue == null)
+ throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
+ perThread.stringReader.Init(stringValue);
+ reader = perThread.stringReader;
+ }
+
+ // Tokenize field and add to postingTable
+ stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
+ }
+
+ // reset the TokenStream to the first token
+ stream.Reset();
+
+ int startLength = fieldState.length;
+
+ try
+ {
+ int offsetEnd = fieldState.offset - 1;
+
+ bool hasMoreTokens = stream.IncrementToken();
+
+ fieldState.attributeSource = stream;
+
+ IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
+ IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
+
+ consumer.Start(field);
+
+ for (; ; )
+ {
+
+ // If we hit an exception in stream.next below
+ // (which is fairly common, eg if analyzer
+ // chokes on a given document), then it's
+ // non-aborting and (above) this one document
+ // will be marked as deleted, but still
+ // consume a docID
+
+ if (!hasMoreTokens)
+ break;
+
+ int posIncr = posIncrAttribute.PositionIncrement;
+ fieldState.position += posIncr;
+ if (fieldState.position > 0)
+ {
+ fieldState.position--;
+ }
+
+ if (posIncr == 0)
+ fieldState.numOverlap++;
+
+ bool success = false;
+ try
+ {
+ // If we hit an exception in here, we abort
+ // all buffered documents since the last
+ // flush, on the likelihood that the
+ // internal state of the consumer is now
+ // corrupt and should not be flushed to a
+ // new segment:
+ consumer.Add();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ docState.docWriter.SetAborting();
+ }
+ fieldState.position++;
+ offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
+ if (++fieldState.length >= maxFieldLength)
+ {
+ if (docState.infoStream != null)
+ docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
+ break;
+ }
+
+ hasMoreTokens = stream.IncrementToken();
+ }
+ // trigger streams to perform end-of-stream operations
+ stream.End();
+
+ fieldState.offset += offsetAttribute.EndOffset;
+ anyToken = fieldState.length > startLength;
+ }
+ finally
+ {
+ stream.Close();
+ }
+ }
+
+ if (anyToken)
+ fieldState.offset += docState.analyzer.GetOffsetGap(field);
+ fieldState.boost *= field.Boost;
+ }
+
+ // LUCENE-2387: don't hang onto the field, so GC can
+ // reclaim
+ fields[i] = null;
+ }
+
+ consumer.Finish();
+ endConsumer.Finish();
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocInverterPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocInverterPerThread.cs
new file mode 100644
index 0000000000..c38ed35b5f
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocInverterPerThread.cs
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using TokenStream = Lucene.Net.Analysis.TokenStream;
+
+namespace Lucene.Net.Index
+{
+
+ /// This is a DocFieldConsumer that inverts each field,
+ /// separately, from a Document, and accepts a
+ /// InvertedTermsConsumer to process those terms.
+ ///
+
+ sealed class DocInverterPerThread : DocFieldConsumerPerThread
+ {
+ private void InitBlock()
+ {
+ singleToken = new SingleTokenAttributeSource();
+ }
+ internal DocInverter docInverter;
+ internal InvertedDocConsumerPerThread consumer;
+ internal InvertedDocEndConsumerPerThread endConsumer;
+ internal SingleTokenAttributeSource singleToken;
+
+ internal class SingleTokenAttributeSource : AttributeSource
+ {
+ internal ITermAttribute termAttribute;
+ internal IOffsetAttribute offsetAttribute;
+
+ internal SingleTokenAttributeSource()
+ {
+ termAttribute = AddAttribute<ITermAttribute>();
+ offsetAttribute = AddAttribute<IOffsetAttribute>();
+ }
+
+ public void Reinit(System.String stringValue, int startOffset, int endOffset)
+ {
+ termAttribute.SetTermBuffer(stringValue);
+ offsetAttribute.SetOffset(startOffset, endOffset);
+ }
+ }
+
+ internal DocumentsWriter.DocState docState;
+
+ internal FieldInvertState fieldState = new FieldInvertState();
+
+ // Used to read a string value for a field
+ internal ReusableStringReader stringReader = new ReusableStringReader();
+
+ public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
+ {
+ InitBlock();
+ this.docInverter = docInverter;
+ docState = docFieldProcessorPerThread.docState;
+ consumer = docInverter.consumer.AddThread(this);
+ endConsumer = docInverter.endConsumer.AddThread(this);
+ }
+
+ public override void StartDocument()
+ {
+ consumer.StartDocument();
+ endConsumer.StartDocument();
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ // TODO: allow endConsumer.finishDocument to also return
+ // a DocWriter
+ endConsumer.FinishDocument();
+ return consumer.FinishDocument();
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ consumer.Abort();
+ }
+ finally
+ {
+ endConsumer.Abort();
+ }
+ }
+
+ public override DocFieldConsumerPerField AddField(FieldInfo fi)
+ {
+ return new DocInverterPerField(this, fi);
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocumentsWriter.cs b/external/Lucene.Net.Light/src/core/Index/DocumentsWriter.cs
new file mode 100644
index 0000000000..6545d11129
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocumentsWriter.cs
@@ -0,0 +1,2075 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using Lucene.Net.Support;
+using Analyzer = Lucene.Net.Analysis.Analyzer;
+using Document = Lucene.Net.Documents.Document;
+using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
+using Directory = Lucene.Net.Store.Directory;
+using ArrayUtil = Lucene.Net.Util.ArrayUtil;
+using Constants = Lucene.Net.Util.Constants;
+using IndexSearcher = Lucene.Net.Search.IndexSearcher;
+using Query = Lucene.Net.Search.Query;
+using Scorer = Lucene.Net.Search.Scorer;
+using Similarity = Lucene.Net.Search.Similarity;
+using Weight = Lucene.Net.Search.Weight;
+
+namespace Lucene.Net.Index
+{
+
+ /// This class accepts multiple added documents and directly
+ /// writes a single segment file. It does this more
+ /// efficiently than creating a single segment per document
+ /// (with DocumentWriter) and doing standard merges on those
+ /// segments.
+ ///
+ /// Each added document is passed to the DocConsumer,
+ /// which in turn processes the document and interacts with
+ /// other consumers in the indexing chain. Certain
+ /// consumers, like StoredFieldsWriter and
+ /// TermVectorsTermsWriter, digest a document and
+ /// immediately write bytes to the "doc store" files (ie,
+ /// they do not consume RAM per document, except while they
+ /// are processing the document).
+ ///
+ /// Other consumers, eg FreqProxTermsWriter and
+ /// NormsWriter, buffer bytes in RAM and flush only
+ /// when a new segment is produced.
+ /// Once we have used our allowed RAM buffer, or the number
+ /// of added docs is large enough (in the case we are
+ /// flushing by doc count instead of RAM usage), we create a
+ /// real segment and flush it to the Directory.
+ ///
+ /// Threads:
+ ///
+ /// Multiple threads are allowed into addDocument at once.
+ /// There is an initial synchronized call to getThreadState
+ /// which allocates a ThreadState for this thread. The same
+ /// thread will get the same ThreadState over time (thread
+ /// affinity) so that if there are consistent patterns (for
+ /// example each thread is indexing a different content
+ /// source) then we make better use of RAM. Then
+ /// processDocument is called on that ThreadState without
+ /// synchronization (most of the "heavy lifting" is in this
+ /// call). Finally the synchronized "finishDocument" is
+ /// called to flush changes to the directory.
+ ///
+ /// When flush is called by IndexWriter we forcefully idle
+ /// all threads and flush only once they are all idle. This
+ /// means you can call flush with a given thread even while
+ /// other threads are actively adding/deleting documents.
+ ///
+ ///
+ /// Exceptions:
+ ///
+ /// Because this class directly updates in-memory posting
+ /// lists, and flushes stored fields and term vectors
+ /// directly to files in the directory, there are certain
+ /// limited times when an exception can corrupt this state.
+ /// For example, a disk full while flushing stored fields
+ /// leaves this file in a corrupt state. Or, an OOM
+ /// exception while appending to the in-memory posting lists
+ /// can corrupt that posting list. We call such exceptions
+ /// "aborting exceptions". In these cases we must call
+ /// abort() to discard all docs added since the last flush.
+ ///
+ /// All other exceptions ("non-aborting exceptions") can
+ /// still partially update the index structures. These
+ /// updates are consistent, but, they represent only a part
+ /// of the document seen up until the exception was hit.
+ /// When this happens, we immediately mark the document as
+ /// deleted so that the document is always atomically ("all
+ /// or none") added to the index.
+ ///
+
+ public sealed class DocumentsWriter : IDisposable
+ {
+ internal class AnonymousClassIndexingChain:IndexingChain
+ {
+
+ internal override DocConsumer GetChain(DocumentsWriter documentsWriter)
+ {
+ /*
+ This is the current indexing chain:
+
+ DocConsumer / DocConsumerPerThread
+ --> code: DocFieldProcessor / DocFieldProcessorPerThread
+ --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
+ --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
+ --> code: DocInverter / DocInverterPerThread / DocInverterPerField
+ --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: TermsHash / TermsHashPerThread / TermsHashPerField
+ --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
+ --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
+ --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
+ --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
+ --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
+ */
+
+ // Build up indexing chain:
+
+ TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
+ TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
+
+ InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, new TermsHash(documentsWriter, false, termVectorsWriter, null));
+ NormsWriter normsWriter = new NormsWriter();
+ DocInverter docInverter = new DocInverter(termsHash, normsWriter);
+ return new DocFieldProcessor(documentsWriter, docInverter);
+ }
+ }
+ private void InitBlock()
+ {
+ maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
+ maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS;
+ ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024);
+ waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
+ waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
+ freeTrigger = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 1.05);
+ freeLevel = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 0.95);
+ maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS;
+ skipDocWriter = new SkipDocWriter();
+ byteBlockAllocator = new ByteBlockAllocator(this, DocumentsWriter.BYTE_BLOCK_SIZE);
+ perDocAllocator = new ByteBlockAllocator(this,DocumentsWriter.PER_DOC_BLOCK_SIZE);
+ waitQueue = new WaitQueue(this);
+ }
+
+ internal IndexWriter writer;
+ internal Directory directory;
+
+ internal System.String segment; // Current segment we are working on
+ private System.String docStoreSegment; // Current doc-store segment we are writing
+ private int docStoreOffset; // Current starting doc-store offset of current segment
+
+ private int nextDocID; // Next docID to be added
+ private int numDocsInRAM; // # docs buffered in RAM
+ internal int numDocsInStore; // # docs written to doc stores
+
+ // Max # ThreadState instances; if there are more threads
+ // than this they share ThreadStates
+ private const int MAX_THREAD_STATE = 5;
+ private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0];
+ private HashMap<ThreadClass, DocumentsWriterThreadState> threadBindings = new HashMap<ThreadClass, DocumentsWriterThreadState>();
+
+ private int pauseThreads; // Non-zero when we need all threads to
+ // pause (eg to flush)
+ internal bool flushPending; // True when a thread has decided to flush
+ internal bool bufferIsFull; // True when it's time to write segment
+ private bool aborting; // True if an abort is pending
+
+ private DocFieldProcessor docFieldProcessor;
+
+ internal System.IO.StreamWriter infoStream;
+ internal int maxFieldLength;
+ internal Similarity similarity;
+
+ internal IList<string> newFiles;
+
+ internal class DocState
+ {
+ internal DocumentsWriter docWriter;
+ internal Analyzer analyzer;
+ internal int maxFieldLength;
+ internal System.IO.StreamWriter infoStream;
+ internal Similarity similarity;
+ internal int docID;
+ internal Document doc;
+ internal System.String maxTermPrefix;
+
+ // Only called by asserts
+ public bool TestPoint(System.String name)
+ {
+ return docWriter.writer.TestPoint(name);
+ }
+
+ public void Clear()
+ {
+ // don't hold onto doc nor analyzer, in case it is
+ // largish:
+ doc = null;
+ analyzer = null;
+ }
+ }
+
+ /// Consumer returns this on each doc. This holds any
+ /// state that must be flushed synchronized "in docID
+ /// order". We gather these and flush them in order.
+ ///
+ internal abstract class DocWriter
+ {
+ internal DocWriter next;
+ internal int docID;
+ public abstract void Finish();
+ public abstract void Abort();
+ public abstract long SizeInBytes();
+
+ internal void SetNext(DocWriter next)
+ {
+ this.next = next;
+ }
+ }
+
+ /*
+ * Create and return a new DocWriterBuffer.
+ */
+ internal PerDocBuffer NewPerDocBuffer()
+ {
+ return new PerDocBuffer(this);
+ }
+
+ /*
+ * RAMFile buffer for DocWriters.
+ */
+ internal class PerDocBuffer : Lucene.Net.Store.RAMFile
+ {
+ DocumentsWriter enclosingInstance;
+ public PerDocBuffer(DocumentsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ /*
+ * Allocate bytes used from shared pool.
+ */
+ public override byte[] NewBuffer(int size)
+ {
+ System.Diagnostics.Debug.Assert(size == PER_DOC_BLOCK_SIZE);
+ return enclosingInstance.perDocAllocator.GetByteBlock(false);
+ }
+
+ /*
+ * Recycle the bytes used.
+ */
+ internal void Recycle()
+ {
+ lock (this)
+ {
+ if (buffers.Count > 0)
+ {
+ Length = 0;
+
+ // Recycle the blocks
+ enclosingInstance.perDocAllocator.RecycleByteBlocks(buffers);
+ buffers.Clear();
+ sizeInBytes = 0;
+
+ System.Diagnostics.Debug.Assert(NumBuffers() == 0);
+ }
+ }
+ }
+ }
+
+ /// The IndexingChain must define the GetChain(DocumentsWriter) method
+ /// which returns the DocConsumer that the DocumentsWriter calls to process the
+ /// documents.
+ ///
+ internal abstract class IndexingChain
+ {
+ internal abstract DocConsumer GetChain(DocumentsWriter documentsWriter);
+ }
+
+ internal static readonly IndexingChain DefaultIndexingChain;
+
+ internal DocConsumer consumer;
+
+ // Deletes done after the last flush; these are discarded
+ // on abort
+ private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
+
+ // Deletes done before the last flush; these are still
+ // kept on abort
+ private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
+
+ // The max number of delete terms that can be buffered before
+ // they must be flushed to disk.
+ private int maxBufferedDeleteTerms;
+
+ // How much RAM we can use before flushing. This is 0 if
+ // we are flushing by doc count instead.
+ private long ramBufferSize;
+ private long waitQueuePauseBytes;
+ private long waitQueueResumeBytes;
+
+ // If we've allocated 5% over our RAM budget, we then
+ // free down to 95%
+ private long freeTrigger;
+ private long freeLevel;
+
+ // Flush @ this number of docs. If ramBufferSize is
+ // non-zero we will flush by RAM usage instead.
+ private int maxBufferedDocs;
+
+ private int flushedDocCount; // How many docs already flushed to index
+
+ internal void UpdateFlushedDocCount(int n)
+ {
+ lock (this)
+ {
+ flushedDocCount += n;
+ }
+ }
+ internal int GetFlushedDocCount()
+ {
+ lock (this)
+ {
+ return flushedDocCount;
+ }
+ }
+ internal void SetFlushedDocCount(int n)
+ {
+ lock (this)
+ {
+ flushedDocCount = n;
+ }
+ }
+
+ private bool closed;
+
+ internal DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain)
+ {
+ InitBlock();
+ this.directory = directory;
+ this.writer = writer;
+ this.similarity = writer.Similarity;
+ flushedDocCount = writer.MaxDoc();
+
+ consumer = indexingChain.GetChain(this);
+ if (consumer is DocFieldProcessor)
+ {
+ docFieldProcessor = (DocFieldProcessor) consumer;
+ }
+ }
+
+ /// Returns true if any of the fields in the current
+ /// buffered docs have omitTermFreqAndPositions==false
+ ///
+ internal bool HasProx()
+ {
+ return (docFieldProcessor != null) ? docFieldProcessor.fieldInfos.HasProx() : true;
+ }
+
+ /// If non-null, various details of indexing are printed
+ /// here.
+ ///
+ internal void SetInfoStream(System.IO.StreamWriter infoStream)
+ {
+ lock (this)
+ {
+ this.infoStream = infoStream;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.infoStream = infoStream;
+ }
+ }
+
+ internal void SetMaxFieldLength(int maxFieldLength)
+ {
+ lock (this)
+ {
+ this.maxFieldLength = maxFieldLength;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.maxFieldLength = maxFieldLength;
+ }
+ }
+
+ internal void SetSimilarity(Similarity similarity)
+ {
+ lock (this)
+ {
+ this.similarity = similarity;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].docState.similarity = similarity;
+ }
+ }
+
+ /// Set how much RAM we can use before flushing.
+ internal void SetRAMBufferSizeMB(double mb)
+ {
+ lock (this)
+ {
+ if (mb == IndexWriter.DISABLE_AUTO_FLUSH)
+ {
+ ramBufferSize = IndexWriter.DISABLE_AUTO_FLUSH;
+ waitQueuePauseBytes = 4 * 1024 * 1024;
+ waitQueueResumeBytes = 2 * 1024 * 1024;
+ }
+ else
+ {
+ ramBufferSize = (long) (mb * 1024 * 1024);
+ waitQueuePauseBytes = (long) (ramBufferSize * 0.1);
+ waitQueueResumeBytes = (long) (ramBufferSize * 0.05);
+ freeTrigger = (long) (1.05 * ramBufferSize);
+ freeLevel = (long) (0.95 * ramBufferSize);
+ }
+ }
+ }
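+
+ // Rough worked example, assuming IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB is 16:
+ // SetRAMBufferSizeMB(16) gives ramBufferSize = 16 MB, pauses the wait queue at
+ // ~1.6 MB of queued doc bytes, resumes at ~0.8 MB, starts trimming allocations
+ // above ~16.8 MB (freeTrigger) and frees back down to ~15.2 MB (freeLevel).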
+
+ internal double GetRAMBufferSizeMB()
+ {
+ lock (this)
+ {
+ if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH)
+ {
+ return ramBufferSize;
+ }
+ else
+ {
+ return ramBufferSize / 1024.0 / 1024.0;
+ }
+ }
+ }
+
+ /// Gets or sets max buffered docs, which means we will flush by
+ /// doc count instead of by RAM usage.
+ ///
+ internal int MaxBufferedDocs
+ {
+ get { return maxBufferedDocs; }
+ set { maxBufferedDocs = value; }
+ }
+
+ /// Get current segment name we are writing.
+ internal string Segment
+ {
+ get { return segment; }
+ }
+
+ /// Returns how many docs are currently buffered in RAM.
+ internal int NumDocsInRAM
+ {
+ get { return numDocsInRAM; }
+ }
+
+ /// Returns the current doc store segment we are writing
+ /// to.
+ ///
+ internal string DocStoreSegment
+ {
+ get
+ {
+ lock (this)
+ {
+ return docStoreSegment;
+ }
+ }
+ }
+
+ /// Returns the doc offset into the shared doc store for
+ /// the current buffered docs.
+ ///
+ internal int DocStoreOffset
+ {
+ get { return docStoreOffset; }
+ }
+
+ /// Closes the current open doc stores and returns the doc
+ /// store segment name. This returns null if there are
+ /// no buffered documents.
+ ///
+ internal System.String CloseDocStore()
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+
+ if (infoStream != null)
+ Message("closeDocStore: " + openFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore);
+
+ bool success = false;
+
+ try
+ {
+ InitFlushState(true);
+ closedFiles.Clear();
+
+ consumer.CloseDocStore(flushState);
+ System.Diagnostics.Debug.Assert(0 == openFiles.Count);
+
+ System.String s = docStoreSegment;
+ docStoreSegment = null;
+ docStoreOffset = 0;
+ numDocsInStore = 0;
+ success = true;
+ return s;
+ }
+ finally
+ {
+ if (!success)
+ {
+ Abort();
+ }
+ }
+ }
+ }
+
+ private ICollection<string> abortedFiles; // List of files that were written before last abort()
+
+ private SegmentWriteState flushState;
+
+ internal ICollection<string> AbortedFiles()
+ {
+ return abortedFiles;
+ }
+
+ internal void Message(System.String message)
+ {
+ if (infoStream != null)
+ writer.Message("DW: " + message);
+ }
+
+ internal IList<string> openFiles = new List<string>();
+ internal IList<string> closedFiles = new List<string>();
+
+ /* Returns Collection of files in use by this instance,
+ * including any flushed segments. */
+ internal IList<string> OpenFiles()
+ {
+ lock (this)
+ {
+ // ToArray returns a copy
+ return openFiles.ToArray();
+ }
+ }
+
+ internal IList<string> ClosedFiles()
+ {
+ lock (this)
+ {
+ // ToArray returns a copy
+ return closedFiles.ToArray();
+ }
+ }
+
+ internal void AddOpenFile(System.String name)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(!openFiles.Contains(name));
+ openFiles.Add(name);
+ }
+ }
+
+ internal void RemoveOpenFile(System.String name)
+ {
+ lock (this)
+ {
+ System.Diagnostics.Debug.Assert(openFiles.Contains(name));
+ openFiles.Remove(name);
+ closedFiles.Add(name);
+ }
+ }
+
+ internal void SetAborting()
+ {
+ lock (this)
+ {
+ aborting = true;
+ }
+ }
+
+ /// Called if we hit an exception at a bad time (when
+ /// updating the index files) and must discard all
+ /// currently buffered docs. This resets our state,
+ /// discarding any docs added since last flush.
+ ///
+ internal void Abort()
+ {
+ lock (this)
+ {
+ try
+ {
+ if (infoStream != null)
+ {
+ Message("docWriter: now abort");
+ }
+
+ // Forcefully remove waiting ThreadStates from line
+ waitQueue.Abort();
+
+ // Wait for all other threads to finish with
+ // DocumentsWriter:
+ PauseAllThreads();
+
+ try
+ {
+
+ System.Diagnostics.Debug.Assert(0 == waitQueue.numWaiting);
+
+ waitQueue.waitingBytes = 0;
+
+ try
+ {
+ abortedFiles = OpenFiles();
+ }
+ catch (System.Exception)
+ {
+ abortedFiles = null;
+ }
+
+ deletesInRAM.Clear();
+ deletesFlushed.Clear();
+ openFiles.Clear();
+
+ for (int i = 0; i < threadStates.Length; i++)
+ try
+ {
+ threadStates[i].consumer.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ try
+ {
+ consumer.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ docStoreSegment = null;
+ numDocsInStore = 0;
+ docStoreOffset = 0;
+
+ // Reset all postings data
+ DoAfterFlush();
+ }
+ finally
+ {
+ ResumeAllThreads();
+ }
+ }
+ finally
+ {
+ aborting = false;
+ System.Threading.Monitor.PulseAll(this);
+ if (infoStream != null)
+ {
+ Message("docWriter: done abort; abortedFiles=" + abortedFiles);
+ }
+ }
+ }
+ }
+
+ /// Reset after a flush
+ private void DoAfterFlush()
+ {
+ // All ThreadStates should be idle when we are called
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+ threadBindings.Clear();
+ waitQueue.Reset();
+ segment = null;
+ numDocsInRAM = 0;
+ nextDocID = 0;
+ bufferIsFull = false;
+ flushPending = false;
+ for (int i = 0; i < threadStates.Length; i++)
+ threadStates[i].DoAfterFlush();
+ numBytesUsed = 0;
+ }
+
+ // Returns true if an abort is in progress
+ internal bool PauseAllThreads()
+ {
+ lock (this)
+ {
+ pauseThreads++;
+ while (!AllThreadsIdle())
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+
+ return aborting;
+ }
+ }
+
+ internal void ResumeAllThreads()
+ {
+ lock (this)
+ {
+ pauseThreads--;
+ System.Diagnostics.Debug.Assert(pauseThreads >= 0);
+ if (0 == pauseThreads)
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ private bool AllThreadsIdle()
+ {
+ lock (this)
+ {
+ for (int i = 0; i < threadStates.Length; i++)
+ if (!threadStates[i].isIdle)
+ return false;
+ return true;
+ }
+ }
+
+ internal bool AnyChanges
+ {
+ get
+ {
+ lock (this)
+ {
+ return numDocsInRAM != 0 || deletesInRAM.numTerms != 0 || deletesInRAM.docIDs.Count != 0 ||
+ deletesInRAM.queries.Count != 0;
+ }
+ }
+ }
+
+ private void InitFlushState(bool onlyDocStore)
+ {
+ lock (this)
+ {
+ InitSegmentName(onlyDocStore);
+ flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.TermIndexInterval);
+ }
+ }
+
+ /// Flush all pending docs to a new segment
+ internal int Flush(bool closeDocStore)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(AllThreadsIdle());
+
+ System.Diagnostics.Debug.Assert(numDocsInRAM > 0);
+
+ System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM);
+ System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0);
+ System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
+
+ InitFlushState(false);
+
+ docStoreOffset = numDocsInStore;
+
+ if (infoStream != null)
+ Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
+
+ bool success = false;
+
+ try
+ {
+
+ if (closeDocStore)
+ {
+ System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null);
+ System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName));
+ CloseDocStore();
+ flushState.numDocsInStore = 0;
+ }
+
+ ICollection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
+ for (int i = 0; i < threadStates.Length; i++)
+ threads.Add(threadStates[i].consumer);
+ consumer.Flush(threads, flushState);
+
+ if (infoStream != null)
+ {
+ SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory);
+ long newSegmentSize = si.SizeInBytes();
+ System.String message = System.String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}",
+ new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) });
+ Message(message);
+ }
+
+ flushedDocCount += flushState.numDocs;
+
+ DoAfterFlush();
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ Abort();
+ }
+ }
+
+ System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0);
+
+ return flushState.numDocs;
+ }
+ }
+
+ internal ICollection<string> GetFlushedFiles()
+ {
+ return flushState.flushedFiles;
+ }
+
+ /// Build compound file for the segment we just flushed
+ internal void CreateCompoundFile(System.String segment)
+ {
+
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
+ foreach(string flushedFile in flushState.flushedFiles)
+ {
+ cfsWriter.AddFile(flushedFile);
+ }
+
+ // Perform the merge
+ cfsWriter.Close();
+ }
+
+ /// Set flushPending if it is not already set and returns
+ /// whether it was set. This is used by IndexWriter to
+ /// trigger a single flush even when multiple threads are
+ /// trying to do so.
+ ///
+ internal bool SetFlushPending()
+ {
+ lock (this)
+ {
+ if (flushPending)
+ return false;
+ else
+ {
+ flushPending = true;
+ return true;
+ }
+ }
+ }
+
+ internal void ClearFlushPending()
+ {
+ lock (this)
+ {
+ flushPending = false;
+ }
+ }
+
+ internal void PushDeletes()
+ {
+ lock (this)
+ {
+ deletesFlushed.Update(deletesInRAM);
+ }
+ }
+
+ public void Dispose()
+ {
+ // Move to protected method if class becomes unsealed
+ lock (this)
+ {
+ closed = true;
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ internal void InitSegmentName(bool onlyDocStore)
+ {
+ lock (this)
+ {
+ if (segment == null && (!onlyDocStore || docStoreSegment == null))
+ {
+ segment = writer.NewSegmentName();
+ System.Diagnostics.Debug.Assert(numDocsInRAM == 0);
+ }
+ if (docStoreSegment == null)
+ {
+ docStoreSegment = segment;
+ System.Diagnostics.Debug.Assert(numDocsInStore == 0);
+ }
+ }
+ }
+
+ /// Returns a free (idle) ThreadState that may be used for
+ /// indexing this one document. This call also pauses if a
+ /// flush is pending. If delTerm is non-null then we
+ /// buffer this deleted term after the thread state has
+ /// been acquired.
+ ///
+ internal DocumentsWriterThreadState GetThreadState(Document doc, Term delTerm)
+ {
+ lock (this)
+ {
+
+ // First, find a thread state. If this thread already
+ // has affinity to a specific ThreadState, use that one
+ // again.
+ DocumentsWriterThreadState state = threadBindings[ThreadClass.Current()];
+ if (state == null)
+ {
+
+ // First time this thread has called us since last
+ // flush. Find the least loaded thread state:
+ DocumentsWriterThreadState minThreadState = null;
+ for (int i = 0; i < threadStates.Length; i++)
+ {
+ DocumentsWriterThreadState ts = threadStates[i];
+ if (minThreadState == null || ts.numThreads < minThreadState.numThreads)
+ minThreadState = ts;
+ }
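+ // Note: a new private ThreadState is created only while fewer than
+ // MAX_THREAD_STATE (5) states exist and none of them is idle; otherwise
+ // this thread shares the least-loaded existing state.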
+ if (minThreadState != null && (minThreadState.numThreads == 0 || threadStates.Length >= MAX_THREAD_STATE))
+ {
+ state = minThreadState;
+ state.numThreads++;
+ }
+ else
+ {
+ // Just create a new "private" thread state
+ DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1 + threadStates.Length];
+ if (threadStates.Length > 0)
+ Array.Copy(threadStates, 0, newArray, 0, threadStates.Length);
+ state = newArray[threadStates.Length] = new DocumentsWriterThreadState(this);
+ threadStates = newArray;
+ }
+ threadBindings[ThreadClass.Current()] = state;
+ }
+
+ // Next, wait until my thread state is idle (in case
+ // it's shared with other threads) and for threads to
+ // not be paused nor a flush pending:
+ WaitReady(state);
+
+ // Allocate segment name if this is the first doc since
+ // last flush:
+ InitSegmentName(false);
+
+ state.isIdle = false;
+
+ bool success = false;
+ try
+ {
+ state.docState.docID = nextDocID;
+
+ System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init start"));
+
+ if (delTerm != null)
+ {
+ AddDeleteTerm(delTerm, state.docState.docID);
+ state.doFlushAfter = TimeToFlushDeletes();
+ }
+
+ System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init after delTerm"));
+
+ nextDocID++;
+ numDocsInRAM++;
+
+ // We must at this point commit to flushing to ensure we
+ // always get N docs when we flush by doc count, even if
+ // > 1 thread is adding documents:
+ if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs)
+ {
+ flushPending = true;
+ state.doFlushAfter = true;
+ }
+
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ // Forcefully idle this ThreadState:
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ if (state.doFlushAfter)
+ {
+ state.doFlushAfter = false;
+ flushPending = false;
+ }
+ }
+ }
+
+ return state;
+ }
+ }
+
+ /// Returns true if the caller (IndexWriter) should now
+ /// flush.
+ ///
+ internal bool AddDocument(Document doc, Analyzer analyzer)
+ {
+ return UpdateDocument(doc, analyzer, null);
+ }
+
+ internal bool UpdateDocument(Term t, Document doc, Analyzer analyzer)
+ {
+ return UpdateDocument(doc, analyzer, t);
+ }
+
+ internal bool UpdateDocument(Document doc, Analyzer analyzer, Term delTerm)
+ {
+
+ // This call is synchronized but fast
+ DocumentsWriterThreadState state = GetThreadState(doc, delTerm);
+
+ DocState docState = state.docState;
+ docState.doc = doc;
+ docState.analyzer = analyzer;
+
+ bool doReturnFalse = false; // {{Aroush-2.9}} to handle return from finally clause
+
+ bool success = false;
+ try
+ {
+ // This call is not synchronized and does all the
+ // work
+ DocWriter perDoc;
+ try
+ {
+ perDoc = state.consumer.ProcessDocument();
+ }
+ finally
+ {
+ docState.Clear();
+ }
+ // This call is synchronized but fast
+ FinishDocument(state, perDoc);
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ {
+ lock (this)
+ {
+
+ if (aborting)
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ Abort();
+ }
+ else
+ {
+ skipDocWriter.docID = docState.docID;
+ bool success2 = false;
+ try
+ {
+ waitQueue.Add(skipDocWriter);
+ success2 = true;
+ }
+ finally
+ {
+ if (!success2)
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ Abort();
+ // return false; // {{Aroush-2.9}} this 'return false' is move to outside finally
+ doReturnFalse = true;
+ }
+ }
+
+ if (!doReturnFalse) // {{Aroush-2.9}} added because of the above 'return false' removal
+ {
+ state.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+
+ // If this thread state had decided to flush, we
+ // must clear it so another thread can flush
+ if (state.doFlushAfter)
+ {
+ state.doFlushAfter = false;
+ flushPending = false;
+ System.Threading.Monitor.PulseAll(this);
+ }
+
+ // Immediately mark this document as deleted
+ // since likely it was partially added. This
+ // keeps indexing as "all or none" (atomic) when
+ // adding a document:
+ AddDeleteDocID(state.docState.docID);
+ }
+ }
+ }
+ }
+ }
+
+ if (doReturnFalse) // {{Aroush-2.9}} see comment above
+ {
+ return false;
+ }
+
+ return state.doFlushAfter || TimeToFlushDeletes();
+ }
+
+ // for testing
+ internal int GetNumBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return deletesInRAM.numTerms;
+ }
+ }
+
+ // for testing
+ internal IDictionary<Term, BufferedDeletes.Num> GetBufferedDeleteTerms()
+ {
+ lock (this)
+ {
+ return deletesInRAM.terms;
+ }
+ }
+
+ /// Called whenever a merge has completed and the merged segments had deletions
+ internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount)
+ {
+ lock (this)
+ {
+ if (docMaps == null)
+ // The merged segments had no deletes so docIDs did not change and we have nothing to do
+ return ;
+ MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount);
+ deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
+ deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount);
+ flushedDocCount -= mapper.docShift;
+ }
+ }
+
+ private void WaitReady(DocumentsWriterThreadState state)
+ {
+ lock (this)
+ {
+
+ while (!closed && ((state != null && !state.isIdle) || pauseThreads != 0 || flushPending || aborting))
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+
+ if (closed)
+ throw new AlreadyClosedException("this IndexWriter is closed");
+ }
+ }
+
+ internal bool BufferDeleteTerms(Term[] terms)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ for (int i = 0; i < terms.Length; i++)
+ AddDeleteTerm(terms[i], numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteTerm(Term term)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ AddDeleteTerm(term, numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteQueries(Query[] queries)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ for (int i = 0; i < queries.Length; i++)
+ AddDeleteQuery(queries[i], numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool BufferDeleteQuery(Query query)
+ {
+ lock (this)
+ {
+ WaitReady(null);
+ AddDeleteQuery(query, numDocsInRAM);
+ return TimeToFlushDeletes();
+ }
+ }
+
+ internal bool DeletesFull()
+ {
+ lock (this)
+ {
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
+ }
+ }
+
+ internal bool DoApplyDeletes()
+ {
+ lock (this)
+ {
+ // Very similar to deletesFull(), except we don't count
+ // numBytesAlloc, because we are checking whether
+ // deletes (alone) are consuming too many resources now
+ // and thus should be applied. We apply deletes if RAM
+ // usage is > 1/2 of our allowed RAM buffer, to prevent
+ // too-frequent flushing of a long tail of tiny segments
+ // when merges (which always apply deletes) are
+ // infrequent.
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms));
+ }
+ }
+
+ private bool TimeToFlushDeletes()
+ {
+ lock (this)
+ {
+ return (bufferIsFull || DeletesFull()) && SetFlushPending();
+ }
+ }
+
+ internal int MaxBufferedDeleteTerms
+ {
+ set { this.maxBufferedDeleteTerms = value; }
+ get { return maxBufferedDeleteTerms; }
+ }
+
+ internal bool HasDeletes()
+ {
+ lock (this)
+ {
+ return deletesFlushed.Any();
+ }
+ }
+
+ internal bool ApplyDeletes(SegmentInfos infos)
+ {
+ lock (this)
+ {
+ if (!HasDeletes())
+ return false;
+
+ if (infoStream != null)
+ Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments.");
+
+ int infosEnd = infos.Count;
+
+ int docStart = 0;
+ bool any = false;
+ for (int i = 0; i < infosEnd; i++)
+ {
+
+ // Make sure we never attempt to apply deletes to
+ // segment in external dir
+ System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory);
+
+ SegmentReader reader = writer.readerPool.Get(infos.Info(i), false);
+ try
+ {
+ any |= ApplyDeletes(reader, docStart);
+ docStart += reader.MaxDoc;
+ }
+ finally
+ {
+ writer.readerPool.Release(reader);
+ }
+ }
+
+ deletesFlushed.Clear();
+
+ return any;
+ }
+ }
+
+ // used only by assert
+ private Term lastDeleteTerm;
+
+ // used only by assert
+ private bool CheckDeleteTerm(Term term)
+ {
+ if (term != null) {
+ System.Diagnostics.Debug.Assert(lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0, "lastTerm=" + lastDeleteTerm + " vs term=" + term);
+ }
+ lastDeleteTerm = term;
+ return true;
+ }
+
+ // Apply buffered delete terms, queries and docIDs to the
+ // provided reader
+ private bool ApplyDeletes(IndexReader reader, int docIDStart)
+ {
+ lock (this)
+ {
+ int docEnd = docIDStart + reader.MaxDoc;
+ bool any = false;
+
+ System.Diagnostics.Debug.Assert(CheckDeleteTerm(null));
+
+ // Delete by term
+ TermDocs docs = reader.TermDocs();
+ try
+ {
+ foreach(KeyValuePair<Term, BufferedDeletes.Num> entry in deletesFlushed.terms)
+ {
+ Term term = entry.Key;
+ // LUCENE-2086: we should be iterating a TreeMap,
+ // here, so terms better be in order:
+ System.Diagnostics.Debug.Assert(CheckDeleteTerm(term));
+ docs.Seek(term);
+ int limit = entry.Value.GetNum();
+ while (docs.Next())
+ {
+ int docID = docs.Doc;
+ if (docIDStart + docID >= limit)
+ break;
+ reader.DeleteDocument(docID);
+ any = true;
+ }
+ }
+ }
+ finally
+ {
+ docs.Close();
+ }
+
+ // Delete by docID
+ foreach(int docIdInt in deletesFlushed.docIDs)
+ {
+ int docID = docIdInt;
+ if (docID >= docIDStart && docID < docEnd)
+ {
+ reader.DeleteDocument(docID - docIDStart);
+ any = true;
+ }
+ }
+
+ // Delete by query
+ IndexSearcher searcher = new IndexSearcher(reader);
+ foreach(KeyValuePair<Query, int> entry in deletesFlushed.queries)
+ {
+ Query query = (Query) entry.Key;
+ int limit = (int)entry.Value;
+ Weight weight = query.Weight(searcher);
+ Scorer scorer = weight.Scorer(reader, true, false);
+ if (scorer != null)
+ {
+ while (true)
+ {
+ int doc = scorer.NextDoc();
+ if (((long) docIDStart) + doc >= limit)
+ break;
+ reader.DeleteDocument(doc);
+ any = true;
+ }
+ }
+ }
+ searcher.Close();
+ return any;
+ }
+ }
+
+ // Buffer a term in bufferedDeleteTerms, which records the
+ // current number of documents buffered in ram so that the
+ // delete term will be applied to those documents as well
+ // as the disk segments.
+ private void AddDeleteTerm(Term term, int docCount)
+ {
+ lock (this)
+ {
+ BufferedDeletes.Num num = deletesInRAM.terms[term];
+ int docIDUpto = flushedDocCount + docCount;
+ if (num == null)
+ deletesInRAM.terms[term] = new BufferedDeletes.Num(docIDUpto);
+ else
+ num.SetNum(docIDUpto);
+ deletesInRAM.numTerms++;
+
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_TERM + term.Text.Length * CHAR_NUM_BYTE);
+ }
+ }
+
+ // Buffer a specific docID for deletion. Currently only
+ // used when we hit a exception when adding a document
+ private void AddDeleteDocID(int docID)
+ {
+ lock (this)
+ {
+ deletesInRAM.docIDs.Add(flushedDocCount + docID);
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_DOCID);
+ }
+ }
+
+ private void AddDeleteQuery(Query query, int docID)
+ {
+ lock (this)
+ {
+ deletesInRAM.queries[query] = flushedDocCount + docID;
+ deletesInRAM.AddBytesUsed(BYTES_PER_DEL_QUERY);
+ }
+ }
+
+ internal bool DoBalanceRAM()
+ {
+ lock (this)
+ {
+ return ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger);
+ }
+ }
+
+ /// Does the synchronized work to finish/flush the
+ /// inverted document.
+ ///
+ private void FinishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter)
+ {
+
+ if (DoBalanceRAM())
+ // Must call this w/o holding synchronized(this) else
+ // we'll hit deadlock:
+ BalanceRAM();
+
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(docWriter == null || docWriter.docID == perThread.docState.docID);
+
+ if (aborting)
+ {
+
+ // We are currently aborting, and another thread is
+ // waiting for me to become idle. We just forcefully
+ // idle this threadState; it will be fully reset by
+ // abort()
+ if (docWriter != null)
+ try
+ {
+ docWriter.Abort();
+ }
+ catch (System.Exception)
+ {
+ }
+
+ perThread.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ return ;
+ }
+
+ bool doPause;
+
+ if (docWriter != null)
+ doPause = waitQueue.Add(docWriter);
+ else
+ {
+ skipDocWriter.docID = perThread.docState.docID;
+ doPause = waitQueue.Add(skipDocWriter);
+ }
+
+ if (doPause)
+ WaitForWaitQueue();
+
+ if (bufferIsFull && !flushPending)
+ {
+ flushPending = true;
+ perThread.doFlushAfter = true;
+ }
+
+ perThread.isIdle = true;
+ System.Threading.Monitor.PulseAll(this);
+ }
+ }
+
+ internal void WaitForWaitQueue()
+ {
+ lock (this)
+ {
+ do
+ {
+ System.Threading.Monitor.Wait(this);
+ }
+ while (!waitQueue.DoResume());
+ }
+ }
+
+ internal class SkipDocWriter:DocWriter
+ {
+ public override void Finish()
+ {
+ }
+ public override void Abort()
+ {
+ }
+ public override long SizeInBytes()
+ {
+ return 0;
+ }
+ }
+ internal SkipDocWriter skipDocWriter;
+
+ internal long GetRAMUsed()
+ {
+ return numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed;
+ }
+
+ internal long numBytesAlloc;
+ internal long numBytesUsed;
+
+ internal System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat;
+
+ // Coarse estimates used to measure RAM usage of buffered deletes
+ internal const int OBJECT_HEADER_BYTES = 8;
+ internal static readonly int POINTER_NUM_BYTE;
+ internal const int INT_NUM_BYTE = 4;
+ internal const int CHAR_NUM_BYTE = 2;
+
+ /* Rough logic: HashMap has an array[Entry] w/ varying
+ load factor (say 2 * POINTER). Entry is object w/ Term
+ key, BufferedDeletes.Num val, int hash, Entry next
+ (OBJ_HEADER + 3*POINTER + INT). Term is object w/
+ String field and String text (OBJ_HEADER + 2*POINTER).
+ We don't count Term's field since it's interned.
+ Term's text is String (OBJ_HEADER + 4*INT + POINTER +
+ OBJ_HEADER + string.length*CHAR). BufferedDeletes.num is
+ OBJ_HEADER + INT. */
+
+ internal static readonly int BYTES_PER_DEL_TERM = 8 * POINTER_NUM_BYTE + 5 * OBJECT_HEADER_BYTES + 6 * INT_NUM_BYTE;
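+ // Worked example (assuming 8-byte pointers on a 64-bit runtime): 8*8 + 5*8 + 6*4 = 128 bytes
+ // per buffered delete Term, plus 2 bytes per character of term text (see AddDeleteTerm).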
+
+ /* Rough logic: del docIDs are List. Say list
+ allocates ~2X size (2*POINTER). Integer is OBJ_HEADER
+ + int */
+ internal static readonly int BYTES_PER_DEL_DOCID = 2 * POINTER_NUM_BYTE + OBJECT_HEADER_BYTES + INT_NUM_BYTE;
+
+ /* Rough logic: HashMap has an array[Entry] w/ varying
+ load factor (say 2 * POINTER). Entry is object w/
+ Query key, Integer val, int hash, Entry next
+ (OBJ_HEADER + 3*POINTER + INT). Query we often
+ undercount (say 24 bytes). Integer is OBJ_HEADER + INT. */
+ internal static readonly int BYTES_PER_DEL_QUERY = 5 * POINTER_NUM_BYTE + 2 * OBJECT_HEADER_BYTES + 2 * INT_NUM_BYTE + 24;
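+ // Worked example (same 8-byte pointer assumption): 5*8 + 2*8 + 2*4 + 24 = 88 bytes per buffered delete Query.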
+
+ /* Initial chunks size of the shared byte[] blocks used to
+ store postings data */
+ internal const int BYTE_BLOCK_SHIFT = 15;
+ internal static readonly int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
+ internal static readonly int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1;
+ internal static readonly int BYTE_BLOCK_NOT_MASK = ~ BYTE_BLOCK_MASK;
+
+ internal class ByteBlockAllocator : ByteBlockPool.Allocator
+ {
+ public ByteBlockAllocator(DocumentsWriter enclosingInstance, int blockSize)
+ {
+ this.blockSize = blockSize;
+ InitBlock(enclosingInstance);
+ }
+ private void InitBlock(DocumentsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocumentsWriter enclosingInstance;
+ public DocumentsWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
+
+ int blockSize;
+ internal List<byte[]> freeByteBlocks = new List<byte[]>();
+
+ /* Allocate another byte[] from the shared pool */
+ public /*internal*/ override byte[] GetByteBlock(bool trackAllocations)
+ {
+ lock (Enclosing_Instance)
+ {
+ int size = freeByteBlocks.Count;
+ byte[] b;
+ if (0 == size)
+ {
+ // Always record a block allocated, even if
+ // trackAllocations is false. This is necessary
+ // because this block will be shared between
+ // things that don't track allocations (term
+ // vectors) and things that do (freq/prox
+ // postings).
+ Enclosing_Instance.numBytesAlloc += blockSize;
+ b = new byte[blockSize];
+ }
+ else
+ {
+ b = freeByteBlocks[size - 1];
+ freeByteBlocks.RemoveAt(size - 1);
+ }
+ if (trackAllocations)
+ Enclosing_Instance.numBytesUsed += blockSize;
+ System.Diagnostics.Debug.Assert(Enclosing_Instance.numBytesUsed <= Enclosing_Instance.numBytesAlloc);
+ return b;
+ }
+ }
+
+ /* Return byte[]'s to the pool */
+ public /*internal*/ override void RecycleByteBlocks(byte[][] blocks, int start, int end)
+ {
+ lock (Enclosing_Instance)
+ {
+ for (int i = start; i < end; i++)
+ {
+ freeByteBlocks.Add(blocks[i]);
+ blocks[i] = null;
+ }
+ }
+ }
+
+ public /*internal*/ override void RecycleByteBlocks(IList<byte[]> blocks)
+ {
+ lock (Enclosing_Instance)
+ {
+ int size = blocks.Count;
+ for (int i = 0; i < size; i++)
+ freeByteBlocks.Add(blocks[i]);
+ }
+ }
+ }
+
+ /* Initial chunks size of the shared int[] blocks used to
+ store postings data */
+ internal const int INT_BLOCK_SHIFT = 13;
+ internal static readonly int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
+ internal static readonly int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
+
+ private List<int[]> freeIntBlocks = new List<int[]>();
+
+ /* Allocate another int[] from the shared pool */
+ internal int[] GetIntBlock(bool trackAllocations)
+ {
+ lock (this)
+ {
+ int size = freeIntBlocks.Count;
+ int[] b;
+ if (0 == size)
+ {
+ // Always record a block allocated, even if
+ // trackAllocations is false. This is necessary
+ // because this block will be shared between
+ // things that don't track allocations (term
+ // vectors) and things that do (freq/prox
+ // postings).
+ numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE;
+ b = new int[INT_BLOCK_SIZE];
+ }
+ else
+ {
+ b = freeIntBlocks[size - 1];
+ freeIntBlocks.RemoveAt(size - 1);
+ }
+ if (trackAllocations)
+ numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE;
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ return b;
+ }
+ }
+
+ internal void BytesAllocated(long numBytes)
+ {
+ lock (this)
+ {
+ numBytesAlloc += numBytes;
+ }
+ }
+
+ internal void BytesUsed(long numBytes)
+ {
+ lock (this)
+ {
+ numBytesUsed += numBytes;
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ }
+ }
+
+ /* Return int[]s to the pool */
+ internal void RecycleIntBlocks(int[][] blocks, int start, int end)
+ {
+ lock (this)
+ {
+ for (int i = start; i < end; i++)
+ {
+ freeIntBlocks.Add(blocks[i]);
+ blocks[i] = null;
+ }
+ }
+ }
+
+ internal ByteBlockAllocator byteBlockAllocator;
+
+ internal static int PER_DOC_BLOCK_SIZE = 1024;
+
+ ByteBlockAllocator perDocAllocator;
+
+ /* Initial chunk size of the shared char[] blocks used to
+ store term text */
+ internal const int CHAR_BLOCK_SHIFT = 14;
+ internal static readonly int CHAR_BLOCK_SIZE = 1 << CHAR_BLOCK_SHIFT;
+ internal static readonly int CHAR_BLOCK_MASK = CHAR_BLOCK_SIZE - 1;
+
+ internal static readonly int MAX_TERM_LENGTH = CHAR_BLOCK_SIZE - 1;
+
+ private List<char[]> freeCharBlocks = new List<char[]>();
+
+ /* Allocate another char[] from the shared pool */
+ internal char[] GetCharBlock()
+ {
+ lock (this)
+ {
+ int size = freeCharBlocks.Count;
+ char[] c;
+ if (0 == size)
+ {
+ numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE;
+ c = new char[CHAR_BLOCK_SIZE];
+ }
+ else
+ {
+ c = freeCharBlocks[size - 1];
+ freeCharBlocks.RemoveAt(size - 1);
+ }
+ // We always track allocations of char blocks, for now,
+ // because nothing that skips allocation tracking
+ // (currently only term vectors) uses its own char
+ // blocks.
+ numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE;
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ return c;
+ }
+ }
+
+ /* Return char[]s to the pool */
+ internal void RecycleCharBlocks(char[][] blocks, int numBlocks)
+ {
+ lock (this)
+ {
+ for (int i = 0; i < numBlocks; i++)
+ {
+ freeCharBlocks.Add(blocks[i]);
+ blocks[i] = null;
+ }
+ }
+ }
+
+ internal System.String ToMB(long v)
+ {
+ return System.String.Format(nf, "{0:f}", new System.Object[] { (v / 1024F / 1024F) });
+ }
+
+
+ /* We have four pools of RAM: Postings, byte blocks
+ * (holds freq/prox posting data), char blocks (holds
+ * characters in the term) and per-doc buffers (stored fields/term vectors).
+ * Different docs require varying amount of storage from
+ * these four classes.
+ *
+ * For example, docs with many unique single-occurrence
+ * short terms will use up the Postings RAM and hardly any
+ * of the other two. Whereas docs with very large terms
+ * will use a lot of char blocks RAM and relatively less of
+ * the other two. This method just frees allocations from
+ * the pools once we are over-budget, which balances the
+ * pools to match the current docs. */
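+ // Each pass of the freeing loop below releases roughly 32 KB from one pool in turn
+ // (byte blocks, char blocks, int blocks, a batch of per-doc blocks), and every fifth
+ // pass asks the consumer to drop recycled state, until allocation falls below freeLevel.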
+ internal void BalanceRAM()
+ {
+
+ // We flush when we've used our target usage
+ long flushTrigger = ramBufferSize;
+
+ long deletesRAMUsed = deletesInRAM.bytesUsed + deletesFlushed.bytesUsed;
+
+ if (numBytesAlloc + deletesRAMUsed > freeTrigger)
+ {
+
+ if (infoStream != null)
+ Message(
+ " RAM: now balance allocations: usedMB=" + ToMB(numBytesUsed) +
+ " vs trigger=" + ToMB(flushTrigger) +
+ " allocMB=" + ToMB(numBytesAlloc) +
+ " deletesMB=" + ToMB(deletesRAMUsed) +
+ " vs trigger=" + ToMB(freeTrigger) +
+ " byteBlockFree=" + ToMB(byteBlockAllocator.freeByteBlocks.Count * BYTE_BLOCK_SIZE) +
+ " perDocFree=" + ToMB(perDocAllocator.freeByteBlocks.Count * PER_DOC_BLOCK_SIZE) +
+ " charBlockFree=" + ToMB(freeCharBlocks.Count * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE));
+
+ long startBytesAlloc = numBytesAlloc + deletesRAMUsed;
+
+ int iter = 0;
+
+ // We free equally from each pool in 32 KB
+ // chunks until we are below our threshold
+ // (freeLevel)
+
+ bool any = true;
+
+ while (numBytesAlloc + deletesRAMUsed > freeLevel)
+ {
+
+ lock (this)
+ {
+ if (0 == perDocAllocator.freeByteBlocks.Count
+ && 0 == byteBlockAllocator.freeByteBlocks.Count
+ && 0 == freeCharBlocks.Count
+ && 0 == freeIntBlocks.Count
+ && !any)
+ {
+ // Nothing else to free -- must flush now.
+ bufferIsFull = numBytesUsed + deletesRAMUsed > flushTrigger;
+ if (infoStream != null)
+ {
+ if (bufferIsFull)
+ Message(" nothing to free; now set bufferIsFull");
+ else
+ Message(" nothing to free");
+ }
+ System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc);
+ break;
+ }
+
+ if ((0 == iter % 5) && byteBlockAllocator.freeByteBlocks.Count > 0)
+ {
+ byteBlockAllocator.freeByteBlocks.RemoveAt(byteBlockAllocator.freeByteBlocks.Count - 1);
+ numBytesAlloc -= BYTE_BLOCK_SIZE;
+ }
+
+ if ((1 == iter % 5) && freeCharBlocks.Count > 0)
+ {
+ freeCharBlocks.RemoveAt(freeCharBlocks.Count - 1);
+ numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE;
+ }
+
+ if ((2 == iter % 5) && freeIntBlocks.Count > 0)
+ {
+ freeIntBlocks.RemoveAt(freeIntBlocks.Count - 1);
+ numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE;
+ }
+
+ if ((3 == iter % 5) && perDocAllocator.freeByteBlocks.Count > 0)
+ {
+ // Remove upwards of 32 blocks (each block is 1K)
+ for (int i = 0; i < 32; ++i)
+ {
+ perDocAllocator.freeByteBlocks.RemoveAt(perDocAllocator.freeByteBlocks.Count - 1);
+ numBytesAlloc -= PER_DOC_BLOCK_SIZE;
+ if (perDocAllocator.freeByteBlocks.Count == 0)
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ if ((4 == iter % 5) && any)
+ // Ask consumer to free any recycled state
+ any = consumer.FreeRAM();
+
+ iter++;
+ }
+
+ if (infoStream != null)
+ Message(System.String.Format(nf, " after free: freedMB={0:f} usedMB={1:f} allocMB={2:f}",
+ new System.Object[] { ((startBytesAlloc - numBytesAlloc) / 1024.0 / 1024.0), (numBytesUsed / 1024.0 / 1024.0), (numBytesAlloc / 1024.0 / 1024.0) }));
+ }
+ else
+ {
+ // If we have not crossed the 100% mark, but have
+ // crossed the 95% mark of RAM we are actually
+ // using, go ahead and flush. This prevents
+ // over-allocating and then freeing, with every
+ // flush.
+ lock (this)
+ {
+
+ if (numBytesUsed + deletesRAMUsed > flushTrigger)
+ {
+ if (infoStream != null)
+ Message(System.String.Format(nf, " RAM: now flush @ usedMB={0:f} allocMB={1:f} triggerMB={2:f}",
+ new object[] { (numBytesUsed / 1024.0 / 1024.0), (numBytesAlloc / 1024.0 / 1024.0), (flushTrigger / 1024.0 / 1024.0) }));
+
+ bufferIsFull = true;
+ }
+ }
+ }
+ }
+
+ internal WaitQueue waitQueue;
+
+ internal class WaitQueue
+ {
+ private void InitBlock(DocumentsWriter enclosingInstance)
+ {
+ this.enclosingInstance = enclosingInstance;
+ }
+ private DocumentsWriter enclosingInstance;
+ public DocumentsWriter Enclosing_Instance
+ {
+ get
+ {
+ return enclosingInstance;
+ }
+
+ }
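+ // Circular buffer of docs that finished out of order and are
+ // waiting to be written; nextWriteDocID is the next docID that may
+ // be written to the doc stores, and nextWriteLoc is its slot.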
+ internal DocWriter[] waiting;
+ internal int nextWriteDocID;
+ internal int nextWriteLoc;
+ internal int numWaiting;
+ internal long waitingBytes;
+
+ public WaitQueue(DocumentsWriter enclosingInstance)
+ {
+ InitBlock(enclosingInstance);
+ waiting = new DocWriter[10];
+ }
+
+ internal void Reset()
+ {
+ lock (this)
+ {
+ // NOTE: nextWriteLoc doesn't need to be reset
+ System.Diagnostics.Debug.Assert(numWaiting == 0);
+ System.Diagnostics.Debug.Assert(waitingBytes == 0);
+ nextWriteDocID = 0;
+ }
+ }
+
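+ // Hysteresis thresholds come from the enclosing DocumentsWriter:
+ // callers pause once waitingBytes exceeds waitQueuePauseBytes and
+ // resume only after it falls to waitQueueResumeBytes or below.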
+ internal bool DoResume()
+ {
+ lock (this)
+ {
+ return waitingBytes <= Enclosing_Instance.waitQueueResumeBytes;
+ }
+ }
+
+ internal bool DoPause()
+ {
+ lock (this)
+ {
+ return waitingBytes > Enclosing_Instance.waitQueuePauseBytes;
+ }
+ }
+
+ internal void Abort()
+ {
+ lock (this)
+ {
+ int count = 0;
+ for (int i = 0; i < waiting.Length; i++)
+ {
+ DocWriter doc = waiting[i];
+ if (doc != null)
+ {
+ doc.Abort();
+ waiting[i] = null;
+ count++;
+ }
+ }
+ waitingBytes = 0;
+ System.Diagnostics.Debug.Assert(count == numWaiting);
+ numWaiting = 0;
+ }
+ }
+
+ private void WriteDocument(DocWriter doc)
+ {
+ System.Diagnostics.Debug.Assert(doc == Enclosing_Instance.skipDocWriter || nextWriteDocID == doc.docID);
+ bool success = false;
+ try
+ {
+ doc.Finish();
+ nextWriteDocID++;
+ Enclosing_Instance.numDocsInStore++;
+ nextWriteLoc++;
+ System.Diagnostics.Debug.Assert(nextWriteLoc <= waiting.Length);
+ if (nextWriteLoc == waiting.Length)
+ nextWriteLoc = 0;
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ Enclosing_Instance.SetAborting();
+ }
+ }
+
+ public bool Add(DocWriter doc)
+ {
+ lock (this)
+ {
+
+ System.Diagnostics.Debug.Assert(doc.docID >= nextWriteDocID);
+
+ if (doc.docID == nextWriteDocID)
+ {
+ WriteDocument(doc);
+ while (true)
+ {
+ doc = waiting[nextWriteLoc];
+ if (doc != null)
+ {
+ numWaiting--;
+ waiting[nextWriteLoc] = null;
+ waitingBytes -= doc.SizeInBytes();
+ WriteDocument(doc);
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+
+ // I finished before documents that were added
+ // before me. This can easily happen when I am a
+ // small doc and the docs before me were large, or,
+ // just due to luck in the thread scheduling. Just
+ // add myself to the queue and when that large doc
+ // finishes, it will flush me:
+ int gap = doc.docID - nextWriteDocID;
+ if (gap >= waiting.Length)
+ {
+ // Grow queue
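+ // (the two copies below unroll the ring so that the slot for
+ // nextWriteDocID lands at index 0 of the new array)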
+ DocWriter[] newArray = new DocWriter[ArrayUtil.GetNextSize(gap)];
+ System.Diagnostics.Debug.Assert(nextWriteLoc >= 0);
+ Array.Copy(waiting, nextWriteLoc, newArray, 0, waiting.Length - nextWriteLoc);
+ Array.Copy(waiting, 0, newArray, waiting.Length - nextWriteLoc, nextWriteLoc);
+ nextWriteLoc = 0;
+ waiting = newArray;
+ gap = doc.docID - nextWriteDocID;
+ }
+
+ int loc = nextWriteLoc + gap;
+ if (loc >= waiting.Length)
+ loc -= waiting.Length;
+
+ // We should only wrap one time
+ System.Diagnostics.Debug.Assert(loc < waiting.Length);
+
+ // Nobody should be in my spot!
+ System.Diagnostics.Debug.Assert(waiting[loc] == null);
+ waiting[loc] = doc;
+ numWaiting++;
+ waitingBytes += doc.SizeInBytes();
+ }
+
+ return DoPause();
+ }
+ }
+ }
+ static DocumentsWriter()
+ {
+ DefaultIndexingChain = new AnonymousClassIndexingChain();
+ POINTER_NUM_BYTE = Constants.JRE_IS_64BIT ? 8 : 4;
+ }
+
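+ // Expose internal constants for the NUnit tests.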
+ public static int BYTE_BLOCK_SIZE_ForNUnit
+ {
+ get { return BYTE_BLOCK_SIZE; }
+ }
+
+ public static int CHAR_BLOCK_SIZE_ForNUnit
+ {
+ get { return CHAR_BLOCK_SIZE; }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/DocumentsWriterThreadState.cs b/external/Lucene.Net.Light/src/core/Index/DocumentsWriterThreadState.cs
new file mode 100644
index 0000000000..e20fbee8a2
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/DocumentsWriterThreadState.cs
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ /// Used by DocumentsWriter to maintain per-thread state.
+ /// We keep a separate Posting hash and other state for each
+ /// thread and then merge postings hashes from all threads
+ /// when writing the segment.
+ ///
+ sealed class DocumentsWriterThreadState
+ {
+
+ internal bool isIdle = true; // false if this is currently in use by a thread
+ internal int numThreads = 1; // Number of threads that share this instance
+ internal bool doFlushAfter; // true if we should flush after processing current doc
+ internal DocConsumerPerThread consumer;
+ internal DocumentsWriter.DocState docState;
+
+ internal DocumentsWriter docWriter;
+
+ public DocumentsWriterThreadState(DocumentsWriter docWriter)
+ {
+ this.docWriter = docWriter;
+ docState = new DocumentsWriter.DocState();
+ docState.maxFieldLength = docWriter.maxFieldLength;
+ docState.infoStream = docWriter.infoStream;
+ docState.similarity = docWriter.similarity;
+ docState.docWriter = docWriter;
+ consumer = docWriter.consumer.AddThread(this);
+ }
+
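+ // Reset after a flush: no threads remain associated with this state
+ // and no follow-up flush is pending.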
+ internal void DoAfterFlush()
+ {
+ numThreads = 0;
+ doFlushAfter = false;
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/FieldInfo.cs b/external/Lucene.Net.Light/src/core/Index/FieldInfo.cs
new file mode 100644
index 0000000000..bfca8afb4c
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/FieldInfo.cs
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+
+namespace Lucene.Net.Index
+{
+
+ public sealed class FieldInfo : System.ICloneable
+ {
+ internal System.String name;
+ internal bool isIndexed;
+ internal int number;
+
+ // true if term vector for this field should be stored
+ internal bool storeTermVector;
+ internal bool storeOffsetWithTermVector;
+ internal bool storePositionWithTermVector;
+
+ internal bool omitNorms; // omit norms associated with indexed fields
+ internal bool omitTermFreqAndPositions;
+
+ internal bool storePayloads; // whether this field stores payloads together with term positions
+
+ internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
+ {
+ name = na;
+ isIndexed = tk;
+ number = nu;
+ if (isIndexed)
+ {
+ this.storeTermVector = storeTermVector;
+ this.storeOffsetWithTermVector = storeOffsetWithTermVector;
+ this.storePositionWithTermVector = storePositionWithTermVector;
+ this.storePayloads = storePayloads;
+ this.omitNorms = omitNorms;
+ this.omitTermFreqAndPositions = omitTermFreqAndPositions;
+ }
+ else
+ {
+ // for non-indexed fields, leave defaults
+ this.storeTermVector = false;
+ this.storeOffsetWithTermVector = false;
+ this.storePositionWithTermVector = false;
+ this.storePayloads = false;
+ this.omitNorms = true;
+ this.omitTermFreqAndPositions = false;
+ }
+ }
+
+ public System.Object Clone()
+ {
+ return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+ }
+
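+ // Merges new flags into this FieldInfo; flags only become
+ // "stickier" (e.g. once a field is indexed, stores term vectors,
+ // or stores payloads in any document, that setting stays true).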
+ internal void Update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions)
+ {
+ if (this.isIndexed != isIndexed)
+ {
+ this.isIndexed = true; // once indexed, always index
+ }
+ if (isIndexed)
+ {
+ // if updated field data is not for indexing, leave the updates out
+ if (this.storeTermVector != storeTermVector)
+ {
+ this.storeTermVector = true; // once vector, always vector
+ }
+ if (this.storePositionWithTermVector != storePositionWithTermVector)
+ {
+ this.storePositionWithTermVector = true; // once vector, always vector
+ }
+ if (this.storeOffsetWithTermVector != storeOffsetWithTermVector)
+ {
+ this.storeOffsetWithTermVector = true; // once vector, always vector
+ }
+ if (this.storePayloads != storePayloads)
+ {
+ this.storePayloads = true;
+ }
+ if (this.omitNorms != omitNorms)
+ {
+ this.omitNorms = false; // once norms are stored, always store
+ }
+ if (this.omitTermFreqAndPositions != omitTermFreqAndPositions)
+ {
+ this.omitTermFreqAndPositions = true; // once omitTermFreqAndPositions is requested for a field, term freqs and positions stay omitted for its life
+ }
+ }
+ }
+
+ public bool storePayloads_ForNUnit
+ {
+ get { return storePayloads; }
+ }
+
+ public System.String name_ForNUnit
+ {
+ get { return name; }
+ }
+
+ public bool isIndexed_ForNUnit
+ {
+ get { return isIndexed; }
+ }
+
+ public bool omitNorms_ForNUnit
+ {
+ get { return omitNorms; }
+ }
+
+ public bool omitTermFreqAndPositions_ForNUnit
+ {
+ get { return omitTermFreqAndPositions; }
+ }
+
+ public bool storeTermVector_ForNUnit
+ {
+ get { return storeTermVector; }
+ }
+ }
+}
\ No newline at end of file
diff --git a/external/Lucene.Net.Light/src/core/Index/FieldInfos.cs b/external/Lucene.Net.Light/src/core/Index/FieldInfos.cs
new file mode 100644
index 0000000000..8c9cae6950
--- /dev/null
+++ b/external/Lucene.Net.Light/src/core/Index/FieldInfos.cs
@@ -0,0 +1,491 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using Document = Lucene.Net.Documents.Document;
+using Directory = Lucene.Net.Store.Directory;
+using IndexInput = Lucene.Net.Store.IndexInput;
+using IndexOutput = Lucene.Net.Store.IndexOutput;
+using StringHelper = Lucene.Net.Util.StringHelper;
+
+namespace Lucene.Net.Index
+{
+
+ /// Access to the Fieldable Info file that describes document fields and whether or
+ /// not they are indexed. Each segment has a separate Fieldable Info file. Objects
+ /// of this class are thread-safe for multiple readers, but only one thread can
+ /// be adding documents at a time, with no other reader or writer threads
+ /// accessing this object.
+ ///
+ public sealed class FieldInfos : ICloneable
+ {
+
+ // Used internally (i.e. not written to *.fnm files) for pre-2.9 files
+ public const int FORMAT_PRE = - 1;
+
+ // First used in 2.9; prior to 2.9 there was no format header
+ public const int FORMAT_START = - 2;
+
+ internal static readonly int CURRENT_FORMAT = FORMAT_START;
+
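+ // Per-field flag bits, combined into the single flags byte stored
+ // for each field when the field infos are written.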
+ internal const byte IS_INDEXED = (0x1);
+ internal const byte STORE_TERMVECTOR = (0x2);
+ internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (0x4);
+ internal const byte STORE_OFFSET_WITH_TERMVECTOR = (0x8);
+ internal const byte OMIT_NORMS = (0x10);
+ internal const byte STORE_PAYLOADS = (0x20);
+ internal const byte OMIT_TERM_FREQ_AND_POSITIONS = (0x40);
+
+ private readonly System.Collections.Generic.List<FieldInfo> byNumber = new System.Collections.Generic.List<FieldInfo>();
+ private readonly HashMap