diff --git a/Makefile.in b/Makefile.in index f0873e25ff..a80823be98 100644 --- a/Makefile.in +++ b/Makefile.in @@ -85,7 +85,7 @@ DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(srcdir)/config.h.in mkinstalldirs \ $(srcdir)/mono-core.spec.in $(srcdir)/mono-uninstalled.pc.in \ AUTHORS COPYING.LIB ChangeLog NEWS compile config.guess \ - config.rpath config.sub install-sh missing ltmain.sh + config.rpath config.sub depcomp install-sh missing ltmain.sh ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/iconv.m4 \ $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ diff --git a/configure.REMOVED.git-id b/configure.REMOVED.git-id index 61434bb1ea..53ebe19d89 100644 --- a/configure.REMOVED.git-id +++ b/configure.REMOVED.git-id @@ -1 +1 @@ -0153a4d763e8fdec1bfc6ad8a10db68ae9a8b3ad \ No newline at end of file +94ef65996a186293ae651e688580672f8de4b880 \ No newline at end of file diff --git a/configure.ac.REMOVED.git-id b/configure.ac.REMOVED.git-id index f3f1ee7eb9..8106806460 100644 --- a/configure.ac.REMOVED.git-id +++ b/configure.ac.REMOVED.git-id @@ -1 +1 @@ -4d5ca3baf19b55971eba3a1e4c430304031594ed \ No newline at end of file +6e1f9bd8a55b889798c88191650b5752d2b58e3b \ No newline at end of file diff --git a/external/Lucene.Net.Light/README.md b/external/Lucene.Net.Light/README.md new file mode 100644 index 0000000000..250cada378 --- /dev/null +++ b/external/Lucene.Net.Light/README.md @@ -0,0 +1,6 @@ +This is a subset of Lucene.Net as used by Mono's Monodoc tool. + +This module is a checout of: + + git://github.com/mono/lucene.net.git + 88fb67b07621dfed054d8d75fd50672fb26349df diff --git a/external/Lucene.Net.Light/src/core/Analysis/ASCIIFoldingFilter.cs.REMOVED.git-id b/external/Lucene.Net.Light/src/core/Analysis/ASCIIFoldingFilter.cs.REMOVED.git-id new file mode 100644 index 0000000000..b821485546 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/ASCIIFoldingFilter.cs.REMOVED.git-id @@ -0,0 +1 @@ +61338700516ffa26e2a36fef4a0843a5fbf01c62 \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Analyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/Analyzer.cs new file mode 100644 index 0000000000..cea0ee30cc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Analyzer.cs @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using Lucene.Net.Store; +using Lucene.Net.Util; + +namespace Lucene.Net.Analysis +{ + /// An Analyzer builds TokenStreams, which analyze text. It thus represents a + /// policy for extracting index terms from text. + ///

+ /// Typical implementations first build a Tokenizer, which breaks the stream of
+ /// characters from the Reader into raw Tokens. One or more TokenFilters may
+ /// then be applied to the output of the Tokenizer.
+ ///
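A concrete Analyzer usually just chains a Tokenizer with one or more TokenFilters. As an illustrative sketch only (the class name and the length bounds are hypothetical, not part of this changeset), using the LowerCaseTokenizer and LengthFilter classes added further down in this diff:

```csharp
using System.IO;
using Lucene.Net.Analysis;

// Hypothetical analyzer: lower-cases letter runs, then drops very short or very long terms.
public class ShortWordAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        // Tokenizer first (splits on non-letters and lower-cases),
        // then a filter that keeps only terms of 2 to 20 characters.
        return new LengthFilter(new LowerCaseTokenizer(reader), 2, 20);
    }

    public override int GetPositionIncrementGap(string fieldName)
    {
        // Leave a large position gap between multiple values of the same field
        // so that phrase queries do not match across value boundaries.
        return 100;
    }
}
```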

+ public abstract class Analyzer : IDisposable + { + /// Creates a TokenStream which tokenizes all the text in the provided + /// Reader. Must be able to handle null field name for + /// backward compatibility. + /// + public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader); + + /// Creates a TokenStream that is allowed to be re-used + /// from the previous time that the same thread called + /// this method. Callers that do not need to use more + /// than one TokenStream at the same time from this + /// analyzer should use this method for better + /// performance. + /// + public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader) + { + return TokenStream(fieldName, reader); + } + + private CloseableThreadLocal tokenStreams = new CloseableThreadLocal(); + private bool isDisposed; + + /// Used by Analyzers that implement reusableTokenStream + /// to retrieve previously saved TokenStreams for re-use + /// by the same thread. + /// + protected internal virtual object PreviousTokenStream + { + get + { + if (tokenStreams == null) + { + throw new AlreadyClosedException("this Analyzer is closed"); + } + return tokenStreams.Get(); + } + set + { + if (tokenStreams == null) + { + throw new AlreadyClosedException("this Analyzer is closed"); + } + tokenStreams.Set(value); + } + } + + [Obsolete()] + protected internal bool overridesTokenStreamMethod = false; + + /// This is only present to preserve + /// back-compat of classes that subclass a core analyzer + /// and override tokenStream but not reusableTokenStream + /// + /// + /// Java uses Class<? extends Analyer> to constrain to + /// only Types that inherit from Analyzer. C# does not have a generic type class, + /// ie Type<t>. The method signature stays the same, and an exception may + /// still be thrown, if the method doesn't exist. + /// + [Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")] + protected internal virtual void SetOverridesTokenStreamMethod() + where TClass : Analyzer + { + try + { + System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) }); + overridesTokenStreamMethod = m.DeclaringType != typeof(TClass); + } + catch (MethodAccessException) + { + // can't happen, as baseClass is subclass of Analyzer + overridesTokenStreamMethod = false; + } + } + + + /// Invoked before indexing a Fieldable instance if + /// terms have already been added to that field. This allows custom + /// analyzers to place an automatic position increment gap between + /// Fieldable instances using the same field name. The default value + /// position increment gap is 0. With a 0 position increment gap and + /// the typical default token position increment of 1, all terms in a field, + /// including across Fieldable instances, are in successive positions, allowing + /// exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + /// + /// + /// Fieldable name being indexed. + /// + /// position increment gap, added to the next token emitted from + /// + public virtual int GetPositionIncrementGap(String fieldName) + { + return 0; + } + + /// Just like , except for + /// Token offsets instead. By default this returns 1 for + /// tokenized fields and, as if the fields were joined + /// with an extra space character, and 0 for un-tokenized + /// fields. 
This method is only called if the field + /// produced at least one token for indexing. + /// + /// + /// the field just indexed + /// + /// offset gap, added to the next token emitted from + /// + public virtual int GetOffsetGap(IFieldable field) + { + return field.IsTokenized ? 1 : 0; + } + + /// Frees persistent resources used by this Analyzer + public void Close() + { + Dispose(); + } + + public virtual void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (tokenStreams != null) + { + tokenStreams.Close(); + tokenStreams = null; + } + } + isDisposed = true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/BaseCharFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/BaseCharFilter.cs new file mode 100644 index 0000000000..b84fce08c8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/BaseCharFilter.cs @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using Lucene.Net.Util; + +namespace Lucene.Net.Analysis +{ + + /// + /// * Base utility class for implementing a . + /// * You subclass this, and then record mappings by calling + /// * , and then invoke the correct + /// * method to correct an offset. + /// + public abstract class BaseCharFilter : CharFilter + { + + private int[] offsets; + private int[] diffs; + private int size = 0; + + protected BaseCharFilter(CharStream @in) : base(@in) + { + } + + /* Retrieve the corrected offset. */ + //@Override + protected internal override int Correct(int currentOff) + { + if (offsets == null || currentOff < offsets[0]) + { + return currentOff; + } + + int hi = size - 1; + if (currentOff >= offsets[hi]) + return currentOff + diffs[hi]; + + int lo = 0; + int mid = -1; + + while (hi >= lo) + { + mid = Number.URShift(lo + hi, 1); + if (currentOff < offsets[mid]) + hi = mid - 1; + else if (currentOff > offsets[mid]) + lo = mid + 1; + else + return currentOff + diffs[mid]; + } + + if (currentOff < offsets[mid]) + return mid == 0 ? currentOff : currentOff + diffs[mid - 1]; + return currentOff + diffs[mid]; + } + + protected int LastCumulativeDiff + { + get + { + return offsets == null ? 
0 : diffs[size - 1]; + } + } + + [Obsolete("Use LastCumulativeDiff property instead")] + protected int GetLastCumulativeDiff() + { + return LastCumulativeDiff; + } + + protected void AddOffCorrectMap(int off, int cumulativeDiff) + { + if (offsets == null) + { + offsets = new int[64]; + diffs = new int[64]; + } + else if (size == offsets.Length) + { + offsets = ArrayUtil.Grow(offsets); + diffs = ArrayUtil.Grow(diffs); + } + + offsets[size] = off; + diffs[size++] = cumulativeDiff; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/CachingTokenFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/CachingTokenFilter.cs new file mode 100644 index 0000000000..c5f7694d39 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/CachingTokenFilter.cs @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// This class can be used if the token attributes of a TokenStream + /// are intended to be consumed more than once. It caches + /// all token attribute states locally in a List. + /// + ///

CachingTokenFilter implements the optional method + /// , which repositions the + /// stream to the first Token. + ///
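An illustrative usage sketch for the filter declared below, assuming an existing Analyzer instance named `analyzer`, the ITermAttribute interface from Lucene.Net.Analysis.Tokenattributes, and the generic AddAttribute&lt;T&gt;() pattern used elsewhere in this port; the field name and text are made up:

```csharp
// Inside some consuming method; analyzer, field name and text are illustrative.
TokenStream cached = new CachingTokenFilter(
    analyzer.TokenStream("body", new System.IO.StringReader("hello hello world")));

ITermAttribute term = cached.AddAttribute<ITermAttribute>();
while (cached.IncrementToken())
{
    // First pass: tokens are pulled from the wrapped stream and cached.
}

cached.Reset();   // reposition to the first cached token state
while (cached.IncrementToken())
{
    // Second pass: the same tokens are replayed from the cache.
    string text = new string(term.TermBuffer(), 0, term.TermLength());
}
```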

+ public sealed class CachingTokenFilter : TokenFilter + { + private System.Collections.Generic.LinkedList cache = null; + private System.Collections.Generic.IEnumerator iterator = null; + private State finalState; + + public CachingTokenFilter(TokenStream input):base(input) + { + } + + public override bool IncrementToken() + { + if (cache == null) + { + // fill cache lazily + cache = new System.Collections.Generic.LinkedList(); + FillCache(); + iterator = cache.GetEnumerator(); + } + + if (!iterator.MoveNext()) + { + // the cache is exhausted, return false + return false; + } + // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. + RestoreState(iterator.Current); + return true; + } + + public override void End() + { + if (finalState != null) + { + RestoreState(finalState); + } + } + + public override void Reset() + { + if (cache != null) + { + iterator = cache.GetEnumerator(); + } + } + + private void FillCache() + { + while (input.IncrementToken()) + { + cache.AddLast(CaptureState()); + } + // capture final state + input.End(); + finalState = CaptureState(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharArraySet.cs b/external/Lucene.Net.Light/src/core/Analysis/CharArraySet.cs new file mode 100644 index 0000000000..e7df0ba284 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/CharArraySet.cs @@ -0,0 +1,517 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Linq; +using System.Collections.Generic; + +namespace Lucene.Net.Analysis +{ + /// A simple class that stores Strings as char[]'s in a + /// hash table. Note that this is not a general purpose + /// class. For example, it cannot remove items from the + /// set, nor does it resize its hash table to be smaller, + /// etc. It is designed to be quick to test if a char[] + /// is in the set without the necessity of converting it + /// to a String first. + ///

+ /// Please note: This class implements but + /// does not behave like it should in all cases. The generic type is + /// , because you can add any object to it, + /// that has a string representation. The add methods will use + /// and store the result using a + /// buffer. The same behaviour have the methods. + /// The method returns an IEnumerable. + /// For type safety also {@link #stringIterator()} is provided. + ///
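An illustrative sketch of typical use of the set declared just below (the words and the initial size are made up):

```csharp
// Inside some indexing or test method; values are illustrative.
var stopWords = new CharArraySet(16, /* ignoreCase = */ true);
stopWords.Add("the");
stopWords.Add("and".ToCharArray());   // char[] is stored directly (lower-cased in place)

char[] token = "The".ToCharArray();
bool hit  = stopWords.Contains(token, 0, token.Length);   // true: lookup ignores case
bool miss = stopWords.Contains("or");                      // false

// Freeze the set before sharing it; further Add() calls then throw NotSupportedException.
CharArraySet shared = CharArraySet.UnmodifiableSet(stopWords);
```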

+ // TODO: java uses wildcards, .net doesn't have this, easiest way is to + // make the entire class generic. Ultimately, though, since this + // works with strings, I can't think of a reason not to just declare + // this as an ISet. + public class CharArraySet : ISet + { + bool _ReadOnly = false; + const int INIT_SIZE = 8; + char[][] _Entries; + int _Count; + bool _IgnoreCase; + public static CharArraySet EMPTY_SET = UnmodifiableSet(new CharArraySet(0, false)); + + private void Init(int startSize, bool ignoreCase) + { + this._IgnoreCase = ignoreCase; + int size = INIT_SIZE; + while (startSize + (startSize >> 2) > size) + size <<= 1; + _Entries = new char[size][]; + } + + /// Create set with enough capacity to hold startSize + /// terms + /// + public CharArraySet(int startSize, bool ignoreCase) + { + Init(startSize, ignoreCase); + } + + public CharArraySet(IEnumerable c, bool ignoreCase) + { + Init(c.Count(), ignoreCase); + AddItems(c); + } + + /// Create set from a Collection of char[] or String + public CharArraySet(IEnumerable c, bool ignoreCase) + { + Init(c.Count(), ignoreCase); + AddItems(c); + } + + private void AddItems(IEnumerable items) + { + foreach(var item in items) + { + Add(item.ToString()); + } + } + + /// Create set from entries + private CharArraySet(char[][] entries, bool ignoreCase, int count) + { + this._Entries = entries; + this._IgnoreCase = ignoreCase; + this._Count = count; + } + + /// true if the len chars of text starting at off + /// are in the set + /// + public virtual bool Contains(char[] text, int off, int len) + { + return _Entries[GetSlot(text, off, len)] != null; + } + + public virtual bool Contains(string text) + { + return _Entries[GetSlot(text)] != null; + } + + + private int GetSlot(char[] text, int off, int len) + { + int code = GetHashCode(text, off, len); + int pos = code & (_Entries.Length - 1); + char[] text2 = _Entries[pos]; + if (text2 != null && !Equals(text, off, len, text2)) + { + int inc = ((code >> 8) + code) | 1; + do + { + code += inc; + pos = code & (_Entries.Length - 1); + text2 = _Entries[pos]; + } + while (text2 != null && !Equals(text, off, len, text2)); + } + return pos; + } + + /// Returns true if the String is in the set + private int GetSlot(string text) + { + int code = GetHashCode(text); + int pos = code & (_Entries.Length - 1); + char[] text2 = _Entries[pos]; + if (text2 != null && !Equals(text, text2)) + { + int inc = ((code >> 8) + code) | 1; + do + { + code += inc; + pos = code & (_Entries.Length - 1); + text2 = _Entries[pos]; + } + while (text2 != null && !Equals(text, text2)); + } + return pos; + } + + public bool Add(string text) + { + if (_ReadOnly) throw new NotSupportedException(); + return Add(text.ToCharArray()); + } + + /// Add this char[] directly to the set. + /// If ignoreCase is true for this Set, the text array will be directly modified. + /// The user should never modify this text array after calling this method. 
+ /// + public bool Add(char[] text) + { + if (_ReadOnly) throw new NotSupportedException(); + + if (_IgnoreCase) + for (int i = 0; i < text.Length; i++) + text[i] = Char.ToLower(text[i]); + int slot = GetSlot(text, 0, text.Length); + if (_Entries[slot] != null) + return false; + _Entries[slot] = text; + _Count++; + + if (_Count + (_Count >> 2) > _Entries.Length) + { + Rehash(); + } + + return true; + } + + private bool Equals(char[] text1, int off, int len, char[] text2) + { + if (len != text2.Length) + return false; + if (_IgnoreCase) + { + for (int i = 0; i < len; i++) + { + if (char.ToLower(text1[off + i]) != text2[i]) + return false; + } + } + else + { + for (int i = 0; i < len; i++) + { + if (text1[off + i] != text2[i]) + return false; + } + } + return true; + } + + private bool Equals(string text1, char[] text2) + { + int len = text1.Length; + if (len != text2.Length) + return false; + if (_IgnoreCase) + { + for (int i = 0; i < len; i++) + { + if (char.ToLower(text1[i]) != text2[i]) + return false; + } + } + else + { + for (int i = 0; i < len; i++) + { + if (text1[i] != text2[i]) + return false; + } + } + return true; + } + + private void Rehash() + { + int newSize = 2 * _Entries.Length; + char[][] oldEntries = _Entries; + _Entries = new char[newSize][]; + + for (int i = 0; i < oldEntries.Length; i++) + { + char[] text = oldEntries[i]; + if (text != null) + { + // todo: could be faster... no need to compare strings on collision + _Entries[GetSlot(text, 0, text.Length)] = text; + } + } + } + + private int GetHashCode(char[] text, int offset, int len) + { + int code = 0; + int stop = offset + len; + if (_IgnoreCase) + { + for (int i = offset; i < stop; i++) + { + code = code * 31 + char.ToLower(text[i]); + } + } + else + { + for (int i = offset; i < stop; i++) + { + code = code * 31 + text[i]; + } + } + return code; + } + + private int GetHashCode(string text) + { + int code = 0; + int len = text.Length; + if (_IgnoreCase) + { + for (int i = 0; i < len; i++) + { + code = code * 31 + char.ToLower(text[i]); + } + } + else + { + for (int i = 0; i < len; i++) + { + code = code * 31 + text[i]; + } + } + return code; + } + + public int Count + { + get { return _Count; } + } + + public bool IsEmpty + { + get { return _Count == 0; } + } + + public bool Contains(object item) + { + var text = item as char[]; + return text != null ? Contains(text, 0, text.Length) : Contains(item.ToString()); + } + + public bool Add(object item) + { + return Add(item.ToString()); + } + + void ICollection.Add(string item) + { + this.Add(item); + } + + /// + /// Returns an unmodifiable . This allows to provide + /// unmodifiable views of internal sets for "read-only" use + /// + /// A Set for which the unmodifiable set it returns. + /// A new unmodifiable + /// ArgumentNullException of the given set is null + public static CharArraySet UnmodifiableSet(CharArraySet set) + { + if(set == null) + throw new ArgumentNullException("Given set is null"); + if (set == EMPTY_SET) + return EMPTY_SET; + if (set._ReadOnly) + return set; + + var newSet = new CharArraySet(set._Entries, set._IgnoreCase, set.Count) {IsReadOnly = true}; + return newSet; + } + + /// + /// returns a copy of the given set as a . If the given set + /// is a the ignoreCase property will be preserved. + /// + /// A set to copy + /// a copy of the given set as a . If the given set + /// is a the ignoreCase property will be preserved. 
+ public static CharArraySet Copy(ISet set) + { + if (set == null) + throw new ArgumentNullException("set", "Given set is null!"); + if (set == EMPTY_SET) + return EMPTY_SET; + bool ignoreCase = set is CharArraySet && ((CharArraySet)set)._IgnoreCase; + var arrSet = new CharArraySet(set.Count, ignoreCase); + arrSet.AddItems(set); + return arrSet; + } + + public void Clear() + { + throw new NotSupportedException("Remove not supported!"); + } + + public bool IsReadOnly + { + get { return _ReadOnly; } + private set { _ReadOnly = value; } + } + + /// Adds all of the elements in the specified collection to this collection + public void UnionWith(IEnumerable other) + { + if (_ReadOnly) throw new NotSupportedException(); + + foreach (string s in other) + { + Add(s.ToCharArray()); + } + } + + /// Wrapper that calls UnionWith + public void AddAll(IEnumerable coll) + { + UnionWith(coll); + } + + #region Unneeded methods + public void RemoveAll(ICollection c) + { + throw new NotSupportedException(); + } + + public void RetainAll(ICollection c) + { + throw new NotSupportedException(); + } + + void ICollection.CopyTo(string[] array, int arrayIndex) + { + throw new NotSupportedException(); + } + + void ISet.IntersectWith(IEnumerable other) + { + throw new NotSupportedException(); + } + + void ISet.ExceptWith(IEnumerable other) + { + throw new NotSupportedException(); + } + + void ISet.SymmetricExceptWith(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ISet.IsSubsetOf(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ISet.IsSupersetOf(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ISet.IsProperSupersetOf(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ISet.IsProperSubsetOf(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ISet.Overlaps(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ISet.SetEquals(IEnumerable other) + { + throw new NotSupportedException(); + } + + bool ICollection.Remove(string item) + { + throw new NotSupportedException(); + } + #endregion + + /// + /// The IEnumerator<String> for this set. 
Strings are constructed on the fly, + /// so use nextCharArray for more efficient access + /// + public class CharArraySetEnumerator : IEnumerator + { + readonly CharArraySet _Creator; + int pos = -1; + char[] cur; + + protected internal CharArraySetEnumerator(CharArraySet creator) + { + _Creator = creator; + } + + public bool MoveNext() + { + cur = null; + pos++; + while (pos < _Creator._Entries.Length && (cur = _Creator._Entries[pos]) == null) + pos++; + return cur != null; + } + + /// do not modify the returned char[] + public char[] NextCharArray() + { + return cur; + } + + public string Current + { + get { return new string(NextCharArray()); } + } + + public void Dispose() + { + } + + object IEnumerator.Current + { + get { return new string(NextCharArray()); } + } + + public void Reset() + { + throw new NotImplementedException(); + } + } + + public IEnumerator StringEnumerator() + { + return new CharArraySetEnumerator(this); + } + + public IEnumerator GetEnumerator() + { + return new CharArraySetEnumerator(this); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } + +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/CharFilter.cs new file mode 100644 index 0000000000..039f841c3c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/CharFilter.cs @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// Subclasses of CharFilter can be chained to filter CharStream. + /// They can be used as with additional offset + /// correction. s will automatically use + /// if a CharFilter/CharStream subclass is used. + /// + /// + /// $Id$ + /// + /// + public abstract class CharFilter : CharStream + { + private long currentPosition = -1; + private bool isDisposed; + protected internal CharStream input; + + protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed) + { + input = in_Renamed; + } + + /// Subclass may want to override to correct the current offset. + /// current offset + /// corrected offset + protected internal virtual int Correct(int currentOff) + { + return currentOff; + } + + /// Chains the corrected offset through the input + /// CharFilter. 
+ /// + public override int CorrectOffset(int currentOff) + { + return input.CorrectOffset(Correct(currentOff)); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (input != null) + { + input.Close(); + } + } + + input = null; + isDisposed = true; + base.Dispose(disposing); + } + + public override int Read(System.Char[] cbuf, int off, int len) + { + return input.Read(cbuf, off, len); + } + + public bool MarkSupported() + { + return input.BaseStream.CanSeek; + } + + public void Mark(int readAheadLimit) + { + currentPosition = input.BaseStream.Position; + input.BaseStream.Position = readAheadLimit; + } + + public void Reset() + { + input.BaseStream.Position = currentPosition; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharReader.cs b/external/Lucene.Net.Light/src/core/Analysis/CharReader.cs new file mode 100644 index 0000000000..2120bd4d8d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/CharReader.cs @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// CharReader is a Reader wrapper. It reads chars from + /// Reader and outputs , defining an + /// identify function method that + /// simply returns the provided offset. + /// + public sealed class CharReader:CharStream + { + private long currentPosition = -1; + + private bool isDisposed; + + internal System.IO.StreamReader input; + + public static CharStream Get(System.IO.TextReader input) + { + var charStream = input as CharStream; + if (charStream != null) + return charStream; + + // {{Aroush-2.9}} isn't there a better (faster) way to do this? 
+ var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd())); + return new CharReader(new System.IO.StreamReader(theString)); + //return input is CharStream?(CharStream) input:new CharReader(input); + } + + private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed) + { + input = in_Renamed; + } + + public override int CorrectOffset(int currentOff) + { + return currentOff; + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (input != null) + { + input.Close(); + } + } + + input = null; + isDisposed = true; + base.Dispose(disposing); + } + + public override int Read(System.Char[] cbuf, int off, int len) + { + return input.Read(cbuf, off, len); + } + + public bool MarkSupported() + { + return input.BaseStream.CanSeek; + } + + public void Mark(int readAheadLimit) + { + currentPosition = input.BaseStream.Position; + input.BaseStream.Position = readAheadLimit; + } + + public void Reset() + { + input.BaseStream.Position = currentPosition; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharStream.cs b/external/Lucene.Net.Light/src/core/Analysis/CharStream.cs new file mode 100644 index 0000000000..0b36fe2d8e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/CharStream.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// CharStream adds + /// functionality over . All Tokenizers accept a + /// CharStream instead of as input, which enables + /// arbitrary character based filtering before tokenization. + /// The method fixed offsets to account for + /// removal or insertion of characters, so that the offsets + /// reported in the tokens match the character offsets of the + /// original Reader. + /// + public abstract class CharStream : System.IO.StreamReader + { + protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream) + { + } + + /// Called by CharFilter(s) and Tokenizer to correct token offset. + /// + /// + /// offset as seen in the output + /// + /// corrected offset based on the input + /// + public abstract int CorrectOffset(int currentOff); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/CharTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/CharTokenizer.cs new file mode 100644 index 0000000000..22423ec345 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/CharTokenizer.cs @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// An abstract base class for simple, character-oriented tokenizers. + public abstract class CharTokenizer:Tokenizer + { + protected CharTokenizer(System.IO.TextReader input):base(input) + { + offsetAtt = AddAttribute(); + termAtt = AddAttribute(); + } + + protected CharTokenizer(AttributeSource source, System.IO.TextReader input):base(source, input) + { + offsetAtt = AddAttribute(); + termAtt = AddAttribute(); + } + + protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory, input) + { + offsetAtt = AddAttribute(); + termAtt = AddAttribute(); + } + + private int offset = 0, bufferIndex = 0, dataLen = 0; + private const int MAX_WORD_LEN = 255; + private const int IO_BUFFER_SIZE = 4096; + private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE]; + + private readonly ITermAttribute termAtt; + private readonly IOffsetAttribute offsetAtt; + + /// Returns true iff a character should be included in a token. This + /// tokenizer generates as tokens adjacent sequences of characters which + /// satisfy this predicate. Characters for which this is false are used to + /// define token boundaries and are not included in tokens. + /// + protected internal abstract bool IsTokenChar(char c); + + /// Called on each token character to normalize it before it is added to the + /// token. The default implementation does nothing. Subclasses may use this + /// to, e.g., lowercase tokens. + /// + protected internal virtual char Normalize(char c) + { + return c; + } + + public override bool IncrementToken() + { + ClearAttributes(); + int length = 0; + int start = bufferIndex; + char[] buffer = termAtt.TermBuffer(); + while (true) + { + + if (bufferIndex >= dataLen) + { + offset += dataLen; + dataLen = input.Read(ioBuffer, 0, ioBuffer.Length); + if (dataLen <= 0) + { + dataLen = 0; // so next offset += dataLen won't decrement offset + if (length > 0) + break; + return false; + } + bufferIndex = 0; + } + + char c = ioBuffer[bufferIndex++]; + + if (IsTokenChar(c)) + { + // if it's a token char + + if (length == 0) + // start of token + start = offset + bufferIndex - 1; + else if (length == buffer.Length) + buffer = termAtt.ResizeTermBuffer(1 + length); + + buffer[length++] = Normalize(c); // buffer it, normalized + + if (length == MAX_WORD_LEN) + // buffer overflow! 
+ break; + } + else if (length > 0) + // at non-Letter w/ chars + break; // return 'em + } + + termAtt.SetTermLength(length); + offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length)); + return true; + } + + public override void End() + { + // set final offset + int finalOffset = CorrectOffset(offset); + offsetAtt.SetOffset(finalOffset, finalOffset); + } + + public override void Reset(System.IO.TextReader input) + { + base.Reset(input); + bufferIndex = 0; + offset = 0; + dataLen = 0; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/ISOLatin1AccentFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/ISOLatin1AccentFilter.cs new file mode 100644 index 0000000000..5fd839e6e4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/ISOLatin1AccentFilter.cs @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; + +namespace Lucene.Net.Analysis +{ + + /// A filter that replaces accented characters in the ISO Latin 1 character set + /// (ISO-8859-1) by their unaccented equivalent. The case will not be altered. + ///

+ /// For instance, 'à' will be replaced by 'a'. ///

+ /// + ///

+ /// If you build a new index, use + /// which covers a superset of Latin 1. + /// This class is included for use with existing indexes and will be removed + /// in a future release (possible Lucene 4.0) + /// + [Obsolete("If you build a new index, use ASCIIFoldingFilter which covers a superset of Latin 1. This class is included for use with existing indexes and will be removed in a future release (possible Lucene 4.0).")] + public class ISOLatin1AccentFilter : TokenFilter + { + public ISOLatin1AccentFilter(TokenStream input):base(input) + { + termAtt = AddAttribute(); + } + + private char[] output = new char[256]; + private int outputPos; + private readonly ITermAttribute termAtt; + + public override bool IncrementToken() + { + if (input.IncrementToken()) + { + char[] buffer = termAtt.TermBuffer(); + int length = termAtt.TermLength(); + // If no characters actually require rewriting then we + // just return token as-is: + for (int i = 0; i < length; i++) + { + char c = buffer[i]; + if (c >= '\u00c0' && c <= '\uFB06') + { + RemoveAccents(buffer, length); + termAtt.SetTermBuffer(output, 0, outputPos); + break; + } + } + return true; + } + return false; + } + + /// To replace accented characters in a String by unaccented equivalents. + public void RemoveAccents(char[] input, int length) + { + + // Worst-case length required: + int maxSizeNeeded = 2 * length; + + int size = output.Length; + while (size < maxSizeNeeded) + size *= 2; + + if (size != output.Length) + output = new char[size]; + + outputPos = 0; + + int pos = 0; + + for (int i = 0; i < length; i++, pos++) + { + char c = input[pos]; + + // Quick test: if it's not in range then just keep + // current character + if (c < '\u00c0' || c > '\uFB06') + output[outputPos++] = c; + else + { + switch (c) + { + + case '\u00C0': + // À + case '\u00C1': + // �? + case '\u00C2': + //  + case '\u00C3': + // à + case '\u00C4': + // Ä + case '\u00C5': // Å + output[outputPos++] = 'A'; + break; + + case '\u00C6': // Æ + output[outputPos++] = 'A'; + output[outputPos++] = 'E'; + break; + + case '\u00C7': // Ç + output[outputPos++] = 'C'; + break; + + case '\u00C8': + // È + case '\u00C9': + // É + case '\u00CA': + // Ê + case '\u00CB': // Ë + output[outputPos++] = 'E'; + break; + + case '\u00CC': + // Ì + case '\u00CD': + // �? + case '\u00CE': + // Î + case '\u00CF': // �? + output[outputPos++] = 'I'; + break; + + case '\u0132': // IJ + output[outputPos++] = 'I'; + output[outputPos++] = 'J'; + break; + + case '\u00D0': // �? + output[outputPos++] = 'D'; + break; + + case '\u00D1': // Ñ + output[outputPos++] = 'N'; + break; + + case '\u00D2': + // Ò + case '\u00D3': + // Ó + case '\u00D4': + // Ô + case '\u00D5': + // Õ + case '\u00D6': + // Ö + case '\u00D8': // Ø + output[outputPos++] = 'O'; + break; + + case '\u0152': // Œ + output[outputPos++] = 'O'; + output[outputPos++] = 'E'; + break; + + case '\u00DE': // Þ + output[outputPos++] = 'T'; + output[outputPos++] = 'H'; + break; + + case '\u00D9': + // Ù + case '\u00DA': + // Ú + case '\u00DB': + // Û + case '\u00DC': // Ü + output[outputPos++] = 'U'; + break; + + case '\u00DD': + // �? 
+ case '\u0178': // Ÿ + output[outputPos++] = 'Y'; + break; + + case '\u00E0': + // à + case '\u00E1': + // á + case '\u00E2': + // â + case '\u00E3': + // ã + case '\u00E4': + // ä + case '\u00E5': // å + output[outputPos++] = 'a'; + break; + + case '\u00E6': // æ + output[outputPos++] = 'a'; + output[outputPos++] = 'e'; + break; + + case '\u00E7': // ç + output[outputPos++] = 'c'; + break; + + case '\u00E8': + // è + case '\u00E9': + // é + case '\u00EA': + // ê + case '\u00EB': // ë + output[outputPos++] = 'e'; + break; + + case '\u00EC': + // ì + case '\u00ED': + // í + case '\u00EE': + // î + case '\u00EF': // ï + output[outputPos++] = 'i'; + break; + + case '\u0133': // ij + output[outputPos++] = 'i'; + output[outputPos++] = 'j'; + break; + + case '\u00F0': // ð + output[outputPos++] = 'd'; + break; + + case '\u00F1': // ñ + output[outputPos++] = 'n'; + break; + + case '\u00F2': + // ò + case '\u00F3': + // ó + case '\u00F4': + // ô + case '\u00F5': + // õ + case '\u00F6': + // ö + case '\u00F8': // ø + output[outputPos++] = 'o'; + break; + + case '\u0153': // œ + output[outputPos++] = 'o'; + output[outputPos++] = 'e'; + break; + + case '\u00DF': // ß + output[outputPos++] = 's'; + output[outputPos++] = 's'; + break; + + case '\u00FE': // þ + output[outputPos++] = 't'; + output[outputPos++] = 'h'; + break; + + case '\u00F9': + // ù + case '\u00FA': + // ú + case '\u00FB': + // û + case '\u00FC': // ü + output[outputPos++] = 'u'; + break; + + case '\u00FD': + // ý + case '\u00FF': // ÿ + output[outputPos++] = 'y'; + break; + + case '\uFB00': // ff + output[outputPos++] = 'f'; + output[outputPos++] = 'f'; + break; + + case '\uFB01': // �? + output[outputPos++] = 'f'; + output[outputPos++] = 'i'; + break; + + case '\uFB02': // fl + output[outputPos++] = 'f'; + output[outputPos++] = 'l'; + break; + // following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive) + // case '\uFB03': // ffi + // output[outputPos++] = 'f'; + // output[outputPos++] = 'f'; + // output[outputPos++] = 'i'; + // break; + // case '\uFB04': // ffl + // output[outputPos++] = 'f'; + // output[outputPos++] = 'f'; + // output[outputPos++] = 'l'; + // break; + + case '\uFB05': // ſt + output[outputPos++] = 'f'; + output[outputPos++] = 't'; + break; + + case '\uFB06': // st + output[outputPos++] = 's'; + output[outputPos++] = 't'; + break; + + default: + output[outputPos++] = c; + break; + + } + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/KeywordAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/KeywordAnalyzer.cs new file mode 100644 index 0000000000..116babb97e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/KeywordAnalyzer.cs @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// "Tokenizes" the entire stream as a single token. This is useful + /// for data like zip codes, ids, and some product names. + /// + public class KeywordAnalyzer:Analyzer + { + public KeywordAnalyzer() + { + SetOverridesTokenStreamMethod(); + } + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new KeywordTokenizer(reader); + } + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + if (overridesTokenStreamMethod) + { + // LUCENE-1678: force fallback to tokenStream() if we + // have been subclassed and that subclass overrides + // tokenStream but not reusableTokenStream + return TokenStream(fieldName, reader); + } + var tokenizer = (Tokenizer) PreviousTokenStream; + if (tokenizer == null) + { + tokenizer = new KeywordTokenizer(reader); + PreviousTokenStream = tokenizer; + } + else + tokenizer.Reset(reader); + return tokenizer; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/KeywordTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/KeywordTokenizer.cs new file mode 100644 index 0000000000..f97ff9538c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/KeywordTokenizer.cs @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// Emits the entire input as a single token. 
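An illustrative sketch of what the KeywordAnalyzer above emits (the field name and value are hypothetical; attribute access assumes the ITermAttribute interface and the generic AddAttribute&lt;T&gt;() pattern used elsewhere in this port):

```csharp
// Inside some indexing or test method; the id value is made up.
var analyzer = new KeywordAnalyzer();
TokenStream ts = analyzer.TokenStream("id", new System.IO.StringReader("MONO-1234"));
ITermAttribute term = ts.AddAttribute<ITermAttribute>();

while (ts.IncrementToken())
{
    // Runs exactly once: the whole input becomes a single token, "MONO-1234".
    string text = new string(term.TermBuffer(), 0, term.TermLength());
}
```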
+ public sealed class KeywordTokenizer:Tokenizer + { + + private const int DEFAULT_BUFFER_SIZE = 256; + + private bool done; + private int finalOffset; + private ITermAttribute termAtt; + private IOffsetAttribute offsetAtt; + + public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE) + { + } + + public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input) + { + Init(bufferSize); + } + + public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input) + { + Init(bufferSize); + } + + public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input) + { + Init(bufferSize); + } + + private void Init(int bufferSize) + { + this.done = false; + termAtt = AddAttribute(); + offsetAtt = AddAttribute(); + termAtt.ResizeTermBuffer(bufferSize); + } + + public override bool IncrementToken() + { + if (!done) + { + ClearAttributes(); + done = true; + int upto = 0; + char[] buffer = termAtt.TermBuffer(); + while (true) + { + int length = input.Read(buffer, upto, buffer.Length - upto); + if (length == 0) + break; + upto += length; + if (upto == buffer.Length) + buffer = termAtt.ResizeTermBuffer(1 + buffer.Length); + } + termAtt.SetTermLength(upto); + finalOffset = CorrectOffset(upto); + offsetAtt.SetOffset(CorrectOffset(0), finalOffset); + return true; + } + return false; + } + + public override void End() + { + // set final offset + offsetAtt.SetOffset(finalOffset, finalOffset); + } + + public override void Reset(System.IO.TextReader input) + { + base.Reset(input); + this.done = false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/LengthFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/LengthFilter.cs new file mode 100644 index 0000000000..c4f60ad40e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/LengthFilter.cs @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; + +namespace Lucene.Net.Analysis +{ + + /// Removes words that are too long or too short from the stream. + public sealed class LengthFilter:TokenFilter + { + + internal int min; + internal int max; + + private readonly ITermAttribute termAtt; + + /// Build a filter that removes words that are too long or too + /// short from the text. 
+ /// + public LengthFilter(TokenStream in_Renamed, int min, int max) + : base(in_Renamed) + { + this.min = min; + this.max = max; + termAtt = AddAttribute(); + } + + /// Returns the next input Token whose term() is the right len + public override bool IncrementToken() + { + // return the first non-stop word found + while (input.IncrementToken()) + { + var len = termAtt.TermLength(); + if (len >= min && len <= max) + { + return true; + } + // note: else we ignore it but should we index each part of it? + } + // reached EOS -- return false + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/LetterTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/LetterTokenizer.cs new file mode 100644 index 0000000000..77629a85c4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/LetterTokenizer.cs @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's + /// to say, it defines tokens as maximal strings of adjacent letters, as defined + /// by java.lang.Character.isLetter() predicate. + /// Note: this does a decent job for most European languages, but does a terrible + /// job for some Asian languages, where words are not separated by spaces. + /// + + public class LetterTokenizer:CharTokenizer + { + /// Construct a new LetterTokenizer. + public LetterTokenizer(System.IO.TextReader @in):base(@in) + { + } + + /// Construct a new LetterTokenizer using a given . + public LetterTokenizer(AttributeSource source, System.IO.TextReader @in) + : base(source, @in) + { + } + + /// Construct a new LetterTokenizer using a given . + public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in) + : base(factory, @in) + { + } + + /// Collects only characters which satisfy + /// . + /// + protected internal override bool IsTokenChar(char c) + { + return System.Char.IsLetter(c); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/LowerCaseFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseFilter.cs new file mode 100644 index 0000000000..cad01976f4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseFilter.cs @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; + +namespace Lucene.Net.Analysis +{ + + /// Normalizes token text to lower case. + public sealed class LowerCaseFilter:TokenFilter + { + public LowerCaseFilter(TokenStream @in) + : base(@in) + { + termAtt = AddAttribute(); + } + + private readonly ITermAttribute termAtt; + + public override bool IncrementToken() + { + if (input.IncrementToken()) + { + + char[] buffer = termAtt.TermBuffer(); + int length = termAtt.TermLength(); + for (int i = 0; i < length; i++) + buffer[i] = System.Char.ToLower(buffer[i]); + + return true; + } + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/LowerCaseTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseTokenizer.cs new file mode 100644 index 0000000000..4cea2179dc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/LowerCaseTokenizer.cs @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// LowerCaseTokenizer performs the function of LetterTokenizer + /// and LowerCaseFilter together. It divides text at non-letters and converts + /// them to lower case. While it is functionally equivalent to the combination + /// of LetterTokenizer and LowerCaseFilter, there is a performance advantage + /// to doing the two tasks at once, hence this (redundant) implementation. + ///

+ /// Note: this does a decent job for most European languages, but does a terrible + /// job for some Asian languages, where words are not separated by spaces. + ///

+ public sealed class LowerCaseTokenizer:LetterTokenizer + { + /// Construct a new LowerCaseTokenizer. + public LowerCaseTokenizer(System.IO.TextReader @in) + : base(@in) + { + } + + /// Construct a new LowerCaseTokenizer using a given . + public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in) + : base(source, @in) + { + } + + /// Construct a new LowerCaseTokenizer using a given . + public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in) + : base(factory, @in) + { + } + + /// Converts char to lower case + /// . + /// + protected internal override char Normalize(char c) + { + return System.Char.ToLower(c); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/MappingCharFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/MappingCharFilter.cs new file mode 100644 index 0000000000..9705719f8a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/MappingCharFilter.cs @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; + +namespace Lucene.Net.Analysis +{ + + /// Simplistic that applies the mappings + /// contained in a to the character + /// stream, and correcting the resulting changes to the + /// offsets. + /// + public class MappingCharFilter : BaseCharFilter + { + private readonly NormalizeCharMap normMap; + private LinkedList buffer; + private System.String replacement; + private int charPointer; + private int nextCharCounter; + + /// Default constructor that takes a . + public MappingCharFilter(NormalizeCharMap normMap, CharStream @in) + : base(@in) + { + this.normMap = normMap; + } + + /// Easy-use constructor that takes a . + public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in) + : base(CharReader.Get(@in)) + { + this.normMap = normMap; + } + + public override int Read() + { + while (true) + { + if (replacement != null && charPointer < replacement.Length) + { + return replacement[charPointer++]; + } + + int firstChar = NextChar(); + if (firstChar == - 1) + return - 1; + NormalizeCharMap nm = normMap.submap != null + ? 
normMap.submap[(char) firstChar] + : null; + if (nm == null) + return firstChar; + NormalizeCharMap result = Match(nm); + if (result == null) + return firstChar; + replacement = result.normStr; + charPointer = 0; + if (result.diff != 0) + { + int prevCumulativeDiff = LastCumulativeDiff; + if (result.diff < 0) + { + for (int i = 0; i < - result.diff; i++) + AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i); + } + else + { + AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff); + } + } + } + } + + private int NextChar() + { + nextCharCounter++; + if (buffer != null && buffer.Count != 0) + { + char tempObject = buffer.First.Value; + buffer.RemoveFirst(); + return (tempObject); + } + return input.Read(); + } + + private void PushChar(int c) + { + nextCharCounter--; + if (buffer == null) + { + buffer = new LinkedList(); + } + buffer.AddFirst((char)c); + } + + private void PushLastChar(int c) + { + if (buffer == null) + { + buffer = new LinkedList(); + } + buffer.AddLast((char)c); + } + + private NormalizeCharMap Match(NormalizeCharMap map) + { + NormalizeCharMap result = null; + if (map.submap != null) + { + int chr = NextChar(); + if (chr != - 1) + { + NormalizeCharMap subMap = map.submap[(char)chr]; + if (subMap != null) + { + result = Match(subMap); + } + if (result == null) + { + PushChar(chr); + } + } + } + if (result == null && map.normStr != null) + { + result = map; + } + return result; + } + + public override int Read(System.Char[] cbuf, int off, int len) + { + var tmp = new char[len]; + int l = input.Read(tmp, 0, len); + if (l != 0) + { + for (int i = 0; i < l; i++) + PushLastChar(tmp[i]); + } + l = 0; + for (int i = off; i < off + len; i++) + { + int c = Read(); + if (c == - 1) + break; + cbuf[i] = (char) c; + l++; + } + return l == 0?- 1:l; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/NormalizeCharMap.cs b/external/Lucene.Net.Light/src/core/Analysis/NormalizeCharMap.cs new file mode 100644 index 0000000000..7fd520c876 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/NormalizeCharMap.cs @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Support; + +namespace Lucene.Net.Analysis +{ + + /// Holds a map of String input to String output, to be used + /// with . + /// + public class NormalizeCharMap + { + internal System.Collections.Generic.IDictionary submap; + internal System.String normStr; + internal int diff; + + /// Records a replacement to be applied to the inputs + /// stream. Whenever singleMatch occurs in + /// the input, it will be replaced with + /// replacement. 
+ /// + /// + /// input String to be replaced + /// + /// output String + /// + public virtual void Add(System.String singleMatch, System.String replacement) + { + NormalizeCharMap currMap = this; + for (var i = 0; i < singleMatch.Length; i++) + { + char c = singleMatch[i]; + if (currMap.submap == null) + { + currMap.submap = new HashMap(1); + } + var map = currMap.submap[c]; + if (map == null) + { + map = new NormalizeCharMap(); + currMap.submap[c] = map; + } + currMap = map; + } + if (currMap.normStr != null) + { + throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch); + } + currMap.normStr = replacement; + currMap.diff = singleMatch.Length - replacement.Length; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/NumericTokenStream.cs b/external/Lucene.Net.Light/src/core/Analysis/NumericTokenStream.cs new file mode 100644 index 0000000000..90b6e72882 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/NumericTokenStream.cs @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Search; +using AttributeSource = Lucene.Net.Util.AttributeSource; +using NumericUtils = Lucene.Net.Util.NumericUtils; +using NumericField = Lucene.Net.Documents.NumericField; +// javadocs + +namespace Lucene.Net.Analysis +{ + + /// Expert: This class provides a + /// for indexing numeric values that can be used by + /// or . + /// + ///

Note that for simple usage, NumericField is + /// recommended. NumericField disables norms and + /// term freqs, as they are not usually needed during + /// searching. If you need to change these settings, you + /// should use this class. + /// + ///

See NumericField for capabilities of fields + /// indexed numerically.

+ /// + ///

Here's an example usage, for an int field: + /// + /// + /// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value)); + /// field.setOmitNorms(true); + /// field.setOmitTermFreqAndPositions(true); + /// document.add(field); + /// + /// + ///

For optimal performance, re-use the TokenStream and Field instance + /// for more than one document: + /// + /// + /// NumericTokenStream stream = new NumericTokenStream(precisionStep); + /// Field field = new Field(name, stream); + /// field.setOmitNorms(true); + /// field.setOmitTermFreqAndPositions(true); + /// Document document = new Document(); + /// document.add(field); + /// + /// for(all documents) { + /// stream.setIntValue(value) + /// writer.addDocument(document); + /// } + /// + /// + ///

This stream is not intended to be used in analyzers; + /// it's more for iterating the different precisions during + /// indexing a specific numeric value.

+ /// + ///

NOTE: as token streams are only consumed once + /// the document is added to the index, if you index more + /// than one numeric field, use a separate NumericTokenStream + /// instance for each.

+ /// + ///

See NumericRangeQuery for more details on the + /// precisionStep + /// parameter as well as how numeric fields work under the hood.

+ /// + ///

NOTE: This API is experimental and + /// might change in incompatible ways in the next release. + /// Since 2.9 + ///
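A C# rendering of the int-field example from the class comment above; the OmitNorms and OmitTermFreqAndPositions property names on Field are an assumption based on that example, the rest uses only members shown in this change:

using Lucene.Net.Analysis;
using Lucene.Net.Documents;

static class NumericTokenStreamSketch
{
    static Document BuildDocument(string name, int value, int precisionStep)
    {
        // One NumericTokenStream per numeric field, as the NOTE above requires.
        NumericTokenStream stream = new NumericTokenStream(precisionStep).SetIntValue(value);

        var field = new Field(name, stream);
        field.OmitNorms = true;                // norms are rarely useful for numeric terms
        field.OmitTermFreqAndPositions = true; // neither are term frequencies or positions

        var doc = new Document();
        doc.Add(field);
        return doc;
    }
}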

+ public sealed class NumericTokenStream : TokenStream + { + private void InitBlock() + { + termAtt = AddAttribute(); + typeAtt = AddAttribute(); + posIncrAtt = AddAttribute(); + } + + /// The full precision token gets this token type assigned. + public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric"; + + /// The lower precision tokens gets this token type assigned. + public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; + + /// Creates a token stream for numeric values using the default precisionStep + /// (4). The stream is not yet initialized, + /// before using set a value using the various set???Value() methods. + /// + public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT) + { + } + + /// Creates a token stream for numeric values with the specified + /// precisionStep. The stream is not yet initialized, + /// before using set a value using the various set???Value() methods. + /// + public NumericTokenStream(int precisionStep):base() + { + InitBlock(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + } + + /// Expert: Creates a token stream for numeric values with the specified + /// precisionStep using the given . + /// The stream is not yet initialized, + /// before using set a value using the various set???Value() methods. + /// + public NumericTokenStream(AttributeSource source, int precisionStep):base(source) + { + InitBlock(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + } + + /// Expert: Creates a token stream for numeric values with the specified + /// precisionStep using the given + /// . + /// The stream is not yet initialized, + /// before using set a value using the various set???Value() methods. + /// + public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory) + { + InitBlock(); + this.precisionStep = precisionStep; + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + } + + /// Initializes the token stream with the supplied long value. + /// the value, for which this TokenStream should enumerate tokens. + /// + /// this instance, because of this you can use it the following way: + /// new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value)) + /// + public NumericTokenStream SetLongValue(long value_Renamed) + { + this.value_Renamed = value_Renamed; + valSize = 64; + shift = 0; + return this; + } + + /// Initializes the token stream with the supplied int value. + /// the value, for which this TokenStream should enumerate tokens. + /// + /// this instance, because of this you can use it the following way: + /// new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value)) + /// + public NumericTokenStream SetIntValue(int value_Renamed) + { + this.value_Renamed = (long) value_Renamed; + valSize = 32; + shift = 0; + return this; + } + + /// Initializes the token stream with the supplied double value. + /// the value, for which this TokenStream should enumerate tokens. 
+ /// + /// this instance, because of this you can use it the following way: + /// new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value)) + /// + public NumericTokenStream SetDoubleValue(double value_Renamed) + { + this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed); + valSize = 64; + shift = 0; + return this; + } + + /// Initializes the token stream with the supplied float value. + /// the value, for which this TokenStream should enumerate tokens. + /// + /// this instance, because of this you can use it the following way: + /// new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value)) + /// + public NumericTokenStream SetFloatValue(float value_Renamed) + { + this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed); + valSize = 32; + shift = 0; + return this; + } + + // @Override + public override void Reset() + { + if (valSize == 0) + throw new System.SystemException("call set???Value() before usage"); + shift = 0; + } + + protected override void Dispose(bool disposing) + { + // Do nothing. + } + + // @Override + public override bool IncrementToken() + { + if (valSize == 0) + throw new System.SystemException("call set???Value() before usage"); + if (shift >= valSize) + return false; + + ClearAttributes(); + char[] buffer; + switch (valSize) + { + + case 64: + buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG); + termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer)); + break; + + + case 32: + buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT); + termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer)); + break; + + + default: + // should not happen + throw new System.ArgumentException("valSize must be 32 or 64"); + + } + + typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC; + posIncrAtt.PositionIncrement = (shift == 0)?1:0; + shift += precisionStep; + return true; + } + + // @Override + public override System.String ToString() + { + System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize); + sb.Append(",precisionStep=").Append(precisionStep).Append(')'); + return sb.ToString(); + } + + // members + private ITermAttribute termAtt; + private ITypeAttribute typeAtt; + private IPositionIncrementAttribute posIncrAtt; + + private int shift = 0, valSize = 0; // valSize==0 means not initialized + private readonly int precisionStep; + + private long value_Renamed = 0L; + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/PerFieldAnalyzerWrapper.cs b/external/Lucene.Net.Light/src/core/Analysis/PerFieldAnalyzerWrapper.cs new file mode 100644 index 0000000000..b1c43aa886 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/PerFieldAnalyzerWrapper.cs @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Analysis +{ + + /// This analyzer is used to facilitate scenarios where different + /// fields require different analysis techniques. Use + /// to add a non-default analyzer on a field name basis. + /// + ///

Example usage: + /// + /// + /// PerFieldAnalyzerWrapper aWrapper = + /// new PerFieldAnalyzerWrapper(new StandardAnalyzer()); + /// aWrapper.addAnalyzer("firstname", new KeywordAnalyzer()); + /// aWrapper.addAnalyzer("lastname", new KeywordAnalyzer()); + /// + /// + ///

In this example, StandardAnalyzer will be used for all fields except "firstname" + /// and "lastname", for which KeywordAnalyzer will be used. + /// + ///

A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing + /// and query parsing. + ///
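The same scenario as the example above, written as compilable C#; KeywordAnalyzer comes from that example, and Version.LUCENE_24 stands in for whatever compatibility version the application actually targets:

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Version = Lucene.Net.Util.Version;

static class PerFieldAnalyzerSketch
{
    static Analyzer Build()
    {
        // StandardAnalyzer handles every field by default...
        var wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_24));

        // ...except these two, which are indexed as single keyword tokens.
        wrapper.AddAnalyzer("firstname", new KeywordAnalyzer());
        wrapper.AddAnalyzer("lastname", new KeywordAnalyzer());
        return wrapper;
    }
}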

+ public class PerFieldAnalyzerWrapper:Analyzer + { + private readonly Analyzer defaultAnalyzer; + private readonly IDictionary analyzerMap = new HashMap(); + + + /// Constructs with default analyzer. + /// + /// + /// Any fields not specifically + /// defined to use a different analyzer will use the one provided here. + /// + public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) + : this(defaultAnalyzer, null) + { + } + + /// Constructs with default analyzer and a map of analyzers to use for + /// specific fields. + /// + /// + /// Any fields not specifically + /// defined to use a different analyzer will use the one provided here. + /// + /// a Map (String field name to the Analyzer) to be + /// used for those fields + /// + public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer, IEnumerable> fieldAnalyzers) + { + this.defaultAnalyzer = defaultAnalyzer; + if (fieldAnalyzers != null) + { + foreach(var entry in fieldAnalyzers) + analyzerMap[entry.Key] = entry.Value; + } + SetOverridesTokenStreamMethod(); + } + + + /// Defines an analyzer to use for the specified field. + /// + /// + /// field name requiring a non-default analyzer + /// + /// non-default analyzer to use for field + /// + public virtual void AddAnalyzer(System.String fieldName, Analyzer analyzer) + { + analyzerMap[fieldName] = analyzer; + } + + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer; + + return analyzer.TokenStream(fieldName, reader); + } + + public override TokenStream ReusableTokenStream(string fieldName, System.IO.TextReader reader) + { + if (overridesTokenStreamMethod) + { + // LUCENE-1678: force fallback to tokenStream() if we + // have been subclassed and that subclass overrides + // tokenStream but not reusableTokenStream + return TokenStream(fieldName, reader); + } + var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer; + + return analyzer.ReusableTokenStream(fieldName, reader); + } + + /// Return the positionIncrementGap from the analyzer assigned to fieldName + public override int GetPositionIncrementGap(string fieldName) + { + var analyzer = analyzerMap[fieldName] ?? defaultAnalyzer; + return analyzer.GetPositionIncrementGap(fieldName); + } + + /// Return the offsetGap from the analyzer assigned to field + public override int GetOffsetGap(Documents.IFieldable field) + { + Analyzer analyzer = analyzerMap[field.Name] ?? defaultAnalyzer; + return analyzer.GetOffsetGap(field); + } + + public override System.String ToString() + { + // {{Aroush-2.9}} will 'analyzerMap.ToString()' work in the same way as Java's java.util.HashMap.toString()? + return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")"; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/PorterStemFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/PorterStemFilter.cs new file mode 100644 index 0000000000..b7f1dbf560 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/PorterStemFilter.cs @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Analysis.Tokenattributes; + +namespace Lucene.Net.Analysis +{ + + /// Transforms the token stream as per the Porter stemming algorithm. + /// Note: the input to the stemming filter must already be in lower case, + /// so you will need to use LowerCaseFilter or LowerCaseTokenizer farther + /// down the Tokenizer chain in order for this to work properly! + ///

+ /// To use this filter with other analyzers, you'll want to write an + /// Analyzer class that sets up the TokenStream chain as you want it. + /// To use this with LowerCaseTokenizer, for example, you'd write an + /// analyzer like this: + ///

+ /// + /// class MyAnalyzer extends Analyzer { + /// public final TokenStream tokenStream(String fieldName, Reader reader) { + /// return new PorterStemFilter(new LowerCaseTokenizer(reader)); + /// } + /// } + /// + ///
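The C# counterpart of the Java sketch above, using the Analyzer.TokenStream override signature seen elsewhere in this change; the analyzer name is only illustrative:

using System.IO;
using Lucene.Net.Analysis;

// Tokenizes at non-letters and lower-cases, then applies the Porter stemmer.
sealed class MyStemmingAnalyzer : Analyzer
{
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return new PorterStemFilter(new LowerCaseTokenizer(reader));
    }
}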

+ public sealed class PorterStemFilter:TokenFilter + { + private readonly PorterStemmer stemmer; + private readonly ITermAttribute termAtt; + + public PorterStemFilter(TokenStream in_Renamed):base(in_Renamed) + { + stemmer = new PorterStemmer(); + termAtt = AddAttribute(); + } + + public override bool IncrementToken() + { + if (!input.IncrementToken()) + return false; + + if (stemmer.Stem(termAtt.TermBuffer(), 0, termAtt.TermLength())) + termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength); + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/PorterStemmer.cs b/external/Lucene.Net.Light/src/core/Analysis/PorterStemmer.cs new file mode 100644 index 0000000000..f47c5a76fd --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/PorterStemmer.cs @@ -0,0 +1,746 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + +Porter stemmer in Java. The original paper is in + +Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, +no. 3, pp 130-137, + +See also http://www.tartarus.org/~martin/PorterStemmer/index.html + +Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below. +Tthe words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1] +is then out outside the bounds of b. + +Similarly, + +Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below. +'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and +b[j] is then outside the bounds of b. + +Release 3. + +[ This version is derived from Release 3, modified by Brian Goetz to +optimize for fewer object creations. ] +*/ +using System; +namespace Lucene.Net.Analysis +{ + + /// + /// Stemmer, implementing the Porter Stemming Algorithm + /// + /// The Stemmer class transforms a word into its root form. The input + /// word can be provided a character at time (by calling add()), or at once + /// by calling one of the various stem(something) methods. + /// + + class PorterStemmer + { + private char[] b; + private int i, j, k, k0; + private bool dirty = false; + private const int INC = 50; /* unit of size whereby b is increased */ + private const int EXTRA = 1; + + public PorterStemmer() + { + b = new char[INC]; + i = 0; + } + + /// reset() resets the stemmer so it can stem another word. If you invoke + /// the stemmer by calling add(char) and then stem(), you must call reset() + /// before starting another word. + /// + public virtual void Reset() + { + i = 0; dirty = false; + } + + /// Add a character to the word being stemmed. When you are finished + /// adding characters, you can call stem(void) to process the word. 
+ /// + public virtual void Add(char ch) + { + if (b.Length <= i + EXTRA) + { + var new_b = new char[b.Length + INC]; + Array.Copy(b, 0, new_b, 0, b.Length); + b = new_b; + } + b[i++] = ch; + } + + /// After a word has been stemmed, it can be retrieved by toString(), + /// or a reference to the internal buffer can be retrieved by getResultBuffer + /// and getResultLength (which is generally more efficient.) + /// + public override System.String ToString() + { + return new System.String(b, 0, i); + } + + /// Returns the length of the word resulting from the stemming process. + public virtual int ResultLength + { + get { return i; } + } + + /// Returns a reference to a character buffer containing the results of + /// the stemming process. You also need to consult getResultLength() + /// to determine the length of the result. + /// + public virtual char[] ResultBuffer + { + get { return b; } + } + + /* cons(i) is true <=> b[i] is a consonant. */ + + private bool Cons(int i) + { + switch (b[i]) + { + + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + return false; + + case 'y': + return (i == k0)?true:!Cons(i - 1); + + default: + return true; + + } + } + + /* m() measures the number of consonant sequences between k0 and j. if c is + a consonant sequence and v a vowel sequence, and <..> indicates arbitrary + presence, + + gives 0 + vc gives 1 + vcvc gives 2 + vcvcvc gives 3 + .... + */ + + private int M() + { + int n = 0; + int i = k0; + while (true) + { + if (i > j) + return n; + if (!Cons(i)) + break; + i++; + } + i++; + while (true) + { + while (true) + { + if (i > j) + return n; + if (Cons(i)) + break; + i++; + } + i++; + n++; + while (true) + { + if (i > j) + return n; + if (!Cons(i)) + break; + i++; + } + i++; + } + } + + /* vowelinstem() is true <=> k0,...j contains a vowel */ + + private bool Vowelinstem() + { + int i; + for (i = k0; i <= j; i++) + if (!Cons(i)) + return true; + return false; + } + + /* doublec(j) is true <=> j,(j-1) contain a double consonant. */ + + private bool Doublec(int j) + { + if (j < k0 + 1) + return false; + if (b[j] != b[j - 1]) + return false; + return Cons(j); + } + + /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant + and also if the second c is not w,x or y. this is used when trying to + restore an e at the end of a short word. e.g. + + cav(e), lov(e), hop(e), crim(e), but + snow, box, tray. + + */ + + private bool Cvc(int i) + { + if (i < k0 + 2 || !Cons(i) || Cons(i - 1) || !Cons(i - 2)) + return false; + else + { + int ch = b[i]; + if (ch == 'w' || ch == 'x' || ch == 'y') + return false; + } + return true; + } + + private bool Ends(System.String s) + { + int l = s.Length; + int o = k - l + 1; + if (o < k0) + return false; + for (int i = 0; i < l; i++) + if (b[o + i] != s[i]) + return false; + j = k - l; + return true; + } + + /* setto(s) sets (j+1),...k to the characters in the string s, readjusting + k. */ + + internal virtual void Setto(System.String s) + { + int l = s.Length; + int o = j + 1; + for (int i = 0; i < l; i++) + b[o + i] = s[i]; + k = j + l; + dirty = true; + } + + /* r(s) is used further down. */ + + internal virtual void R(System.String s) + { + if (M() > 0) + Setto(s); + } + + /* step1() gets rid of plurals and -ed or -ing. e.g. 
+ + caresses -> caress + ponies -> poni + ties -> ti + caress -> caress + cats -> cat + + feed -> feed + agreed -> agree + disabled -> disable + + matting -> mat + mating -> mate + meeting -> meet + milling -> mill + messing -> mess + + meetings -> meet + + */ + + private void Step1() + { + if (b[k] == 's') + { + if (Ends("sses")) + k -= 2; + else if (Ends("ies")) + Setto("i"); + else if (b[k - 1] != 's') + k--; + } + if (Ends("eed")) + { + if (M() > 0) + k--; + } + else if ((Ends("ed") || Ends("ing")) && Vowelinstem()) + { + k = j; + if (Ends("at")) + Setto("ate"); + else if (Ends("bl")) + Setto("ble"); + else if (Ends("iz")) + Setto("ize"); + else if (Doublec(k)) + { + int ch = b[k--]; + if (ch == 'l' || ch == 's' || ch == 'z') + k++; + } + else if (M() == 1 && Cvc(k)) + Setto("e"); + } + } + + /* step2() turns terminal y to i when there is another vowel in the stem. */ + + private void Step2() + { + if (Ends("y") && Vowelinstem()) + { + b[k] = 'i'; + dirty = true; + } + } + + /* step3() maps double suffices to single ones. so -ization ( = -ize plus + -ation) maps to -ize etc. note that the string before the suffix must give + m() > 0. */ + + private void Step3() + { + if (k == k0) + return ; /* For Bug 1 */ + switch (b[k - 1]) + { + + case 'a': + if (Ends("ational")) + { + R("ate"); break; + } + if (Ends("tional")) + { + R("tion"); break; + } + break; + + case 'c': + if (Ends("enci")) + { + R("ence"); break; + } + if (Ends("anci")) + { + R("ance"); break; + } + break; + + case 'e': + if (Ends("izer")) + { + R("ize"); break; + } + break; + + case 'l': + if (Ends("bli")) + { + R("ble"); break; + } + if (Ends("alli")) + { + R("al"); break; + } + if (Ends("entli")) + { + R("ent"); break; + } + if (Ends("eli")) + { + R("e"); break; + } + if (Ends("ousli")) + { + R("ous"); break; + } + break; + + case 'o': + if (Ends("ization")) + { + R("ize"); break; + } + if (Ends("ation")) + { + R("ate"); break; + } + if (Ends("ator")) + { + R("ate"); break; + } + break; + + case 's': + if (Ends("alism")) + { + R("al"); break; + } + if (Ends("iveness")) + { + R("ive"); break; + } + if (Ends("fulness")) + { + R("ful"); break; + } + if (Ends("ousness")) + { + R("ous"); break; + } + break; + + case 't': + if (Ends("aliti")) + { + R("al"); break; + } + if (Ends("iviti")) + { + R("ive"); break; + } + if (Ends("biliti")) + { + R("ble"); break; + } + break; + + case 'g': + if (Ends("logi")) + { + R("log"); break; + } + break; + } + } + + /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */ + + private void Step4() + { + switch (b[k]) + { + + case 'e': + if (Ends("icate")) + { + R("ic"); break; + } + if (Ends("ative")) + { + R(""); break; + } + if (Ends("alize")) + { + R("al"); break; + } + break; + + case 'i': + if (Ends("iciti")) + { + R("ic"); break; + } + break; + + case 'l': + if (Ends("ical")) + { + R("ic"); break; + } + if (Ends("ful")) + { + R(""); break; + } + break; + + case 's': + if (Ends("ness")) + { + R(""); break; + } + break; + } + } + + /* step5() takes off -ant, -ence etc., in context vcvc. 
*/ + + private void Step5() + { + if (k == k0) + return ; /* for Bug 1 */ + switch (b[k - 1]) + { + + case 'a': + if (Ends("al")) + break; + return ; + + case 'c': + if (Ends("ance")) + break; + if (Ends("ence")) + break; + return ; + + case 'e': + if (Ends("er")) + break; return ; + + case 'i': + if (Ends("ic")) + break; return ; + + case 'l': + if (Ends("able")) + break; + if (Ends("ible")) + break; return ; + + case 'n': + if (Ends("ant")) + break; + if (Ends("ement")) + break; + if (Ends("ment")) + break; + /* element etc. not stripped before the m */ + if (Ends("ent")) + break; + return ; + + case 'o': + if (Ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) + break; + /* j >= 0 fixes Bug 2 */ + if (Ends("ou")) + break; + return ; + /* takes care of -ous */ + + case 's': + if (Ends("ism")) + break; + return ; + + case 't': + if (Ends("ate")) + break; + if (Ends("iti")) + break; + return ; + + case 'u': + if (Ends("ous")) + break; + return ; + + case 'v': + if (Ends("ive")) + break; + return ; + + case 'z': + if (Ends("ize")) + break; + return ; + + default: + return ; + + } + if (M() > 1) + k = j; + } + + /* step6() removes a final -e if m() > 1. */ + + private void Step6() + { + j = k; + if (b[k] == 'e') + { + int a = M(); + if (a > 1 || a == 1 && !Cvc(k - 1)) + k--; + } + if (b[k] == 'l' && Doublec(k) && M() > 1) + k--; + } + + + /// Stem a word provided as a String. Returns the result as a String. + public virtual System.String Stem(System.String s) + { + if (Stem(s.ToCharArray(), s.Length)) + { + return ToString(); + } + else + return s; + } + + /// Stem a word contained in a char[]. Returns true if the stemming process + /// resulted in a word different from the input. You can retrieve the + /// result with getResultLength()/getResultBuffer() or toString(). + /// + public virtual bool Stem(char[] word) + { + return Stem(word, word.Length); + } + + /// Stem a word contained in a portion of a char[] array. Returns + /// true if the stemming process resulted in a word different from + /// the input. You can retrieve the result with + /// getResultLength()/getResultBuffer() or toString(). + /// + public virtual bool Stem(char[] wordBuffer, int offset, int wordLen) + { + Reset(); + if (b.Length < wordLen) + { + var new_b = new char[wordLen + EXTRA]; + b = new_b; + } + Array.Copy(wordBuffer, offset, b, 0, wordLen); + i = wordLen; + return Stem(0); + } + + /// Stem a word contained in a leading portion of a char[] array. + /// Returns true if the stemming process resulted in a word different + /// from the input. You can retrieve the result with + /// getResultLength()/getResultBuffer() or toString(). + /// + public virtual bool Stem(char[] word, int wordLen) + { + return Stem(word, 0, wordLen); + } + + /// Stem the word placed into the Stemmer buffer through calls to add(). + /// Returns true if the stemming process resulted in a word different + /// from the input. You can retrieve the result with + /// getResultLength()/getResultBuffer() or toString(). + /// + public virtual bool Stem() + { + return Stem(0); + } + + public virtual bool Stem(int i0) + { + k = i - 1; + k0 = i0; + if (k > k0 + 1) + { + Step1(); Step2(); Step3(); Step4(); Step5(); Step6(); + } + // Also, a word is considered dirty if we lopped off letters + // Thanks to Ifigenia Vairelles for pointing this out. + if (i != k + 1) + dirty = true; + i = k + 1; + return dirty; + } + + /// Test program for demonstrating the Stemmer. It reads a file and + /// stems each word, writing the result to standard out. 
+ /// Usage: Stemmer file-name + /// + [STAThread] + public static void Main(System.String[] args) + { + var s = new PorterStemmer(); + + for (int i = 0; i < args.Length; i++) + { + try + { + System.IO.Stream in_Renamed = new System.IO.FileStream(args[i], System.IO.FileMode.Open, System.IO.FileAccess.Read); + var buffer = new byte[1024]; + + int bufferLen = in_Renamed.Read(buffer, 0, buffer.Length); + int offset = 0; + s.Reset(); + + while (true) + { + int ch; + if (offset < bufferLen) + ch = buffer[offset++]; + else + { + bufferLen = in_Renamed.Read(buffer, 0, buffer.Length); + offset = 0; + if (bufferLen < 0) + ch = - 1; + else + ch = buffer[offset++]; + } + + if (Char.IsLetter((char) ch)) + { + s.Add(Char.ToLower((char) ch)); + } + else + { + s.Stem(); + Console.Out.Write(s.ToString()); + s.Reset(); + if (ch < 0) + break; + else + { + System.Console.Out.Write((char) ch); + } + } + } + + in_Renamed.Close(); + } + catch (System.IO.IOException) + { + Console.Out.WriteLine("error reading " + args[i]); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/SimpleAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/SimpleAnalyzer.cs new file mode 100644 index 0000000000..b84f470e8e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/SimpleAnalyzer.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// An that filters + /// with + /// + + public sealed class SimpleAnalyzer : Analyzer + { + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new LowerCaseTokenizer(reader); + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + var tokenizer = (Tokenizer) PreviousTokenStream; + if (tokenizer == null) + { + tokenizer = new LowerCaseTokenizer(reader); + PreviousTokenStream = tokenizer; + } + else + tokenizer.Reset(reader); + return tokenizer; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardAnalyzer.cs new file mode 100644 index 0000000000..347d0262a4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardAnalyzer.cs @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using Lucene.Net.Analysis; +using Lucene.Net.Util; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.Analysis.Standard +{ + + /// Filters with , + /// and , using a list of English stop + /// words. + /// + /// + ///

+ /// You must specify the required compatibility when creating + /// StandardAnalyzer: + /// + /// As of 2.9, StopFilter preserves position increments + /// As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + /// LUCENE-1608) + /// + ///
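A small sketch of the version-pinned construction described above; LUCENE_24 is the only compatibility constant visible in this change, so it is used as a placeholder for the version an application would really pass:

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Version = Lucene.Net.Util.Version;

static class StandardAnalyzerSketch
{
    static TokenStream Analyze(string text)
    {
        // The Version argument fixes the behaviours listed above (stop-word
        // position increments, acronym correction) for index compatibility.
        var analyzer = new StandardAnalyzer(Version.LUCENE_24);
        return analyzer.TokenStream("body", new StringReader(text));
    }
}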

+ public class StandardAnalyzer : Analyzer + { + private ISet stopSet; + + /// Specifies whether deprecated acronyms should be replaced with HOST type. + /// See https://issues.apache.org/jira/browse/LUCENE-1068 + /// + private bool replaceInvalidAcronym, enableStopPositionIncrements; + + /// An unmodifiable set containing some common English words that are usually not + /// useful for searching. + /// + public static readonly ISet STOP_WORDS_SET; + private Version matchVersion; + + /// Builds an analyzer with the default stop words (). + /// + /// Lucene version to match see above + public StandardAnalyzer(Version matchVersion) + : this(matchVersion, STOP_WORDS_SET) + { } + + /// Builds an analyzer with the given stop words. + /// Lucene version to match See above /> + /// + /// + /// stop words + /// + public StandardAnalyzer(Version matchVersion, ISet stopWords) + { + stopSet = stopWords; + SetOverridesTokenStreamMethod(); + enableStopPositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion); + replaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24); + this.matchVersion = matchVersion; + } + + /// Builds an analyzer with the stop words from the given file. + /// + /// + /// Lucene version to match See above /> + /// + /// + /// File to read stop words from + /// + public StandardAnalyzer(Version matchVersion, System.IO.FileInfo stopwords) + : this (matchVersion, WordlistLoader.GetWordSet(stopwords)) + { + } + + /// Builds an analyzer with the stop words from the given reader. + /// + /// + /// Lucene version to match See above /> + /// + /// + /// Reader to read stop words from + /// + public StandardAnalyzer(Version matchVersion, System.IO.TextReader stopwords) + : this(matchVersion, WordlistLoader.GetWordSet(stopwords)) + { } + + /// Constructs a filtered by a + ///, a and a . + /// + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader); + tokenStream.MaxTokenLength = maxTokenLength; + TokenStream result = new StandardFilter(tokenStream); + result = new LowerCaseFilter(result); + result = new StopFilter(enableStopPositionIncrements, result, stopSet); + return result; + } + + private sealed class SavedStreams + { + internal StandardTokenizer tokenStream; + internal TokenStream filteredTokenStream; + } + + /// Default maximum allowed token length + public const int DEFAULT_MAX_TOKEN_LENGTH = 255; + + private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; + + /// Set maximum allowed token length. If a token is seen + /// that exceeds this length then it is discarded. This + /// setting only takes effect the next time tokenStream or + /// reusableTokenStream is called. 
+ /// + public virtual int MaxTokenLength + { + get { return maxTokenLength; } + set { maxTokenLength = value; } + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + if (overridesTokenStreamMethod) + { + // LUCENE-1678: force fallback to tokenStream() if we + // have been subclassed and that subclass overrides + // tokenStream but not reusableTokenStream + return TokenStream(fieldName, reader); + } + SavedStreams streams = (SavedStreams) PreviousTokenStream; + if (streams == null) + { + streams = new SavedStreams(); + PreviousTokenStream = streams; + streams.tokenStream = new StandardTokenizer(matchVersion, reader); + streams.filteredTokenStream = new StandardFilter(streams.tokenStream); + streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream); + streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, + streams.filteredTokenStream, stopSet); + } + else + { + streams.tokenStream.Reset(reader); + } + streams.tokenStream.MaxTokenLength = maxTokenLength; + + streams.tokenStream.SetReplaceInvalidAcronym(replaceInvalidAcronym); + + return streams.filteredTokenStream; + } + static StandardAnalyzer() + { + STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardFilter.cs new file mode 100644 index 0000000000..fd13261084 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardFilter.cs @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Token = Lucene.Net.Analysis.Token; +using TokenFilter = Lucene.Net.Analysis.TokenFilter; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Analysis.Standard +{ + + /// Normalizes tokens extracted with . + + public sealed class StandardFilter:TokenFilter + { + + + /// Construct filtering in. + public StandardFilter(TokenStream in_Renamed):base(in_Renamed) + { + termAtt = AddAttribute(); + typeAtt = AddAttribute(); + } + + private static readonly System.String APOSTROPHE_TYPE; + private static readonly System.String ACRONYM_TYPE; + + // this filters uses attribute type + private ITypeAttribute typeAtt; + private ITermAttribute termAtt; + + /// Returns the next token in the stream, or null at EOS. + ///

Removes 's from the end of words. + ///

Removes dots from acronyms. + ///
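A minimal sketch of where this filter sits in a chain, mirroring what StandardAnalyzer builds internally; the sample inputs in the comment are assumptions about typical tokenizer output:

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Version = Lucene.Net.Util.Version;

static class StandardFilterSketch
{
    static TokenStream Build(TextReader reader)
    {
        // e.g. "John's" is emitted as "John"; "I.B.M." (typed ACRONYM) as "IBM".
        return new StandardFilter(new StandardTokenizer(Version.LUCENE_24, reader));
    }
}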

+ public override bool IncrementToken() + { + if (!input.IncrementToken()) + { + return false; + } + + char[] buffer = termAtt.TermBuffer(); + int bufferLength = termAtt.TermLength(); + System.String type = typeAtt.Type; + + if ((System.Object) type == (System.Object) APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) + { + // Strip last 2 characters off + termAtt.SetTermLength(bufferLength - 2); + } + else if ((System.Object) type == (System.Object) ACRONYM_TYPE) + { + // remove dots + int upto = 0; + for (int i = 0; i < bufferLength; i++) + { + char c = buffer[i]; + if (c != '.') + buffer[upto++] = c; + } + termAtt.SetTermLength(upto); + } + + return true; + } + static StandardFilter() + { + APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE]; + ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizer.cs new file mode 100644 index 0000000000..dca409d59a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizer.cs @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Util; +using CharReader = Lucene.Net.Analysis.CharReader; +using Token = Lucene.Net.Analysis.Token; +using Tokenizer = Lucene.Net.Analysis.Tokenizer; +using AttributeSource = Lucene.Net.Util.AttributeSource; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.Analysis.Standard +{ + + /// A grammar-based tokenizer constructed with JFlex + /// + ///

This should be a good tokenizer for most European-language documents: + /// + /// + /// Splits words at punctuation characters, removing punctuation. However, a + /// dot that's not followed by whitespace is considered part of a token. + /// Splits words at hyphens, unless there's a number in the token, in which case + /// the whole token is interpreted as a product number and is not split. + /// Recognizes email addresses and internet hostnames as one token. + /// + /// + ///

Many applications have specific tokenizer needs. If this tokenizer does + /// not suit your application, please consider copying this source code + /// directory to your project and maintaining your own grammar-based tokenizer. + /// + /// + ///

+ /// You must specify the required compatibility when creating + /// StandardAnalyzer: + /// + /// As of 2.4, Tokens incorrectly identified as acronyms are corrected (see + /// LUCENE-1608 + /// + ///
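A sketch of driving the tokenizer directly; the generic AddAttribute call is assumed from the usual AttributeSource API, while TermBuffer()/TermLength() match the attribute usage elsewhere in this change:

using System;
using System.IO;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Tokenattributes;
using Version = Lucene.Net.Util.Version;

static class StandardTokenizerSketch
{
    static void PrintTokens(string text)
    {
        var tokenizer = new StandardTokenizer(Version.LUCENE_24, new StringReader(text));
        var termAtt = tokenizer.AddAttribute<ITermAttribute>();

        // Emit each term until the scanner reports end of input.
        while (tokenizer.IncrementToken())
            Console.WriteLine(new string(termAtt.TermBuffer(), 0, termAtt.TermLength()));

        tokenizer.End(); // records the final offset
    }
}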

+ + public sealed class StandardTokenizer:Tokenizer + { + private void InitBlock() + { + maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; + } + /// A private instance of the JFlex-constructed scanner + private StandardTokenizerImpl scanner; + + public const int ALPHANUM = 0; + public const int APOSTROPHE = 1; + public const int ACRONYM = 2; + public const int COMPANY = 3; + public const int EMAIL = 4; + public const int HOST = 5; + public const int NUM = 6; + public const int CJ = 7; + + /// this solves a bug where HOSTs that end with '.' are identified + /// as ACRONYMs. + /// + [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs.")] + public const int ACRONYM_DEP = 8; + + /// String token types that correspond to token type int constants + public static readonly System.String[] TOKEN_TYPES = new System.String[]{"", "", "", "", "", "", "", "", ""}; + + private bool replaceInvalidAcronym; + + private int maxTokenLength; + + /// Set the max allowed token length. Any token longer + /// than this is skipped. + /// + public int MaxTokenLength + { + get { return maxTokenLength; } + set { this.maxTokenLength = value; } + } + + /// Creates a new instance of the + /// . Attaches + /// the input to the newly created JFlex scanner. + /// + /// + /// + /// The input reader + /// + /// See http://issues.apache.org/jira/browse/LUCENE-1068 + /// + public StandardTokenizer(Version matchVersion, System.IO.TextReader input):base() + { + InitBlock(); + this.scanner = new StandardTokenizerImpl(input); + Init(input, matchVersion); + } + + /// Creates a new StandardTokenizer with a given . + public StandardTokenizer(Version matchVersion, AttributeSource source, System.IO.TextReader input):base(source) + { + InitBlock(); + this.scanner = new StandardTokenizerImpl(input); + Init(input, matchVersion); + } + + /// Creates a new StandardTokenizer with a given + /// + /// + public StandardTokenizer(Version matchVersion, AttributeFactory factory, System.IO.TextReader input):base(factory) + { + InitBlock(); + this.scanner = new StandardTokenizerImpl(input); + Init(input, matchVersion); + } + + private void Init(System.IO.TextReader input, Version matchVersion) + { + if (matchVersion.OnOrAfter(Version.LUCENE_24)) + { + replaceInvalidAcronym = true; + } + else + { + replaceInvalidAcronym = false; + } + this.input = input; + termAtt = AddAttribute(); + offsetAtt = AddAttribute(); + posIncrAtt = AddAttribute(); + typeAtt = AddAttribute(); + } + + // this tokenizer generates three attributes: + // offset, positionIncrement and type + private ITermAttribute termAtt; + private IOffsetAttribute offsetAtt; + private IPositionIncrementAttribute posIncrAtt; + private ITypeAttribute typeAtt; + + /// + /// (non-Javadoc) + /// + /// + public override bool IncrementToken() + { + ClearAttributes(); + int posIncr = 1; + + while (true) + { + int tokenType = scanner.GetNextToken(); + + if (tokenType == StandardTokenizerImpl.YYEOF) + { + return false; + } + + if (scanner.Yylength() <= maxTokenLength) + { + posIncrAtt.PositionIncrement = posIncr; + scanner.GetText(termAtt); + int start = scanner.Yychar(); + offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.TermLength())); + // This 'if' should be removed in the next release. For now, it converts + // invalid acronyms to HOST. When removed, only the 'else' part should + // remain. 
+ if (tokenType == StandardTokenizerImpl.ACRONYM_DEP) + { + if (replaceInvalidAcronym) + { + typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]; + termAtt.SetTermLength(termAtt.TermLength() - 1); // remove extra '.' + } + else + { + typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]; + } + } + else + { + typeAtt.Type = StandardTokenizerImpl.TOKEN_TYPES[tokenType]; + } + return true; + } + // When we skip a too-long term, we still increment the + // position increment + else + posIncr++; + } + } + + public override void End() + { + // set final offset + int finalOffset = CorrectOffset(scanner.Yychar() + scanner.Yylength()); + offsetAtt.SetOffset(finalOffset, finalOffset); + } + + public override void Reset(System.IO.TextReader reader) + { + base.Reset(reader); + scanner.Reset(reader); + } + + /// + /// Remove in 3.X and make true the only valid value + /// See https://issues.apache.org/jira/browse/LUCENE-1068 + /// + /// Set to true to replace mischaracterized acronyms as HOST. + /// + [Obsolete("Remove in 3.X and make true the only valid value. See https://issues.apache.org/jira/browse/LUCENE-1068")] + public void SetReplaceInvalidAcronym(bool replaceInvalidAcronym) + { + this.replaceInvalidAcronym = replaceInvalidAcronym; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizerImpl.cs b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizerImpl.cs new file mode 100644 index 0000000000..cb4bf5fb4b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Standard/StandardTokenizerImpl.cs @@ -0,0 +1,707 @@ +/* The following code was generated by JFlex 1.4.1 on 9/4/08 6:49 PM */ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* + NOTE: if you change StandardTokenizerImpl.jflex and need to regenerate the tokenizer, + the tokenizer, only use Java 1.4 !!! + This grammar currently uses constructs (eg :digit:, :letter:) whose + meaning can vary according to the JRE used to run jflex. See + https://issues.apache.org/jira/browse/LUCENE-1126 for details. + For current backwards compatibility it is needed to support + only Java 1.4 - this will change in Lucene 3.1. 
+*/ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Token = Lucene.Net.Analysis.Token; + +namespace Lucene.Net.Analysis.Standard +{ + + + /// This class is a scanner generated by + /// JFlex 1.4.1 + /// on 9/4/08 6:49 PM from the specification file + /// /tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex + /// + class StandardTokenizerImpl + { + + /// This character denotes the end of file + public const int YYEOF = - 1; + + /// initial size of the lookahead buffer + private const int ZZ_BUFFERSIZE = 16384; + + /// lexical states + public const int YYINITIAL = 0; + + /// Translates characters to character classes + private const System.String ZZ_CMAP_PACKED = "\x0009\x0000\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0000\x0001\x000C\x0012\x0000\x0001\x0000\x0005\x0000\x0001\x0005" + "\x0001\x0003\x0004\x0000\x0001\x0009\x0001\x0007\x0001\x0004\x0001\x0009\x000A\x0002\x0006\x0000\x0001\x0006\x001A\x000A" + "\x0004\x0000\x0001\x0008\x0001\x0000\x001A\x000A\x002F\x0000\x0001\x000A\x000A\x0000\x0001\x000A\x0004\x0000\x0001\x000A" + "\x0005\x0000\x0017\x000A\x0001\x0000\x001F\x000A\x0001\x0000\u0128\x000A\x0002\x0000\x0012\x000A\x001C\x0000\x005E\x000A" + "\x0002\x0000\x0009\x000A\x0002\x0000\x0007\x000A\x000E\x0000\x0002\x000A\x000E\x0000\x0005\x000A\x0009\x0000\x0001\x000A" + "\x008B\x0000\x0001\x000A\x000B\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0014\x000A" + "\x0001\x0000\x002C\x000A\x0001\x0000\x0008\x000A\x0002\x0000\x001A\x000A\x000C\x0000\x0082\x000A\x000A\x0000\x0039\x000A" + "\x0002\x0000\x0002\x000A\x0002\x0000\x0002\x000A\x0003\x0000\x0026\x000A\x0002\x0000\x0002\x000A\x0037\x0000\x0026\x000A" + "\x0002\x0000\x0001\x000A\x0007\x0000\x0027\x000A\x0048\x0000\x001B\x000A\x0005\x0000\x0003\x000A\x002E\x0000\x001A\x000A" + "\x0005\x0000\x000B\x000A\x0015\x0000\x000A\x0002\x0007\x0000\x0063\x000A\x0001\x0000\x0001\x000A\x000F\x0000\x0002\x000A" + "\x0009\x0000\x000A\x0002\x0003\x000A\x0013\x0000\x0001\x000A\x0001\x0000\x001B\x000A\x0053\x0000\x0026\x000A\u015f\x0000" + "\x0035\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x0007\x0000\x000A\x000A\x0004\x0000\x000A\x0002\x0015\x0000" + "\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0003\x0000" + "\x0004\x000A\x0022\x0000\x0002\x000A\x0001\x0000\x0003\x000A\x0004\x0000\x000A\x0002\x0002\x000A\x0013\x0000\x0006\x000A" + "\x0004\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0002\x000A\x0001\x0000\x0002\x000A" + + "\x0001\x0000\x0002\x000A\x001F\x0000\x0004\x000A\x0001\x0000\x0001\x000A\x0007\x0000\x000A\x0002\x0002\x0000\x0003\x000A" + "\x0010\x0000\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x0005\x000A\x0003\x0000\x0001\x000A\x0012\x0000\x0001\x000A\x000F\x0000\x0001\x000A" + "\x0005\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0002\x0000\x0002\x000A\x0002\x0000\x0016\x000A\x0001\x0000\x0007\x000A" + "\x0001\x0000\x0002\x000A\x0002\x0000\x0004\x000A\x0003\x0000\x0001\x000A\x001E\x0000\x0002\x000A\x0001\x0000\x0003\x000A" + "\x0004\x0000\x000A\x0002\x0015\x0000\x0006\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0004\x000A\x0003\x0000\x0002\x000A" + "\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A\x0003\x0000\x0002\x000A\x0003\x0000\x0003\x000A\x0003\x0000\x0008\x000A" + 
"\x0001\x0000\x0003\x000A\x002D\x0000\x0009\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A" + "\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A\x0001\x0000\x000A\x000A\x0001\x0000\x0005\x000A\x0024\x0000\x0001\x000A" + "\x0001\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0008\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0017\x000A" + "\x0001\x0000\x0010\x000A\x0026\x0000\x0002\x000A\x0004\x0000\x000A\x0002\x0015\x0000\x0012\x000A\x0003\x0000\x0018\x000A" + "\x0001\x0000\x0009\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0007\x000A\x0039\x0000\x0001\x0001\x0030\x000A\x0001\x0001" + "\x0002\x000A\x000C\x0001\x0007\x000A\x0009\x0001\x000A\x0002\x0027\x0000\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000" + "\x0002\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0001\x000A\x0006\x0000\x0004\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x000A\x0002\x0000\x0002\x000A\x0001\x0000\x0004\x000A\x0001\x0000" + + "\x0002\x000A\x0009\x0000\x0001\x000A\x0002\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0009\x0000\x000A\x0002\x0002\x0000" + "\x0002\x000A\x0022\x0000\x0001\x000A\x001F\x0000\x000A\x0002\x0016\x0000\x0008\x000A\x0001\x0000\x0022\x000A\x001D\x0000" + "\x0004\x000A\x0074\x0000\x0022\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0002\x000A\x0015\x0000\x000A\x0002\x0006\x0000" + "\x0006\x000A\x004A\x0000\x0026\x000A\x000A\x0000\x0027\x000A\x0009\x0000\x005A\x000A\x0005\x0000\x0044\x000A\x0005\x0000" + "\x0052\x000A\x0006\x0000\x0007\x000A\x0001\x0000\x003F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000" + "\x0007\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0027\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0004\x000A\x0002\x0000\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0007\x000A\x0001\x0000\x0017\x000A\x0001\x0000" + "\x001F\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0002\x0000\x0007\x000A\x0001\x0000\x0027\x000A\x0001\x0000" + "\x0013\x000A\x000E\x0000\x0009\x0002\x002E\x0000\x0055\x000A\x000C\x0000\u026c\x000A\x0002\x0000\x0008\x000A\x000A\x0000" + "\x001A\x000A\x0005\x0000\x004B\x000A\x0095\x0000\x0034\x000A\x002C\x0000\x000A\x0002\x0026\x0000\x000A\x0002\x0006\x0000" + "\x0058\x000A\x0008\x0000\x0029\x000A\u0557\x0000\x009C\x000A\x0004\x0000\x005A\x000A\x0006\x0000\x0016\x000A\x0002\x0000" + "\x0006\x000A\x0002\x0000\x0026\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0008\x000A\x0001\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x001F\x000A\x0002\x0000\x0035\x000A\x0001\x0000\x0007\x000A\x0001\x0000" + "\x0001\x000A\x0003\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0003\x0000\x0004\x000A\x0002\x0000\x0006\x000A\x0004\x0000" + "\x000D\x000A\x0005\x0000\x0003\x000A\x0001\x0000\x0007\x000A\x0082\x0000\x0001\x000A\x0082\x0000\x0001\x000A\x0004\x0000" + + "\x0001\x000A\x0002\x0000\x000A\x000A\x0001\x0000\x0001\x000A\x0003\x0000\x0005\x000A\x0006\x0000\x0001\x000A\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0004\x000A\x0001\x0000\x0003\x000A\x0001\x0000\x0007\x000A\u0ecb\x0000" + "\x0002\x000A\x002A\x0000\x0005\x000A\x000A\x0000\x0001\x000B\x0054\x000B\x0008\x000B\x0002\x000B\x0002\x000B\x005A\x000B" + 
"\x0001\x000B\x0003\x000B\x0006\x000B\x0028\x000B\x0003\x000B\x0001\x0000\x005E\x000A\x0011\x0000\x0018\x000A\x0038\x0000" + "\x0010\x000B\u0100\x0000\x0080\x000B\x0080\x0000\u19b6\x000B\x000A\x000B\x0040\x0000\u51a6\x000B\x005A\x000B\u048d\x000A" + "\u0773\x0000\u2ba4\x000A\u215c\x0000\u012e\x000B\x00D2\x000B\x0007\x000A\x000C\x0000\x0005\x000A\x0005\x0000\x0001\x000A" + "\x0001\x0000\x000A\x000A\x0001\x0000\x000D\x000A\x0001\x0000\x0005\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0002\x000A" + "\x0001\x0000\x0002\x000A\x0001\x0000\x006C\x000A\x0021\x0000\u016b\x000A\x0012\x0000\x0040\x000A\x0002\x0000\x0036\x000A" + "\x0028\x0000\x000C\x000A\x0074\x0000\x0003\x000A\x0001\x0000\x0001\x000A\x0001\x0000\x0087\x000A\x0013\x0000\x000A\x0002" + "\x0007\x0000\x001A\x000A\x0006\x0000\x001A\x000A\x000A\x0000\x0001\x000B\x003A\x000B\x001F\x000A\x0003\x0000\x0006\x000A" + "\x0002\x0000\x0006\x000A\x0002\x0000\x0006\x000A\x0002\x0000\x0003\x000A\x0023\x0000"; + + /// Translates characters to character classes + private static readonly char[] ZZ_CMAP = ZzUnpackCMap(ZZ_CMAP_PACKED); + + /// Translates DFA states to action switch labels. + private static readonly int[] ZZ_ACTION = ZzUnpackAction(); + + private const System.String ZZ_ACTION_PACKED_0 = "\x0001\x0000\x0001\x0001\x0003\x0002\x0001\x0003\x0001\x0001\x000B\x0000\x0001\x0002\x0003\x0004" + "\x0002\x0000\x0001\x0005\x0001\x0000\x0001\x0005\x0003\x0004\x0006\x0005\x0001\x0006\x0001\x0004" + "\x0002\x0007\x0001\x0008\x0001\x0000\x0001\x0008\x0003\x0000\x0002\x0008\x0001\x0009\x0001\x000A" + "\x0001\x0004"; + + private static int[] ZzUnpackAction() + { + int[] result = new int[51]; + int offset = 0; + offset = ZzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackAction(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int count = packed[i++]; + int value_Renamed = packed[i++]; + do + result[j++] = value_Renamed; + while (--count > 0); + } + return j; + } + + + /// Translates a state to a row index in the transition table + private static readonly int[] ZZ_ROWMAP = ZzUnpackRowMap(); + + private const System.String ZZ_ROWMAP_PACKED_0 = "\x0000\x0000\x0000\x000E\x0000\x001C\x0000\x002A\x0000\x0038\x0000\x000E\x0000\x0046\x0000\x0054" + "\x0000\x0062\x0000\x0070\x0000\x007E\x0000\x008C\x0000\x009A\x0000\x00A8\x0000\x00B6\x0000\x00C4" + "\x0000\x00D2\x0000\x00E0\x0000\x00EE\x0000\x00FC\x0000\u010a\x0000\u0118\x0000\u0126\x0000\u0134" + "\x0000\u0142\x0000\u0150\x0000\u015e\x0000\u016c\x0000\u017a\x0000\u0188\x0000\u0196\x0000\u01a4" + "\x0000\u01b2\x0000\u01c0\x0000\u01ce\x0000\u01dc\x0000\u01ea\x0000\u01f8\x0000\x00D2\x0000\u0206" + "\x0000\u0214\x0000\u0222\x0000\u0230\x0000\u023e\x0000\u024c\x0000\u025a\x0000\x0054\x0000\x008C" + "\x0000\u0268\x0000\u0276\x0000\u0284"; + + private static int[] ZzUnpackRowMap() + { + int[] result = new int[51]; + int offset = 0; + offset = ZzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackRowMap(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int high = packed[i++] << 16; + result[j++] = high | packed[i++]; + } + return j; + } + + /// The transition table of the DFA + private static readonly int[] ZZ_TRANS = ZzUnpackTrans(); + + private 
const System.String ZZ_TRANS_PACKED_0 = "\x0001\x0002\x0001\x0003\x0001\x0004\x0007\x0002\x0001\x0005\x0001\x0006\x0001\x0007\x0001\x0002" + "\x000F\x0000\x0002\x0003\x0001\x0000\x0001\x0008\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x0003\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x0000\x0001\x000C\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x0004\x0004\x0000\x0001\x0003\x0001\x0004\x0001\x000F\x0001\x0010" + "\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0010\x0000\x0001\x0002\x0001\x0000" + "\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0017" + "\x0004\x0000\x0001\x0018\x0001\x0019\x0007\x0000\x0001\x001A\x0005\x0000\x0001\x001B\x0007\x0000" + "\x0001\x000B\x0004\x0000\x0001\x001C\x0001\x001D\x0007\x0000\x0001\x001E\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0007\x0000\x0001\x0021\x0004\x0000\x0001\x0022\x0001\x0023\x0007\x0000\x0001\x0024" + "\x000D\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015\x0007\x0000\x0001\x0026\x000D\x0000" + "\x0001\x0027\x0004\x0000\x0002\x0017\x0007\x0000\x0001\x0028\x0004\x0000\x0001\x0003\x0001\x0004" + "\x0001\x000F\x0001\x0008\x0001\x0011\x0001\x0012\x0002\x000A\x0001\x000B\x0001\x0013\x0004\x0000" + "\x0002\x0014\x0001\x0000\x0001\x0029\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014" + "\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x002B\x0001\x0000\x0001\x0009\x0002\x002C" + "\x0001\x002D\x0001\x0015\x0004\x0000\x0001\x0014\x0001\x0015\x0001\x0000\x0001\x0029\x0001\x0000" + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000\x0002\x0017\x0001\x0000\x0001\x002E" + "\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0017\x0004\x0000\x0002\x0018\x0001\x0000\x0001\x002A" + "\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0018\x0004\x0000\x0001\x0018\x0001\x0019" + "\x0001\x0000\x0001\x002C\x0001\x0000\x0001\x0009\x0002\x002C\x0001\x002D\x0001\x0019\x0004\x0000" + + "\x0001\x0018\x0001\x0019\x0001\x0000\x0001\x002A\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000" + "\x0001\x001A\x0005\x0000\x0001\x001B\x0001\x0000\x0001\x002D\x0002\x0000\x0003\x002D\x0001\x001B" + "\x0004\x0000\x0002\x001C\x0001\x0000\x0001\x002F\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B" + "\x0001\x001C\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x0030\x0001\x0000\x0001\x0009" + "\x0002\x000D\x0001\x000E\x0001\x001D\x0004\x0000\x0001\x001C\x0001\x001D\x0001\x0000\x0001\x002F" + "\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001E\x0004\x0000\x0002\x001F\x0001\x0000" + "\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A\x0001\x000B\x0001\x001F\x0004\x0000\x0001\x001F" + "\x0001\x0020\x0001\x0000\x0001\x000D\x0001\x0000\x0001\x0009\x0002\x000D\x0001\x000E\x0001\x0020" + "\x0004\x0000\x0001\x001F\x0001\x0020\x0001\x0000\x0001\x000A\x0001\x0000\x0001\x0009\x0002\x000A" + "\x0001\x000B\x0001\x0021\x0004\x0000\x0002\x0022\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0022\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000E\x0002\x0000\x0003\x000E" + "\x0001\x0023\x0004\x0000\x0001\x0022\x0001\x0023\x0001\x0000\x0001\x000B\x0002\x0000\x0003\x000B" + "\x0001\x0024\x0006\x0000\x0001\x000F\x0006\x0000\x0001\x0025\x0004\x0000\x0001\x0014\x0001\x0015" + "\x0001\x0000\x0001\x0031\x0001\x0000\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0016\x0004\x0000" + "\x0002\x0017\x0001\x0000\x0001\x002E\x0002\x0000\x0001\x002E\x0002\x0000\x0001\x0028\x0004\x0000" + 
"\x0002\x0014\x0007\x0000\x0001\x0014\x0004\x0000\x0002\x0018\x0007\x0000\x0001\x0018\x0004\x0000" + "\x0002\x001C\x0007\x0000\x0001\x001C\x0004\x0000\x0002\x001F\x0007\x0000\x0001\x001F\x0004\x0000" + "\x0002\x0022\x0007\x0000\x0001\x0022\x0004\x0000\x0002\x0032\x0007\x0000\x0001\x0032\x0004\x0000" + "\x0002\x0014\x0007\x0000\x0001\x0033\x0004\x0000\x0002\x0032\x0001\x0000\x0001\x002E\x0002\x0000" + "\x0001\x002E\x0002\x0000\x0001\x0032\x0004\x0000\x0002\x0014\x0001\x0000\x0001\x0031\x0001\x0000" + + "\x0001\x0009\x0002\x002A\x0001\x0000\x0001\x0014\x0003\x0000"; + + private static int[] ZzUnpackTrans() + { + int[] result = new int[658]; + int offset = 0; + offset = ZzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackTrans(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int count = packed[i++]; + int value_Renamed = packed[i++]; + value_Renamed--; + do + result[j++] = value_Renamed; + while (--count > 0); + } + return j; + } + + + /* error codes */ + private const int ZZ_UNKNOWN_ERROR = 0; + private const int ZZ_NO_MATCH = 1; + private const int ZZ_PUSHBACK_2BIG = 2; + + /* error messages for the codes above */ + private static readonly System.String[] ZZ_ERROR_MSG = new System.String[]{"Unkown internal scanner error", "Error: could not match input", "Error: pushback value was too large"}; + + /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState + private static readonly int[] ZZ_ATTRIBUTE = ZzUnpackAttribute(); + + private const System.String ZZ_ATTRIBUTE_PACKED_0 = "\x0001\x0000\x0001\x0009\x0003\x0001\x0001\x0009\x0001\x0001\x000B\x0000\x0004\x0001\x0002\x0000" + "\x0001\x0001\x0001\x0000\x000F\x0001\x0001\x0000\x0001\x0001\x0003\x0000\x0005\x0001"; + + private static int[] ZzUnpackAttribute() + { + int[] result = new int[51]; + int offset = 0; + offset = ZzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); + return result; + } + + private static int ZzUnpackAttribute(System.String packed, int offset, int[] result) + { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.Length; + while (i < l) + { + int count = packed[i++]; + int value_Renamed = packed[i++]; + do + result[j++] = value_Renamed; + while (--count > 0); + } + return j; + } + + /// the input device + private System.IO.TextReader zzReader; + + /// the current state of the DFA + private int zzState; + + /// the current lexical state + private int zzLexicalState = YYINITIAL; + + /// this buffer contains the current text to be matched and is + /// the source of the yytext() string + /// + private char[] zzBuffer = new char[ZZ_BUFFERSIZE]; + + /// the textposition at the last accepting state + private int zzMarkedPos; + + /// the textposition at the last state to be included in yytext + private int zzPushbackPos; + + /// the current text position in the buffer + private int zzCurrentPos; + + /// startRead marks the beginning of the yytext() string in the buffer + private int zzStartRead; + + /// endRead marks the last character in the buffer, that has been read + /// from input + /// + private int zzEndRead; + + /// number of newlines encountered up to the start of the matched text + private int yyline; + + /// the number of characters up to the start of the matched text + private int yychar; + + /// the number of characters from the last newline up to the start of the + 
/// matched text + /// + private int yycolumn; + + /// zzAtBOL == true <=> the scanner is currently at the beginning of a line + private bool zzAtBOL = true; + + /// zzAtEOF == true <=> the scanner is at the EOF + private bool zzAtEOF; + + /* user code: */ + + public static readonly int ALPHANUM; + public static readonly int APOSTROPHE; + public static readonly int ACRONYM; + public static readonly int COMPANY; + public static readonly int EMAIL; + public static readonly int HOST; + public static readonly int NUM; + public static readonly int CJ; + /// this solves a bug where HOSTs that end with '.' are identified + /// as ACRONYMs. + /// + [Obsolete("this solves a bug where HOSTs that end with '.' are identified as ACRONYMs")] + public static readonly int ACRONYM_DEP; + + public static readonly System.String[] TOKEN_TYPES; + + public int Yychar() + { + return yychar; + } + + /* + * Resets the Tokenizer to a new Reader. + */ + internal void Reset(System.IO.TextReader r) + { + // reset to default buffer size, if buffer has grown + if (zzBuffer.Length > ZZ_BUFFERSIZE) + { + zzBuffer = new char[ZZ_BUFFERSIZE]; + } + Yyreset(r); + } + + /// Fills Lucene token with the current token text. + internal void GetText(Token t) + { + t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + } + + /// Fills TermAttribute with the current token text. + internal void GetText(ITermAttribute t) + { + t.SetTermBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + } + + + /// Creates a new scanner + /// There is also a java.io.InputStream version of this constructor. + /// + /// + /// the java.io.Reader to read input from. + /// + internal StandardTokenizerImpl(System.IO.TextReader in_Renamed) + { + this.zzReader = in_Renamed; + } + + /// Creates a new scanner. + /// There is also java.io.Reader version of this constructor. + /// + /// + /// the java.io.Inputstream to read input from. + /// + internal StandardTokenizerImpl(System.IO.Stream in_Renamed):this(new System.IO.StreamReader(in_Renamed, System.Text.Encoding.Default)) + { + } + + /// Unpacks the compressed character translation table. + /// + /// + /// the packed character translation table + /// + /// the unpacked character translation table + /// + private static char[] ZzUnpackCMap(System.String packed) + { + char[] map = new char[0x10000]; + int i = 0; /* index in packed string */ + int j = 0; /* index in unpacked array */ + while (i < 1154) + { + int count = packed[i++]; + char value_Renamed = packed[i++]; + do + map[j++] = value_Renamed; + while (--count > 0); + } + return map; + } + + + /// Refills the input buffer. + /// + /// false, iff there was new input. + /// + /// + /// if any I/O-Error occurs + /// + private bool ZzRefill() + { + + /* first: make room (if you can) */ + if (zzStartRead > 0) + { + Array.Copy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead); + + /* translate stored positions */ + zzEndRead -= zzStartRead; + zzCurrentPos -= zzStartRead; + zzMarkedPos -= zzStartRead; + zzPushbackPos -= zzStartRead; + zzStartRead = 0; + } + + /* is the buffer big enough? 
*/ + if (zzCurrentPos >= zzBuffer.Length) + { + /* if not: blow it up */ + char[] newBuffer = new char[zzCurrentPos * 2]; + Array.Copy(zzBuffer, 0, newBuffer, 0, zzBuffer.Length); + zzBuffer = newBuffer; + } + + /* finally: fill the buffer with new input */ + int numRead = zzReader.Read(zzBuffer, zzEndRead, zzBuffer.Length - zzEndRead); + + if (numRead <= 0) + { + return true; + } + else + { + zzEndRead += numRead; + return false; + } + } + + + /// Closes the input stream. + public void Yyclose() + { + zzAtEOF = true; /* indicate end of file */ + zzEndRead = zzStartRead; /* invalidate buffer */ + + if (zzReader != null) + zzReader.Close(); + } + + + /// Resets the scanner to read from a new input stream. + /// Does not close the old reader. + /// + /// All internal variables are reset, the old input stream + /// cannot be reused (internal buffer is discarded and lost). + /// Lexical state is set to ZZ_INITIAL. + /// + /// + /// the new input stream + /// + public void Yyreset(System.IO.TextReader reader) + { + zzReader = reader; + zzAtBOL = true; + zzAtEOF = false; + zzEndRead = zzStartRead = 0; + zzCurrentPos = zzMarkedPos = zzPushbackPos = 0; + yyline = yychar = yycolumn = 0; + zzLexicalState = YYINITIAL; + } + + + /// Returns the current lexical state. + public int Yystate() + { + return zzLexicalState; + } + + + /// Enters a new lexical state + /// + /// + /// the new lexical state + /// + public void Yybegin(int newState) + { + zzLexicalState = newState; + } + + + /// Returns the text matched by the current regular expression. + public System.String Yytext() + { + return new System.String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + } + + + /// Returns the character at position pos from the + /// matched text. + /// + /// It is equivalent to yytext().charAt(pos), but faster + /// + /// + /// the position of the character to fetch. + /// A value from 0 to yylength()-1. + /// + /// + /// the character at position pos + /// + public char Yycharat(int pos) + { + return zzBuffer[zzStartRead + pos]; + } + + + /// Returns the length of the matched text region. + public int Yylength() + { + return zzMarkedPos - zzStartRead; + } + + + /// Reports an error that occured while scanning. + /// + /// In a wellformed scanner (no or only correct usage of + /// yypushback(int) and a match-all fallback rule) this method + /// will only be called with things that "Can't Possibly Happen". + /// If this method is called, something is seriously wrong + /// (e.g. a JFlex bug producing a faulty scanner etc.). + /// + /// Usual syntax/scanner level error handling should be done + /// in error fallback rules. + /// + /// + /// the code of the errormessage to display + /// + private void ZzScanError(int errorCode) + { + System.String message; + try + { + message = ZZ_ERROR_MSG[errorCode]; + } + catch (System.IndexOutOfRangeException) + { + message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; + } + + throw new System.ApplicationException(message); + } + + + /// Pushes the specified amount of characters back into the input stream. + /// + /// They will be read again by then next call of the scanning method + /// + /// + /// the number of characters to be read again. + /// This number must not be greater than yylength()! + /// + public virtual void Yypushback(int number) + { + if (number > Yylength()) + ZzScanError(ZZ_PUSHBACK_2BIG); + + zzMarkedPos -= number; + } + + + /// Resumes scanning until the next regular expression is matched, + /// the end of input is encountered or an I/O-Error occurs. 
+ /// + /// + /// the next token + /// + /// if any I/O-Error occurs + /// + public virtual int GetNextToken() + { + int zzInput; + int zzAction; + + // cached fields: + int zzCurrentPosL; + int zzMarkedPosL; + int zzEndReadL = zzEndRead; + char[] zzBufferL = zzBuffer; + char[] zzCMapL = ZZ_CMAP; + + int[] zzTransL = ZZ_TRANS; + int[] zzRowMapL = ZZ_ROWMAP; + int[] zzAttrL = ZZ_ATTRIBUTE; + + while (true) + { + zzMarkedPosL = zzMarkedPos; + + yychar += zzMarkedPosL - zzStartRead; + + zzAction = - 1; + + zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; + + zzState = zzLexicalState; + + + { + while (true) + { + + if (zzCurrentPosL < zzEndReadL) + zzInput = zzBufferL[zzCurrentPosL++]; + else if (zzAtEOF) + { + zzInput = YYEOF; + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + else + { + // store back cached positions + zzCurrentPos = zzCurrentPosL; + zzMarkedPos = zzMarkedPosL; + bool eof = ZzRefill(); + // get translated positions and possibly new buffer + zzCurrentPosL = zzCurrentPos; + zzMarkedPosL = zzMarkedPos; + zzBufferL = zzBuffer; + zzEndReadL = zzEndRead; + if (eof) + { + zzInput = YYEOF; + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + else + { + zzInput = zzBufferL[zzCurrentPosL++]; + } + } + int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; + if (zzNext == - 1) + { + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + zzState = zzNext; + + int zzAttributes = zzAttrL[zzState]; + if ((zzAttributes & 1) == 1) + { + zzAction = zzState; + zzMarkedPosL = zzCurrentPosL; + if ((zzAttributes & 8) == 8) + { + goto zzForAction_brk; // {{Aroush-2.9}} this 'goto' maybe in the wrong place + } + } + } + } + +zzForAction_brk: ; // {{Aroush-2.9}} this 'lable' maybe in the wrong place + + + // store back cached position + zzMarkedPos = zzMarkedPosL; + + switch (zzAction < 0?zzAction:ZZ_ACTION[zzAction]) + { + + case 4: + { + return HOST; + } + + case 11: break; + + case 9: + { + return ACRONYM; + } + + case 12: break; + + case 8: + { + return ACRONYM_DEP; + } + + case 13: break; + + case 1: + { + /* ignore */ + } + goto case 14; + + case 14: break; + + case 5: + { + return NUM; + } + + case 15: break; + + case 3: + { + return CJ; + } + + case 16: break; + + case 2: + { + return ALPHANUM; + } + + case 17: break; + + case 7: + { + return COMPANY; + } + + case 18: break; + + case 6: + { + return APOSTROPHE; + } + + case 19: break; + + case 10: + { + return EMAIL; + } + + case 20: break; + + default: + if (zzInput == YYEOF && zzStartRead == zzCurrentPos) + { + zzAtEOF = true; + return YYEOF; + } + else + { + ZzScanError(ZZ_NO_MATCH); + } + break; + + } + } + } + static StandardTokenizerImpl() + { + ALPHANUM = StandardTokenizer.ALPHANUM; + APOSTROPHE = StandardTokenizer.APOSTROPHE; + ACRONYM = StandardTokenizer.ACRONYM; + COMPANY = StandardTokenizer.COMPANY; + EMAIL = StandardTokenizer.EMAIL; + HOST = StandardTokenizer.HOST; + NUM = StandardTokenizer.NUM; + CJ = StandardTokenizer.CJ; + ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP; + TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/StopAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/StopAnalyzer.cs new file mode 100644 index 0000000000..aabe19746f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/StopAnalyzer.cs @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.Analysis +{ + + /// Filters with and + /// . + /// + /// + ///

+ /// You must specify the required compatibility when creating + /// StopAnalyzer: + /// + /// As of 2.9, position increments are preserved + /// + ///

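// Illustrative usage sketch (not part of the original sources): shows the intended use of the
// analyzer documented above. It assumes the attribute-based API from Lucene.Net 2.9+ that appears
// elsewhere in this patch (AddAttribute<ITermAttribute>() from Lucene.Net.Analysis.Tokenattributes).
static class StopAnalyzerUsageSketch
{
    static void Demo()
    {
        Analyzer analyzer = new StopAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
        TokenStream ts = analyzer.TokenStream("body", new System.IO.StringReader("The Quick Brown Fox"));
        var term = ts.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
        while (ts.IncrementToken())
        {
            // Stop words such as "The" are dropped; the remaining terms come out lower-cased.
            System.Console.WriteLine(term.Term);
        }
    }
}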
+ + public sealed class StopAnalyzer:Analyzer + { + private readonly ISet stopWords; + private readonly bool enablePositionIncrements; + + /// An unmodifiable set containing some common English words that are not usually useful + /// for searching. + /// + public static ISet ENGLISH_STOP_WORDS_SET; + + /// Builds an analyzer which removes words in ENGLISH_STOP_WORDS. + public StopAnalyzer(Version matchVersion) + { + stopWords = ENGLISH_STOP_WORDS_SET; + enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion); + } + + /// Builds an analyzer with the stop words from the given set. + public StopAnalyzer(Version matchVersion, ISet stopWords) + { + this.stopWords = stopWords; + enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion); + } + + /// Builds an analyzer with the stop words from the given file. + /// + /// + /// + /// + /// See above + /// + /// File to load stop words from + /// + public StopAnalyzer(Version matchVersion, System.IO.FileInfo stopwordsFile) + { + stopWords = WordlistLoader.GetWordSet(stopwordsFile); + enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion); + } + + /// Builds an analyzer with the stop words from the given reader. + /// + /// + /// See above + /// + /// Reader to load stop words from + /// + public StopAnalyzer(Version matchVersion, System.IO.TextReader stopwords) + { + stopWords = WordlistLoader.GetWordSet(stopwords); + enablePositionIncrements = StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion); + } + + /// Filters LowerCaseTokenizer with StopFilter. + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords); + } + + /// Filters LowerCaseTokenizer with StopFilter. 
+ private class SavedStreams + { + public SavedStreams(StopAnalyzer enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(StopAnalyzer enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private StopAnalyzer enclosingInstance; + public StopAnalyzer Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal Tokenizer source; + internal TokenStream result; + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + var streams = (SavedStreams) PreviousTokenStream; + if (streams == null) + { + streams = new SavedStreams(this) {source = new LowerCaseTokenizer(reader)}; + streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords); + PreviousTokenStream = streams; + } + else + streams.source.Reset(reader); + return streams.result; + } + static StopAnalyzer() + { + { + var stopWords = new System.String[]{"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"}; + var stopSet = new CharArraySet(stopWords.Length, false); + stopSet.AddAll(stopWords); + ENGLISH_STOP_WORDS_SET = CharArraySet.UnmodifiableSet(stopSet); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/StopFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/StopFilter.cs new file mode 100644 index 0000000000..81b7dd0b37 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/StopFilter.cs @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Util; +using QueryParser = Lucene.Net.QueryParsers.QueryParser; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.Analysis +{ + + /// Removes stop words from a token stream. + + public sealed class StopFilter:TokenFilter + { + private readonly CharArraySet stopWords; + private bool enablePositionIncrements = false; + + private readonly ITermAttribute termAtt; + private readonly IPositionIncrementAttribute posIncrAtt; + + /// Construct a token stream filtering the given input. + /// If stopWords is an instance of (true if + /// makeStopSet() was used to construct the set) it will be directly used + /// and ignoreCase will be ignored since CharArraySet + /// directly controls case sensitivity. + ///

+ /// If stopWords is not an instance of CharArraySet, + /// a new CharArraySet will be constructed and ignoreCase will be + /// used to specify the case sensitivity of that set. + ///

+ /// true if token positions should record the removed stop words + /// Input TokenStream + /// A Set of strings or strings or char[] or any other ToString()-able set representing the stopwords + /// if true, all words are lower cased first + public StopFilter(bool enablePositionIncrements, TokenStream input, ISet stopWords, bool ignoreCase) + : base(input) + { + if (stopWords is CharArraySet) + { + this.stopWords = (CharArraySet) stopWords; + } + else + { + this.stopWords = new CharArraySet(stopWords.Count, ignoreCase); + this.stopWords.AddAll(stopWords); + } + this.enablePositionIncrements = enablePositionIncrements; + termAtt = AddAttribute(); + posIncrAtt = AddAttribute(); + } + + /// Constructs a filter which removes words from the input + /// TokenStream that are named in the Set. + /// + /// true if token positions should record the removed stop words + /// Input stream + /// A Set of strings or char[] or any other ToString()-able set representing the stopwords + /// + public StopFilter(bool enablePositionIncrements, TokenStream @in, ISet stopWords) + : this(enablePositionIncrements, @in, stopWords, false) + { } + + /// Builds a Set from an array of stop words, + /// appropriate for passing into the StopFilter constructor. + /// This permits this stopWords construction to be cached once when + /// an Analyzer is constructed. + /// + /// + /// passing false to ignoreCase + public static ISet MakeStopSet(params string[] stopWords) + { + return MakeStopSet(stopWords, false); + } + + /// Builds a Set from an array of stop words, + /// appropriate for passing into the StopFilter constructor. + /// This permits this stopWords construction to be cached once when + /// an Analyzer is constructed. + /// + /// A list of strings or char[] or any other ToString()-able list representing the stop words + /// passing false to ignoreCase + public static ISet MakeStopSet(IList stopWords) + { + return MakeStopSet(stopWords, false); + } + + /// + /// An array of stopwords + /// If true, all words are lower cased first. + /// a Set containing the words + public static ISet MakeStopSet(string[] stopWords, bool ignoreCase) + { + var stopSet = new CharArraySet(stopWords.Length, ignoreCase); + stopSet.AddAll(stopWords); + return stopSet; + } + + /// + /// A List of Strings or char[] or any other toString()-able list representing the stopwords + /// if true, all words are lower cased first + /// A Set ()containing the words + public static ISet MakeStopSet(IList stopWords, bool ignoreCase) + { + var stopSet = new CharArraySet(stopWords.Count, ignoreCase); + foreach(var word in stopWords) + stopSet.Add(word.ToString()); + return stopSet; + } + + /// Returns the next input Token whose term() is not a stop word. + public override bool IncrementToken() + { + // return the first non-stop word found + int skippedPositions = 0; + while (input.IncrementToken()) + { + if (!stopWords.Contains(termAtt.TermBuffer(), 0, termAtt.TermLength())) + { + if (enablePositionIncrements) + { + posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions; + } + return true; + } + skippedPositions += posIncrAtt.PositionIncrement; + } + // reached EOS -- return false + return false; + } + + /// Returns version-dependent default for enablePositionIncrements. Analyzers + /// that embed StopFilter use this method when creating the StopFilter. Prior + /// to 2.9, this returns false. On 2.9 or later, it returns true. 
+ /// + public static bool GetEnablePositionIncrementsVersionDefault(Version matchVersion) + { + return matchVersion.OnOrAfter(Version.LUCENE_29); + } + + /// If true, this StopFilter will preserve + /// positions of the incoming tokens (ie, accumulate and + /// set position increments of the removed stop tokens). + /// Generally, true is best as it does not + /// lose information (positions of the original tokens) + /// during indexing. + /// + ///

+ /// When set, when a token is stopped + /// (omitted), the position increment of the following + /// token is incremented. + /// + ///

+ /// NOTE: be sure to also + /// set if + /// you use QueryParser to create queries. + ///

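// Illustrative sketch of the position-increment behaviour described above (not part of the
// original sources; assumes AddAttribute<T>() and the Tokenattributes interfaces used in this file).
static class StopFilterPositionSketch
{
    static void Demo()
    {
        var stopWords = StopFilter.MakeStopSet("the", "a");
        TokenStream ts = new StopFilter(true,
            new LowerCaseTokenizer(new System.IO.StringReader("the quick fox")),
            stopWords);
        var term = ts.AddAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
        var posIncr = ts.AddAttribute<Lucene.Net.Analysis.Tokenattributes.IPositionIncrementAttribute>();
        while (ts.IncrementToken())
        {
            // "quick" reports PositionIncrement == 2 because the removed "the" is accounted for.
            System.Console.WriteLine(term.Term + " +" + posIncr.PositionIncrement);
        }
    }
}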
+ public bool EnablePositionIncrements + { + get { return enablePositionIncrements; } + set { enablePositionIncrements = value; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/TeeSinkTokenFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/TeeSinkTokenFilter.cs new file mode 100644 index 0000000000..bec605e226 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/TeeSinkTokenFilter.cs @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Attribute = Lucene.Net.Util.Attribute; +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// This TokenFilter provides the ability to set aside attribute states + /// that have already been analyzed. This is useful in situations where multiple fields share + /// many common analysis steps and then go their separate ways. + ///

+ /// It is also useful for doing things like entity extraction or proper noun analysis as + /// part of the analysis workflow and saving off those tokens for use in another field. + /// + /// + /// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1)); + /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream(); + /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream(); + /// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2)); + /// source2.addSinkTokenStream(sink1); + /// source2.addSinkTokenStream(sink2); + /// TokenStream final1 = new LowerCaseFilter(source1); + /// TokenStream final2 = source2; + /// TokenStream final3 = new EntityDetect(sink1); + /// TokenStream final4 = new URLDetect(sink2); + /// d.add(new Field("f1", final1)); + /// d.add(new Field("f2", final2)); + /// d.add(new Field("f3", final3)); + /// d.add(new Field("f4", final4)); + /// + /// In this example, sink1 and sink2 will both get tokens from both + /// reader1 and reader2 after whitespace tokenizer + /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. + /// It is important, that tees are consumed before sinks (in the above example, the field names must be + /// less the sink's field names). If you are not sure, which stream is consumed first, you can simply + /// add another sink and then pass all tokens to the sinks at once using . + /// This TokenFilter is exhausted after this. In the above example, change + /// the example above to: + /// + /// ... + /// TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream()); + /// TokenStream final2 = source2.newSinkTokenStream(); + /// sink1.consumeAllTokens(); + /// sink2.consumeAllTokens(); + /// ... + /// + /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready. + ///

+ /// Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. + ///

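// A C# rendering of the example in the comment above (illustrative only). WhitespaceTokenizer is
// assumed from elsewhere in the library; the point is the tee/sink wiring and consumption order.
static class TeeSinkSketch
{
    static void Demo(System.IO.TextReader reader1, System.IO.TextReader reader2)
    {
        var source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
        TeeSinkTokenFilter.SinkTokenStream sink1 = source1.NewSinkTokenStream();
        TeeSinkTokenFilter.SinkTokenStream sink2 = source1.NewSinkTokenStream();

        var source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
        source2.AddSinkTokenStream(sink1);
        source2.AddSinkTokenStream(sink2);

        // Consume the tees first so that every cached state reaches the sinks;
        // after this the sink streams can be consumed (e.g. indexed) in any order.
        source1.ConsumeAllTokens();
        source2.ConsumeAllTokens();
    }
}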
+ public sealed class TeeSinkTokenFilter:TokenFilter + { + public class AnonymousClassSinkFilter:SinkFilter + { + public override bool Accept(AttributeSource source) + { + return true; + } + } + private readonly LinkedList sinks = new LinkedList(); + + /// Instantiates a new TeeSinkTokenFilter. + public TeeSinkTokenFilter(TokenStream input):base(input) + { + } + + /// Returns a new that receives all tokens consumed by this stream. + public SinkTokenStream NewSinkTokenStream() + { + return NewSinkTokenStream(ACCEPT_ALL_FILTER); + } + + /// Returns a new that receives all tokens consumed by this stream + /// that pass the supplied filter. + /// + /// + /// + public SinkTokenStream NewSinkTokenStream(SinkFilter filter) + { + var sink = new SinkTokenStream(this.CloneAttributes(), filter); + sinks.AddLast(new WeakReference(sink)); + return sink; + } + + /// Adds a created by another TeeSinkTokenFilter + /// to this one. The supplied stream will also receive all consumed tokens. + /// This method can be used to pass tokens from two different tees to one sink. + /// + public void AddSinkTokenStream(SinkTokenStream sink) + { + // check that sink has correct factory + if (!this.Factory.Equals(sink.Factory)) + { + throw new System.ArgumentException("The supplied sink is not compatible to this tee"); + } + // add eventually missing attribute impls to the existing sink + foreach (var impl in this.CloneAttributes().GetAttributeImplsIterator()) + { + sink.AddAttributeImpl(impl); + } + sinks.AddLast(new WeakReference(sink)); + } + + /// TeeSinkTokenFilter passes all tokens to the added sinks + /// when itself is consumed. To be sure, that all tokens from the input + /// stream are passed to the sinks, you can call this methods. + /// This instance is exhausted after this, but all sinks are instant available. + /// + public void ConsumeAllTokens() + { + while (IncrementToken()) + { + } + } + + public override bool IncrementToken() + { + if (input.IncrementToken()) + { + // capture state lazily - maybe no SinkFilter accepts this state + State state = null; + foreach(WeakReference wr in sinks) + { + var sink = (SinkTokenStream)wr.Target; + if (sink != null) + { + if (sink.Accept(this)) + { + if (state == null) + { + state = this.CaptureState(); + } + sink.AddState(state); + } + } + } + return true; + } + + return false; + } + + public override void End() + { + base.End(); + State finalState = CaptureState(); + foreach(WeakReference wr in sinks) + { + var sink = (SinkTokenStream)wr.Target; + if (sink != null) + { + sink.SetFinalState(finalState); + } + } + } + + /// A filter that decides which states to store in the sink. + public abstract class SinkFilter + { + /// Returns true, iff the current state of the passed-in shall be stored + /// in the sink. + /// + public abstract bool Accept(AttributeSource source); + + /// Called by . This method does nothing by default + /// and can optionally be overridden. 
+ /// + public virtual void Reset() + { + // nothing to do; can be overridden + } + } + + public sealed class SinkTokenStream : TokenStream + { + private readonly LinkedList cachedStates = new LinkedList(); + private State finalState; + private IEnumerator it = null; + private readonly SinkFilter filter; + + internal SinkTokenStream(AttributeSource source, SinkFilter filter) + : base(source) + { + this.filter = filter; + } + + internal /*private*/ bool Accept(AttributeSource source) + { + return filter.Accept(source); + } + + internal /*private*/ void AddState(AttributeSource.State state) + { + if (it != null) + { + throw new System.SystemException("The tee must be consumed before sinks are consumed."); + } + cachedStates.AddLast(state); + } + + internal /*private*/ void SetFinalState(AttributeSource.State finalState) + { + this.finalState = finalState; + } + + public override bool IncrementToken() + { + // lazy init the iterator + if (it == null) + { + it = cachedStates.GetEnumerator(); + } + + if (!it.MoveNext()) + { + return false; + } + + State state = it.Current; + RestoreState(state); + return true; + } + + public override void End() + { + if (finalState != null) + { + RestoreState(finalState); + } + } + + public override void Reset() + { + it = cachedStates.GetEnumerator(); + } + + protected override void Dispose(bool disposing) + { + // Do nothing. + } + } + + private static readonly SinkFilter ACCEPT_ALL_FILTER; + static TeeSinkTokenFilter() + { + ACCEPT_ALL_FILTER = new AnonymousClassSinkFilter(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Token.cs b/external/Lucene.Net.Light/src/core/Analysis/Token.cs new file mode 100644 index 0000000000..3357f3417f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Token.cs @@ -0,0 +1,852 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Payload = Lucene.Net.Index.Payload; +using TermPositions = Lucene.Net.Index.TermPositions; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis +{ + + /// A Token is an occurrence of a term from the text of a field. It consists of + /// a term's text, the start and end offset of the term in the text of the field, + /// and a type string. + ///

+ /// The start and end offsets permit applications to re-associate a token with + /// its source text, e.g., to display highlighted query terms in a document + /// browser, or to show matching text fragments in a KWIC display, etc. + ///

+ /// The type is a string, assigned by a lexical analyzer + /// (a.k.a. tokenizer), naming the lexical or syntactic class that the token + /// belongs to. For example an end of sentence marker token might be implemented + /// with type "eos". The default token type is "word". + ///

+ /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable + /// length byte array. Use and + /// to retrieve the payloads from the index. + ///

+ ///

+ ///
+ ///

+ /// NOTE: As of 2.9, Token implements all interfaces + /// that are part of core Lucene and can be found in the namespace. + /// Even though it is not necessary to use Token anymore, with the new TokenStream API it can + /// be used as convenience class that implements all s, which is especially useful + /// to easily switch from the old to the new TokenStream API. + ///

+ ///

Tokenizers and TokenFilters should try to re-use a Token instance when + /// possible for best performance, by implementing the + /// API. + /// Failing that, to create a new Token you should first use + /// one of the constructors that starts with null text. To load + /// the token from a char[] use . + /// To load from a String use or . + /// Alternatively you can get the Token's termBuffer by calling either , + /// if you know that your text is shorter than the capacity of the termBuffer + /// or , if there is any possibility + /// that you may need to grow the buffer. Fill in the characters of your term into this + /// buffer, with if loading from a string, + /// or with , and finally call to + /// set the length of the term text. See LUCENE-969 + /// for details.

+ ///

+ /// Typical Token reuse patterns: + /// + /// Copying text from a string (type is reset to if not + /// specified):
+ /// + /// return reusableToken.reinit(string, startOffset, endOffset[, type]); + /// + ///
+ /// Copying some text from a string (type is reset to + /// if not specified):
+ /// + /// return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]); + /// + ///
+ /// Copying text from char[] buffer (type is reset to + /// if not specified):
+ /// + /// return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]); + /// + ///
+ /// Copying some text from a char[] buffer (type is reset to + /// if not specified):
+ /// + /// return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]); + /// + ///
+ /// Copying from one Token to another (type is reset to + /// if not specified):
+ /// + /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]); + /// + ///
+ ///
+ /// A few things to note: + /// + /// clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one. + /// Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. + /// The startOffset and endOffset represent the start and offset in the + /// source text, so be careful in adjusting them. + /// When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. + /// + ///

+ ///

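// Minimal reuse sketch following the patterns listed above (illustrative only; the helper name is
// hypothetical). It uses only members defined in this class: Clear, SetTermBuffer and SetOffset.
static class TokenReuseSketch
{
    static Token Fill(Token reusableToken, string text, int start, int end)
    {
        reusableToken.Clear();                 // reset payload, flags, increment, offsets, type
        reusableToken.SetTermBuffer(text);     // load the new term text into the existing buffer
        reusableToken.SetOffset(start, end);   // re-associate the token with its source text
        return reusableToken;
    }
}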
+ /// + /// + [Serializable] + public class Token : Attribute, ITermAttribute, ITypeAttribute, IPositionIncrementAttribute, IFlagsAttribute, IOffsetAttribute, IPayloadAttribute + { + public const String DEFAULT_TYPE = "word"; + + private const int MIN_BUFFER_SIZE = 10; + + private char[] termBuffer; + private int termLength; + private int startOffset, endOffset; + private string type = DEFAULT_TYPE; + private int flags; + private Payload payload; + private int positionIncrement = 1; + + /// Constructs a Token will null text. + public Token() + { + } + + /// Constructs a Token with null text and start & end + /// offsets. + /// + /// start offset in the source text + /// end offset in the source text + public Token(int start, int end) + { + startOffset = start; + endOffset = end; + } + + /// Constructs a Token with null text and start & end + /// offsets plus the Token type. + /// + /// start offset in the source text + /// end offset in the source text + /// the lexical type of this Token + public Token(int start, int end, String typ) + { + startOffset = start; + endOffset = end; + type = typ; + } + + /// Constructs a Token with null text and start & end + /// offsets plus flags. NOTE: flags is EXPERIMENTAL. + /// + /// start offset in the source text + /// end offset in the source text + /// The bits to set for this token + public Token(int start, int end, int flags) + { + startOffset = start; + endOffset = end; + this.flags = flags; + } + + /// Constructs a Token with the given term text, and start + /// & end offsets. The type defaults to "word." + /// NOTE: for better indexing speed you should + /// instead use the char[] termBuffer methods to set the + /// term text. + /// + /// term text + /// start offset + /// end offset + public Token(String text, int start, int end) + { + SetTermBuffer(text); + startOffset = start; + endOffset = end; + } + + /// Constructs a Token with the given text, start and end + /// offsets, & type. NOTE: for better indexing + /// speed you should instead use the char[] termBuffer + /// methods to set the term text. + /// + /// term text + /// start offset + /// end offset + /// token type + public Token(System.String text, int start, int end, System.String typ) + { + SetTermBuffer(text); + startOffset = start; + endOffset = end; + type = typ; + } + + /// Constructs a Token with the given text, start and end + /// offsets, & type. NOTE: for better indexing + /// speed you should instead use the char[] termBuffer + /// methods to set the term text. + /// + /// + /// + /// + /// token type bits + public Token(System.String text, int start, int end, int flags) + { + SetTermBuffer(text); + startOffset = start; + endOffset = end; + this.flags = flags; + } + + /// Constructs a Token with the given term buffer (offset + /// & length), start and end + /// offsets + /// + /// + /// + /// + /// + /// + public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) + { + SetTermBuffer(startTermBuffer, termBufferOffset, termBufferLength); + startOffset = start; + endOffset = end; + } + + /// Set the position increment. This determines the position of this token + /// relative to the previous Token in a , used in phrase + /// searching. + /// + ///

+ /// The default value is one. + /// + ///

+ /// Some common uses for this are: + /// + /// Set it to zero to put multiple terms in the same position. This is + /// useful if, e.g., a word has multiple stems. Searches for phrases + /// including either stem will match. In this case, all but the first stem's + /// increment should be set to zero: the increment of the first instance + /// should be one. Repeating a token with an increment of zero can also be + /// used to boost the scores of matches on that token. + /// + /// Set it to values greater than one to inhibit exact phrase matches. + /// If, for example, one does not want phrases to match across removed stop + /// words, then one could build a stop word filter that removes stop words and + /// also sets the increment to the number of stop words removed before each + /// non-stop word. Then exact phrase queries will only match when the terms + /// occur with no intervening stop words. + /// + /// + ///

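// Illustrative sketch of the zero-increment case described above (hypothetical example data):
// an alternative stem is placed at the same position as the original term.
static class SamePositionSketch
{
    static Token[] Demo()
    {
        var original = new Token("running", 0, 7);                    // increment defaults to 1
        var stem = new Token("run", 0, 7) { PositionIncrement = 0 };  // occupies the same position
        return new[] { original, stem };
    }
}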
+ /// the distance from the prior term + /// + /// + public virtual int PositionIncrement + { + set + { + if (value < 0) + throw new System.ArgumentException("Increment must be zero or greater: " + value); + this.positionIncrement = value; + } + get { return positionIncrement; } + } + + /// Returns the Token's term text. + /// + /// This method has a performance penalty + /// because the text is stored internally in a char[]. If + /// possible, use and + /// directly instead. If you really need a + /// String, use this method, which is nothing more than + /// a convenience call to new String(token.termBuffer(), 0, token.termLength()) + /// + public string Term + { + get + { + InitTermBuffer(); + return new System.String(termBuffer, 0, termLength); + } + } + + /// Copies the contents of buffer, starting at offset for + /// length characters, into the termBuffer array. + /// + /// the buffer to copy + /// the index in the buffer of the first character to copy + /// the number of characters to copy + public void SetTermBuffer(char[] buffer, int offset, int length) + { + GrowTermBuffer(length); + Array.Copy(buffer, offset, termBuffer, 0, length); + termLength = length; + } + + /// Copies the contents of buffer into the termBuffer array. + /// the buffer to copy + /// + public void SetTermBuffer(System.String buffer) + { + int length = buffer.Length; + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0); + termLength = length; + } + + /// Copies the contents of buffer, starting at offset and continuing + /// for length characters, into the termBuffer array. + /// + /// the buffer to copy + /// + /// the index in the buffer of the first character to copy + /// + /// the number of characters to copy + /// + public void SetTermBuffer(System.String buffer, int offset, int length) + { + System.Diagnostics.Debug.Assert(offset <= buffer.Length); + System.Diagnostics.Debug.Assert(offset + length <= buffer.Length); + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0); + termLength = length; + } + + /// Returns the internal termBuffer character array which + /// you can then directly alter. If the array is too + /// small for your token, use + /// to increase it. After + /// altering the buffer be sure to call + /// to record the number of valid + /// characters that were placed into the termBuffer. + /// + public char[] TermBuffer() + { + InitTermBuffer(); + return termBuffer; + } + + /// Grows the termBuffer to at least size newSize, preserving the + /// existing content. Note: If the next operation is to change + /// the contents of the term buffer use + /// , + /// , or + /// + /// to optimally combine the resize with the setting of the termBuffer. + /// + /// minimum size of the new termBuffer + /// + /// newly created termBuffer with length >= newSize + /// + public virtual char[] ResizeTermBuffer(int newSize) + { + if (termBuffer == null) + { + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation and preserve content + var newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length); + termBuffer = newCharBuffer; + } + } + return termBuffer; + } + + /// Allocates a buffer char[] of at least newSize, without preserving the existing content. 
+ /// its always used in places that set the content + /// + /// minimum size of the buffer + /// + private void GrowTermBuffer(int newSize) + { + if (termBuffer == null) + { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation: + termBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + } + } + } + + private void InitTermBuffer() + { + if (termBuffer == null) + { + termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)]; + termLength = 0; + } + } + + /// Return number of valid characters (length of the term) + /// in the termBuffer array. + /// + public int TermLength() + { + InitTermBuffer(); + return termLength; + } + + /// Set number of valid characters (length of the term) in + /// the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. + /// Note: to grow the size of the array, + /// use first. + /// + /// the truncated length + /// + public void SetTermLength(int length) + { + InitTermBuffer(); + if (length > termBuffer.Length) + throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")"); + termLength = length; + } + + /// Gets or sets this Token's starting offset, the position of the first character + /// corresponding to this token in the source text. + /// Note that the difference between endOffset() and startOffset() may not be + /// equal to , as the term text may have been altered by a + /// stemmer or some other filter. + /// + public virtual int StartOffset + { + get { return startOffset; } + set { this.startOffset = value; } + } + + /// Gets or sets this Token's ending offset, one greater than the position of the + /// last character corresponding to this token in the source text. The length + /// of the token in the source text is (endOffset - startOffset). + /// + public virtual int EndOffset + { + get { return endOffset; } + set { this.endOffset = value; } + } + + /// Set the starting and ending offset. + /// See StartOffset() and EndOffset() + /// + public virtual void SetOffset(int startOffset, int endOffset) + { + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + /// Returns this Token's lexical type. Defaults to "word". + public string Type + { + get { return type; } + set { this.type = value; } + } + + /// EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. + ///

+ /// + /// Get the bitset for any bits that have been set. This is completely distinct from the token type (ITypeAttribute), although they do share similar purposes. + /// The flags can be used to encode information about the token for use by other TokenFilters. + /// + /// + ///

+ /// The bits + public virtual int Flags + { + get { return flags; } + set { flags = value; } + } + + /// Returns this Token's payload. + public virtual Payload Payload + { + get { return payload; } + set { payload = value; } + } + + public override String ToString() + { + var sb = new System.Text.StringBuilder(); + sb.Append('('); + InitTermBuffer(); + if (termBuffer == null) + sb.Append("null"); + else + sb.Append(termBuffer, 0, termLength); + sb.Append(',').Append(startOffset).Append(',').Append(endOffset); + if (!type.Equals("word")) + sb.Append(",type=").Append(type); + if (positionIncrement != 1) + sb.Append(",posIncr=").Append(positionIncrement); + sb.Append(')'); + return sb.ToString(); + } + + /// Resets the term text, payload, flags, and positionIncrement, + /// startOffset, endOffset and token type to default. + /// + public override void Clear() + { + payload = null; + // Leave termBuffer to allow re-use + termLength = 0; + positionIncrement = 1; + flags = 0; + startOffset = endOffset = 0; + type = DEFAULT_TYPE; + } + + public override System.Object Clone() + { + var t = (Token) base.Clone(); + // Do a deep clone + if (termBuffer != null) + { + t.termBuffer = new char[termBuffer.Length]; + termBuffer.CopyTo(t.termBuffer, 0); + } + if (payload != null) + { + t.payload = (Payload) payload.Clone(); + } + return t; + } + + /// Makes a clone, but replaces the term buffer & + /// start/end offset in the process. This is more + /// efficient than doing a full clone (and then calling + /// setTermBuffer) because it saves a wasted copy of the old + /// termBuffer. + /// + public virtual Token Clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) + { + var t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset) + {positionIncrement = positionIncrement, flags = flags, type = type}; + if (payload != null) + t.payload = (Payload) payload.Clone(); + return t; + } + + public override bool Equals(Object obj) + { + if (obj == this) + return true; + + var other = obj as Token; + if (other == null) + return false; + + InitTermBuffer(); + other.InitTermBuffer(); + + if (termLength == other.termLength && startOffset == other.startOffset && endOffset == other.endOffset && + flags == other.flags && positionIncrement == other.positionIncrement && SubEqual(type, other.type) && + SubEqual(payload, other.payload)) + { + for (int i = 0; i < termLength; i++) + if (termBuffer[i] != other.termBuffer[i]) + return false; + return true; + } + return false; + } + + private bool SubEqual(System.Object o1, System.Object o2) + { + if (o1 == null) + return o2 == null; + return o1.Equals(o2); + } + + public override int GetHashCode() + { + InitTermBuffer(); + int code = termLength; + code = code * 31 + startOffset; + code = code * 31 + endOffset; + code = code * 31 + flags; + code = code * 31 + positionIncrement; + code = code * 31 + type.GetHashCode(); + code = (payload == null?code:code * 31 + payload.GetHashCode()); + code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength); + return code; + } + + // like clear() but doesn't clear termBuffer/text + private void ClearNoTermBuffer() + { + payload = null; + positionIncrement = 1; + flags = 0; + startOffset = endOffset = 0; + type = DEFAULT_TYPE; + } + + /// Shorthand for calling , + /// , + /// , + /// , + /// + /// + /// this Token instance + /// + public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, 
System.String newType) + { + ClearNoTermBuffer(); + payload = null; + positionIncrement = 1; + SetTermBuffer(newTermBuffer, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = newType; + return this; + } + + /// Shorthand for calling , + /// , + /// , + /// + /// on Token.DEFAULT_TYPE + /// + /// this Token instance + /// + public virtual Token Reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) + { + ClearNoTermBuffer(); + SetTermBuffer(newTermBuffer, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = DEFAULT_TYPE; + return this; + } + + /// Shorthand for calling , + /// , + /// , + /// + /// + /// + /// this Token instance + /// + public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset, System.String newType) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = newType; + return this; + } + + /// Shorthand for calling , + /// , + /// , + /// + /// + /// + /// this Token instance + /// + public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, System.String newType) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = newType; + return this; + } + + /// Shorthand for calling , + /// , + /// , + /// + /// on Token.DEFAULT_TYPE + /// + /// this Token instance + /// + public virtual Token Reinit(System.String newTerm, int newStartOffset, int newEndOffset) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = DEFAULT_TYPE; + return this; + } + + /// Shorthand for calling , + /// , + /// , + /// + /// on Token.DEFAULT_TYPE + /// + /// this Token instance + /// + public virtual Token Reinit(System.String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) + { + ClearNoTermBuffer(); + SetTermBuffer(newTerm, newTermOffset, newTermLength); + startOffset = newStartOffset; + endOffset = newEndOffset; + type = DEFAULT_TYPE; + return this; + } + + /// Copy the prototype token's fields into this one. Note: Payloads are shared. + /// + /// + public virtual void Reinit(Token prototype) + { + prototype.InitTermBuffer(); + SetTermBuffer(prototype.termBuffer, 0, prototype.termLength); + positionIncrement = prototype.positionIncrement; + flags = prototype.flags; + startOffset = prototype.startOffset; + endOffset = prototype.endOffset; + type = prototype.type; + payload = prototype.payload; + } + + /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + /// + /// + /// + /// + public virtual void Reinit(Token prototype, System.String newTerm) + { + SetTermBuffer(newTerm); + positionIncrement = prototype.positionIncrement; + flags = prototype.flags; + startOffset = prototype.startOffset; + endOffset = prototype.endOffset; + type = prototype.type; + payload = prototype.payload; + } + + /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. 
+ /// + /// + /// + /// + /// + /// + /// + /// + public virtual void Reinit(Token prototype, char[] newTermBuffer, int offset, int length) + { + SetTermBuffer(newTermBuffer, offset, length); + positionIncrement = prototype.positionIncrement; + flags = prototype.flags; + startOffset = prototype.startOffset; + endOffset = prototype.endOffset; + type = prototype.type; + payload = prototype.payload; + } + + public override void CopyTo(Attribute target) + { + if (target is Token) + { + var to = (Token) target; + to.Reinit(this); + // reinit shares the payload, so clone it: + if (payload != null) + { + to.payload = (Payload) payload.Clone(); + } + } + else + { + InitTermBuffer(); + ((ITermAttribute) target).SetTermBuffer(termBuffer, 0, termLength); + ((IOffsetAttribute) target).SetOffset(startOffset, endOffset); + ((IPositionIncrementAttribute) target).PositionIncrement = positionIncrement; + ((IPayloadAttribute) target).Payload = (payload == null)?null:(Payload) payload.Clone(); + ((IFlagsAttribute) target).Flags = flags; + ((ITypeAttribute) target).Type = type; + } + } + + /// + /// Convenience factory that returns Token as implementation for the basic + /// attributes and return the default impl (with "Impl" appended) for all other + /// attributes. + /// @since 3.0 + /// + public static AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY = + new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY); + + /// + /// Expert: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes + /// and for all other attributes calls the given delegate factory. + /// + public class TokenAttributeFactory : AttributeSource.AttributeFactory + { + + private readonly AttributeSource.AttributeFactory _delegateFactory; + + /// + /// Expert: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes + /// and for all other attributes calls the given delegate factory. + /// + public TokenAttributeFactory(AttributeSource.AttributeFactory delegateFactory) + { + this._delegateFactory = delegateFactory; + } + + public override Attribute CreateAttributeInstance() + { + return typeof(T).IsAssignableFrom(typeof(Token)) + ? new Token() + : _delegateFactory.CreateAttributeInstance(); + } + + public override bool Equals(Object other) + { + if (this == other) return true; + + var af = other as TokenAttributeFactory; + return af != null && _delegateFactory.Equals(af._delegateFactory); + } + + public override int GetHashCode() + { + return _delegateFactory.GetHashCode() ^ 0x0a45aa31; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/TokenFilter.cs b/external/Lucene.Net.Light/src/core/Analysis/TokenFilter.cs new file mode 100644 index 0000000000..7483c82623 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/TokenFilter.cs @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// A TokenFilter is a TokenStream whose input is another TokenStream. + ///

+ /// This is an abstract class; subclasses must override IncrementToken(). + ///

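A minimal sketch of such a subclass, using only the attribute interfaces imported elsewhere in this change (the filter name and behavior are illustrative, not part of the imported sources):

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical filter: upper-cases every token produced by the wrapped stream.
public sealed class UpperCaseTermFilter : TokenFilter
{
    private readonly ITermAttribute termAtt;

    public UpperCaseTermFilter(TokenStream input) : base(input)
    {
        // Attribute references are obtained once, during instantiation.
        termAtt = AddAttribute<ITermAttribute>();
    }

    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
            return false;                      // end of stream

        char[] buffer = termAtt.TermBuffer();  // direct access to the term characters
        int length = termAtt.TermLength();
        for (int i = 0; i < length; i++)
            buffer[i] = char.ToUpperInvariant(buffer[i]);
        return true;
    }
}

Chaining would then look like new UpperCaseTermFilter(new WhitespaceTokenizer(reader)).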
+ /// + /// + public abstract class TokenFilter:TokenStream + { + /// The source of tokens for this filter. + protected internal TokenStream input; + + private bool isDisposed; + + /// Construct a token stream filtering the given input. + protected internal TokenFilter(TokenStream input):base(input) + { + this.input = input; + } + + /// Performs end-of-stream operations, if any, and then calls End() on the + /// input TokenStream.

+ /// NOTE: Be sure to call base.End() first when overriding this method. + ///

+ public override void End() + { + input.End(); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (input != null) + { + input.Close(); + } + } + + //input = null; + isDisposed = true; + } + + /// Reset the filter as well as the input TokenStream. + public override void Reset() + { + input.Reset(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/TokenStream.cs b/external/Lucene.Net.Light/src/core/Analysis/TokenStream.cs new file mode 100644 index 0000000000..c6246960c0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/TokenStream.cs @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; +using Document = Lucene.Net.Documents.Document; +using Field = Lucene.Net.Documents.Field; +using IndexWriter = Lucene.Net.Index.IndexWriter; +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// A TokenStream enumerates the sequence of tokens, either from + /// s of a or from query text. + ///

+ /// This is an abstract class. Concrete subclasses are: + /// + /// Tokenizer, a TokenStream whose input is a Reader; and + /// TokenFilter, a TokenStream whose input is another + /// TokenStream. + /// + /// A new TokenStream API has been introduced with Lucene 2.9. This API + /// has moved from being Token based to Attribute based. While + /// Token still exists in 2.9 as a convenience class, the preferred way + /// to store the information of a Token is to use Attributes. + ///

+ /// TokenStream now extends AttributeSource, which provides + /// access to all of the token Attributes for the TokenStream. + /// Note that only one instance per Attribute is created and reused + /// for every token. This approach reduces object creation and allows local + /// caching of references to the Attributes. See + /// IncrementToken() for further details. + ///

+ /// The workflow of the new TokenStream API is as follows: + /// + /// Instantiation of TokenStream/TokenFilters which add/get + /// attributes to/from the AttributeSource. + /// The consumer calls Reset(). + /// The consumer retrieves attributes from the stream and stores local + /// references to all attributes it wants to access. + /// The consumer calls IncrementToken() until it returns false and + /// consumes the attributes after each call. + /// The consumer calls End() so that any end-of-stream operations + /// can be performed. + /// The consumer calls Dispose() to release any resource when finished + /// using the TokenStream. + /// + /// To make sure that filters and consumers know which attributes are available, + /// the attributes must be added during instantiation. Filters and consumers are + /// not required to check for availability of attributes in + /// IncrementToken(). + ///

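A rough consumer following that workflow might look like the sketch below; the sample text and class name are illustrative only, not part of the imported sources:

using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

class ConsumerExample
{
    static void Main()
    {
        TokenStream stream = new WhitespaceTokenizer(new StringReader("the quick brown fox"));
        // Retrieve attribute references once, before consuming.
        ITermAttribute termAtt = stream.AddAttribute<ITermAttribute>();
        IOffsetAttribute offsetAtt = stream.AddAttribute<IOffsetAttribute>();

        stream.Reset();
        while (stream.IncrementToken())
        {
            Console.WriteLine("{0} [{1},{2})", termAtt.Term, offsetAtt.StartOffset, offsetAtt.EndOffset);
        }
        stream.End();
        stream.Dispose();
    }
}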
+ /// You can find some example code for the new API in the analysis package level + /// Javadoc. + ///

+ /// Sometimes it is desirable to capture a current state of a TokenStream, + /// e.g. for buffering purposes (see CachingTokenFilter). For this use case + /// AttributeSource.CaptureState and AttributeSource.RestoreState can be used. + ///

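On the producer side, a minimal stream that honours the IncrementToken() contract documented below could look roughly like this (hypothetical class, assuming the attribute interfaces in this import):

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical single-token stream: emits one term and then signals end of stream.
public sealed class SingleTokenStream : TokenStream
{
    private readonly string value;
    private readonly ITermAttribute termAtt;
    private readonly IOffsetAttribute offsetAtt;
    private bool exhausted;

    public SingleTokenStream(string value)
    {
        this.value = value;
        // Attributes are added during instantiation, as the API contract requires.
        termAtt = AddAttribute<ITermAttribute>();
        offsetAtt = AddAttribute<IOffsetAttribute>();
    }

    public override bool IncrementToken()
    {
        if (exhausted)
            return false;
        ClearAttributes();
        termAtt.SetTermBuffer(value);
        offsetAtt.SetOffset(0, value.Length);
        exhausted = true;
        return true;
    }

    public override void Reset()
    {
        exhausted = false;
    }

    protected override void Dispose(bool disposing)
    {
        // nothing to release
    }
}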
+ public abstract class TokenStream : AttributeSource, IDisposable + { + /// A TokenStream using the default attribute factory. + protected internal TokenStream() + { } + + /// A TokenStream that uses the same attributes as the supplied one. + protected internal TokenStream(AttributeSource input) + : base(input) + { } + + /// A TokenStream using the supplied AttributeFactory for creating new instances. + protected internal TokenStream(AttributeFactory factory) + : base(factory) + { } + + /// Consumers (i.e., ) use this method to advance the stream to + /// the next token. Implementing classes must implement this method and update + /// the appropriate s with the attributes of the next + /// token. + /// + /// The producer must make no assumptions about the attributes after the + /// method has been returned: the caller may arbitrarily change it. If the + /// producer needs to preserve the state for subsequent calls, it can use + /// to create a copy of the current attribute state. + /// + /// This method is called for every token of a document, so an efficient + /// implementation is crucial for good performance. To avoid calls to + /// and , + /// references to all s that this stream uses should be + /// retrieved during instantiation. + /// + /// To ensure that filters and consumers know which attributes are available, + /// the attributes must be added during instantiation. Filters and consumers + /// are not required to check for availability of attributes in + /// . + /// + /// + /// false for end of stream; true otherwise + public abstract bool IncrementToken(); + + /// This method is called by the consumer after the last token has been + /// consumed, after returned false + /// (using the new TokenStream API). Streams implementing the old API + /// should upgrade to use this feature. + ///

+ /// This method can be used to perform any end-of-stream operations, such as + /// setting the final offset of a stream. The final offset of a stream might + /// differ from the offset of the last token, e.g. in case one or more whitespaces + /// followed after the last token, but a WhitespaceTokenizer was used. + /// + ///

+ /// IOException + public virtual void End() + { + // do nothing by default + } + + /// Resets this stream to the beginning. This is an optional operation, so + /// subclasses may or may not implement this method. is not needed for + /// the standard indexing process. However, if the tokens of a + /// TokenStream are intended to be consumed more than once, it is + /// necessary to implement . Note that if your TokenStream + /// caches tokens and feeds them back again after a reset, it is imperative + /// that you clone the tokens when you store them away (on the first pass) as + /// well as when you return them (on future passes after ). + /// + public virtual void Reset() + { + } + + /// Releases resources associated with this stream. + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/FlagsAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/FlagsAttribute.cs new file mode 100644 index 0000000000..b5c4b7bdc0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/FlagsAttribute.cs @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// This attribute can be used to pass different flags down the tokenizer chain, + /// eg from one TokenFilter to another one. + /// + [Serializable] + public class FlagsAttribute:Util.Attribute, IFlagsAttribute, System.ICloneable + { + private int flags = 0; + + /// EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. + ///

+ /// + /// Get the bitset for any bits that have been set. This is completely distinct from the token type (ITypeAttribute), although they do share similar purposes. + /// The flags can be used to encode information about the token for use by other TokenFilters. + /// + ///

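For illustration, one hypothetical filter could set a bit here that a later filter in the chain inspects; the class name and bit value below are made up for the sketch and are not part of the imported sources:

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical filter that marks all-digit tokens with a flag bit.
public sealed class MarkNumericFilter : TokenFilter
{
    public const int NumericFlag = 1; // illustrative bit value

    private readonly ITermAttribute termAtt;
    private readonly IFlagsAttribute flagsAtt;

    public MarkNumericFilter(TokenStream input) : base(input)
    {
        termAtt = AddAttribute<ITermAttribute>();
        flagsAtt = AddAttribute<IFlagsAttribute>();
    }

    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
            return false;
        string term = termAtt.Term;
        bool numeric = term.Length > 0;
        for (int i = 0; i < term.Length; i++)
        {
            if (!char.IsDigit(term[i])) { numeric = false; break; }
        }
        if (numeric)
            flagsAtt.Flags = flagsAtt.Flags | NumericFlag; // a later TokenFilter can test this bit
        return true;
    }
}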
+ /// The bits + public virtual int Flags + { + get { return flags; } + set { this.flags = value; } + } + + public override void Clear() + { + flags = 0; + } + + public override bool Equals(System.Object other) + { + if (this == other) + { + return true; + } + + if (other is FlagsAttribute) + { + return ((FlagsAttribute) other).flags == flags; + } + + return false; + } + + public override int GetHashCode() + { + return flags; + } + + public override void CopyTo(Attribute target) + { + IFlagsAttribute t = (IFlagsAttribute) target; + t.Flags = flags; + } + + override public System.Object Clone() + { + FlagsAttribute impl = new FlagsAttribute(); + impl.flags = this.flags; + return impl; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs new file mode 100644 index 0000000000..24b2bea5af --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IFlagsAttribute.cs @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; +using Tokenizer = Lucene.Net.Analysis.Tokenizer; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// This attribute can be used to pass different flags down the chain, + /// eg from one TokenFilter to another one. + /// + public interface IFlagsAttribute:IAttribute + { + /// EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. + ///

+ /// + /// Get the bitset for any bits that have been set. This is completely distinct from the token type (ITypeAttribute), although they do share similar purposes. + /// The flags can be used to encode information about the token for use by other TokenFilters. + /// + ///

+ /// The bits + int Flags { get; set; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs new file mode 100644 index 0000000000..ffbbe02476 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IOffsetAttribute.cs @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The start and end character offset of a Token. + public interface IOffsetAttribute : IAttribute + { + /// Returns this Token's starting offset, the position of the first character + /// corresponding to this token in the source text. + /// Note that the difference between endOffset() and startOffset() may not be + /// equal to termText.length(), as the term text may have been altered by a + /// stemmer or some other filter. + /// + int StartOffset { get; } + + + /// Set the starting and ending offset. + /// See StartOffset() and EndOffset() + /// + void SetOffset(int startOffset, int endOffset); + + + /// Returns this Token's ending offset, one greater than the position of the + /// last character corresponding to this token in the source text. The length + /// of the token in the source text is (endOffset - startOffset). + /// + int EndOffset { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs new file mode 100644 index 0000000000..7e313ce31c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPayloadAttribute.cs @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Util; +using Payload = Lucene.Net.Index.Payload; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The payload of a Token. See also . + public interface IPayloadAttribute:IAttribute + { + /// Returns this Token's payload. + Payload Payload { get; set; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs new file mode 100644 index 0000000000..6c2a131b47 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/IPositionIncrementAttribute.cs @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The positionIncrement determines the position of this token + /// relative to the previous Token in a TokenStream, used in phrase + /// searching. + /// + ///

The default value is one. + /// + ///

Some common uses for this are: + /// + /// Set it to zero to put multiple terms in the same position. This is + /// useful if, e.g., a word has multiple stems. Searches for phrases + /// including either stem will match. In this case, all but the first stem's + /// increment should be set to zero: the increment of the first instance + /// should be one. Repeating a token with an increment of zero can also be + /// used to boost the scores of matches on that token. + /// + /// Set it to values greater than one to inhibit exact phrase matches. + /// If, for example, one does not want phrases to match across removed stop + /// words, then one could build a stop word filter that removes stop words and + /// also sets the increment to the number of stop words removed before each + /// non-stop word. Then exact phrase queries will only match when the terms + /// occur with no intervening stop words. + /// + /// + /// + ///

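A sketch of the stop-word style usage described above, with a made-up "drop short tokens" rule standing in for a real stop list (illustrative only, not part of the imported sources):

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical filter that removes short tokens and folds their position
// increments into the next emitted token.
public sealed class DropShortTokensFilter : TokenFilter
{
    private readonly ITermAttribute termAtt;
    private readonly IPositionIncrementAttribute posIncrAtt;

    public DropShortTokensFilter(TokenStream input) : base(input)
    {
        termAtt = AddAttribute<ITermAttribute>();
        posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
    }

    public override bool IncrementToken()
    {
        int skipped = 0;
        while (input.IncrementToken())
        {
            if (termAtt.TermLength() >= 3)
            {
                // Account for the removed tokens so that exact phrase queries
                // do not match across the gap.
                posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skipped;
                return true;
            }
            skipped += posIncrAtt.PositionIncrement;
        }
        return false;
    }
}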
+ /// + /// + public interface IPositionIncrementAttribute:IAttribute + { + /// Gets or sets the position increment. The default value is one. + /// + /// + /// the distance from the prior term + int PositionIncrement { set; get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITermAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITermAttribute.cs new file mode 100644 index 0000000000..8f9b030339 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITermAttribute.cs @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The term text of a Token. + public interface ITermAttribute:IAttribute + { + /// Returns the Token's term text. + /// + /// This method has a performance penalty + /// because the text is stored internally in a char[]. If + /// possible, use and + /// directly instead. If you really need a + /// String, use this method, which is nothing more than + /// a convenience call to new String(token.termBuffer(), 0, token.termLength()) + /// + string Term { get; } + + /// Copies the contents of buffer, starting at offset for + /// length characters, into the termBuffer array. + /// + /// the buffer to copy + /// + /// the index in the buffer of the first character to copy + /// + /// the number of characters to copy + /// + void SetTermBuffer(char[] buffer, int offset, int length); + + /// Copies the contents of buffer into the termBuffer array. + /// the buffer to copy + /// + void SetTermBuffer(System.String buffer); + + /// Copies the contents of buffer, starting at offset and continuing + /// for length characters, into the termBuffer array. + /// + /// the buffer to copy + /// + /// the index in the buffer of the first character to copy + /// + /// the number of characters to copy + /// + void SetTermBuffer(System.String buffer, int offset, int length); + + /// Returns the internal termBuffer character array which + /// you can then directly alter. If the array is too + /// small for your token, use + /// to increase it. After + /// altering the buffer be sure to call + /// to record the number of valid + /// characters that were placed into the termBuffer. + /// + char[] TermBuffer(); + + /// Grows the termBuffer to at least size newSize, preserving the + /// existing content. Note: If the next operation is to change + /// the contents of the term buffer use + /// , + /// , or + /// + /// to optimally combine the resize with the setting of the termBuffer. 
+ /// + /// minimum size of the new termBuffer + /// + /// newly created termBuffer with length >= newSize + /// + char[] ResizeTermBuffer(int newSize); + + /// Return number of valid characters (length of the term) + /// in the termBuffer array. + /// + int TermLength(); + + /// Set number of valid characters (length of the term) in + /// the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. + /// Note: to grow the size of the array, + /// use first. + /// + /// the truncated length + /// + void SetTermLength(int length); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITypeAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITypeAttribute.cs new file mode 100644 index 0000000000..48bcc10062 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/ITypeAttribute.cs @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// A Token's lexical type. The Default value is "word". + public interface ITypeAttribute:IAttribute + { + /// Gets or sets this Token's lexical type. Defaults to "word". + string Type { get; set; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/OffsetAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/OffsetAttribute.cs new file mode 100644 index 0000000000..51495599dd --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/OffsetAttribute.cs @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The start and end character offset of a Token. 
+ [Serializable] + public class OffsetAttribute:Attribute, IOffsetAttribute, System.ICloneable + { + private int startOffset; + private int endOffset; + + /// Returns this Token's starting offset, the position of the first character + /// corresponding to this token in the source text. + /// Note that the difference between endOffset() and startOffset() may not be + /// equal to termText.length(), as the term text may have been altered by a + /// stemmer or some other filter. + /// + public virtual int StartOffset + { + get { return startOffset; } + } + + + /// Set the starting and ending offset. + /// See StartOffset() and EndOffset() + /// + public virtual void SetOffset(int startOffset, int endOffset) + { + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + + /// Returns this Token's ending offset, one greater than the position of the + /// last character corresponding to this token in the source text. The length + /// of the token in the source text is (endOffset - startOffset). + /// + public virtual int EndOffset + { + get { return endOffset; } + } + + + public override void Clear() + { + startOffset = 0; + endOffset = 0; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is OffsetAttribute) + { + OffsetAttribute o = (OffsetAttribute) other; + return o.startOffset == startOffset && o.endOffset == endOffset; + } + + return false; + } + + public override int GetHashCode() + { + int code = startOffset; + code = code * 31 + endOffset; + return code; + } + + public override void CopyTo(Attribute target) + { + IOffsetAttribute t = (IOffsetAttribute) target; + t.SetOffset(startOffset, endOffset); + } + + override public System.Object Clone() + { + OffsetAttribute impl = new OffsetAttribute(); + impl.endOffset = endOffset; + impl.startOffset = startOffset; + return impl; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PayloadAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PayloadAttribute.cs new file mode 100644 index 0000000000..ae1c4d93f8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PayloadAttribute.cs @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Attribute = Lucene.Net.Util.Attribute; +using Payload = Lucene.Net.Index.Payload; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The payload of a Token. See also . + [Serializable] + public class PayloadAttribute:Attribute, IPayloadAttribute, System.ICloneable + { + private Payload payload; + + /// Initialize this attribute with no payload. + public PayloadAttribute() + { + } + + /// Initialize this attribute with the given payload. 
+ public PayloadAttribute(Payload payload) + { + this.payload = payload; + } + + /// Returns this Token's payload. + public virtual Payload Payload + { + get { return this.payload; } + set { this.payload = value; } + } + + public override void Clear() + { + payload = null; + } + + public override System.Object Clone() + { + var clone = (PayloadAttribute) base.Clone(); + if (payload != null) + { + clone.payload = (Payload) payload.Clone(); + } + return clone; + // TODO: This code use to be as below. Any reason why? the if(payload!=null) was missing... + //PayloadAttributeImpl impl = new PayloadAttributeImpl(); + //impl.payload = new Payload(this.payload.data, this.payload.offset, this.payload.length); + //return impl; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is IPayloadAttribute) + { + PayloadAttribute o = (PayloadAttribute) other; + if (o.payload == null || payload == null) + { + return o.payload == null && payload == null; + } + + return o.payload.Equals(payload); + } + + return false; + } + + public override int GetHashCode() + { + return (payload == null)?0:payload.GetHashCode(); + } + + public override void CopyTo(Attribute target) + { + IPayloadAttribute t = (IPayloadAttribute) target; + t.Payload = (payload == null)?null:(Payload) payload.Clone(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs new file mode 100644 index 0000000000..4f7a04fbf9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/PositionIncrementAttribute.cs @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Attribute = Lucene.Net.Util.Attribute; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The positionIncrement determines the position of this token + /// relative to the previous Token in a , used in phrase + /// searching. + /// + ///

The default value is one. + /// + ///

Some common uses for this are: + /// + /// Set it to zero to put multiple terms in the same position. This is + /// useful if, e.g., a word has multiple stems. Searches for phrases + /// including either stem will match. In this case, all but the first stem's + /// increment should be set to zero: the increment of the first instance + /// should be one. Repeating a token with an increment of zero can also be + /// used to boost the scores of matches on that token. + /// + /// Set it to values greater than one to inhibit exact phrase matches. + /// If, for example, one does not want phrases to match across removed stop + /// words, then one could build a stop word filter that removes stop words and + /// also sets the increment to the number of stop words removed before each + /// non-stop word. Then exact phrase queries will only match when the terms + /// occur with no intervening stop words. + /// + /// + ///

+ [Serializable] + public class PositionIncrementAttribute:Attribute, IPositionIncrementAttribute, System.ICloneable + { + private int positionIncrement = 1; + + /// Set the position increment. The default value is one. + /// + /// + /// the distance from the prior term + public virtual int PositionIncrement + { + set + { + if (value < 0) + throw new System.ArgumentException("Increment must be zero or greater: " + value); + this.positionIncrement = value; + } + get { return positionIncrement; } + } + + public override void Clear() + { + this.positionIncrement = 1; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is PositionIncrementAttribute) + { + return positionIncrement == ((PositionIncrementAttribute) other).positionIncrement; + } + + return false; + } + + public override int GetHashCode() + { + return positionIncrement; + } + + public override void CopyTo(Attribute target) + { + IPositionIncrementAttribute t = (IPositionIncrementAttribute) target; + t.PositionIncrement = positionIncrement; + } + + override public System.Object Clone() + { + PositionIncrementAttribute impl = new PositionIncrementAttribute(); + impl.positionIncrement = positionIncrement; + return impl; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TermAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TermAttribute.cs new file mode 100644 index 0000000000..f95402c179 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TermAttribute.cs @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// The term text of a Token. + [Serializable] + public class TermAttribute:Attribute, ITermAttribute, System.ICloneable + { + private static int MIN_BUFFER_SIZE = 10; + + private char[] termBuffer; + private int termLength; + + /// Returns the Token's term text. + /// + /// This method has a performance penalty + /// because the text is stored internally in a char[]. If + /// possible, use and + /// directly instead. If you + /// really need a String, use this method, which is nothing more than + /// a convenience call to new String(token.termBuffer(), 0, token.termLength()) + /// + public virtual string Term + { + get + { + InitTermBuffer(); + return new System.String(termBuffer, 0, termLength); + } + } + + /// Copies the contents of buffer, starting at offset for + /// length characters, into the termBuffer array. 
+ /// + /// the buffer to copy + /// + /// the index in the buffer of the first character to copy + /// + /// the number of characters to copy + /// + public virtual void SetTermBuffer(char[] buffer, int offset, int length) + { + GrowTermBuffer(length); + Array.Copy(buffer, offset, termBuffer, 0, length); + termLength = length; + } + + /// Copies the contents of buffer into the termBuffer array. + /// the buffer to copy + /// + public virtual void SetTermBuffer(System.String buffer) + { + int length = buffer.Length; + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, 0, length, termBuffer, 0); + termLength = length; + } + + /// Copies the contents of buffer, starting at offset and continuing + /// for length characters, into the termBuffer array. + /// + /// the buffer to copy + /// + /// the index in the buffer of the first character to copy + /// + /// the number of characters to copy + /// + public virtual void SetTermBuffer(System.String buffer, int offset, int length) + { + System.Diagnostics.Debug.Assert(offset <= buffer.Length); + System.Diagnostics.Debug.Assert(offset + length <= buffer.Length); + GrowTermBuffer(length); + TextSupport.GetCharsFromString(buffer, offset, offset + length, termBuffer, 0); + termLength = length; + } + + /// Returns the internal termBuffer character array which + /// you can then directly alter. If the array is too + /// small for your token, use + /// to increase it. After + /// altering the buffer be sure to call + /// to record the number of valid + /// characters that were placed into the termBuffer. + /// + public virtual char[] TermBuffer() + { + InitTermBuffer(); + return termBuffer; + } + + /// Grows the termBuffer to at least size newSize, preserving the + /// existing content. Note: If the next operation is to change + /// the contents of the term buffer use + /// , + /// , or + /// + /// to optimally combine the resize with the setting of the termBuffer. + /// + /// minimum size of the new termBuffer + /// + /// newly created termBuffer with length >= newSize + /// + public virtual char[] ResizeTermBuffer(int newSize) + { + if (termBuffer == null) + { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation and preserve content + char[] newCharBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + Array.Copy(termBuffer, 0, newCharBuffer, 0, termBuffer.Length); + termBuffer = newCharBuffer; + } + } + return termBuffer; + } + + + /// Allocates a buffer char[] of at least newSize, without preserving the existing content. + /// its always used in places that set the content + /// + /// minimum size of the buffer + /// + private void GrowTermBuffer(int newSize) + { + if (termBuffer == null) + { + // The buffer is always at least MIN_BUFFER_SIZE + termBuffer = new char[ArrayUtil.GetNextSize(newSize < MIN_BUFFER_SIZE?MIN_BUFFER_SIZE:newSize)]; + } + else + { + if (termBuffer.Length < newSize) + { + // Not big enough; create a new array with slight + // over allocation: + termBuffer = new char[ArrayUtil.GetNextSize(newSize)]; + } + } + } + + private void InitTermBuffer() + { + if (termBuffer == null) + { + termBuffer = new char[ArrayUtil.GetNextSize(MIN_BUFFER_SIZE)]; + termLength = 0; + } + } + + /// Return number of valid characters (length of the term) + /// in the termBuffer array. 
+ /// + public virtual int TermLength() + { + return termLength; + } + + /// Set number of valid characters (length of the term) in + /// the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. + /// Note: to grow the size of the array, + /// use first. + /// + /// the truncated length + /// + public virtual void SetTermLength(int length) + { + InitTermBuffer(); + if (length > termBuffer.Length) + throw new System.ArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.Length + ")"); + termLength = length; + } + + public override int GetHashCode() + { + InitTermBuffer(); + int code = termLength; + code = code * 31 + ArrayUtil.HashCode(termBuffer, 0, termLength); + return code; + } + + public override void Clear() + { + termLength = 0; + } + + public override System.Object Clone() + { + TermAttribute t = (TermAttribute) base.Clone(); + // Do a deep clone + if (termBuffer != null) + { + t.termBuffer = new char[termBuffer.Length]; + termBuffer.CopyTo(t.termBuffer, 0); + } + return t; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is ITermAttribute) + { + InitTermBuffer(); + TermAttribute o = ((TermAttribute) other); + o.InitTermBuffer(); + + if (termLength != o.termLength) + return false; + for (int i = 0; i < termLength; i++) + { + if (termBuffer[i] != o.termBuffer[i]) + { + return false; + } + } + return true; + } + + return false; + } + + public override System.String ToString() + { + InitTermBuffer(); + return "term=" + new System.String(termBuffer, 0, termLength); + } + + public override void CopyTo(Attribute target) + { + InitTermBuffer(); + ITermAttribute t = (ITermAttribute) target; + t.SetTermBuffer(termBuffer, 0, termLength); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TypeAttribute.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TypeAttribute.cs new file mode 100644 index 0000000000..1da1c50f89 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenattributes/TypeAttribute.cs @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Attribute = Lucene.Net.Util.Attribute; + +namespace Lucene.Net.Analysis.Tokenattributes +{ + + /// A Token's lexical type. The Default value is "word". + [Serializable] + public class TypeAttribute:Attribute, ITypeAttribute, System.ICloneable + { + private System.String type; + public const System.String DEFAULT_TYPE = "word"; + + public TypeAttribute():this(DEFAULT_TYPE) + { + } + + public TypeAttribute(System.String type) + { + this.type = type; + } + + /// Returns this Token's lexical type. 
Defaults to "word". + public virtual string Type + { + get { return type; } + set { this.type = value; } + } + + public override void Clear() + { + type = DEFAULT_TYPE; + } + + public override bool Equals(System.Object other) + { + if (other == this) + { + return true; + } + + if (other is TypeAttribute) + { + return type.Equals(((TypeAttribute) other).type); + } + + return false; + } + + public override int GetHashCode() + { + return type.GetHashCode(); + } + + public override void CopyTo(Attribute target) + { + ITypeAttribute t = (ITypeAttribute) target; + t.Type = type; + } + + override public System.Object Clone() + { + TypeAttribute impl = new TypeAttribute(); + impl.type = type; + return impl; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/Tokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/Tokenizer.cs new file mode 100644 index 0000000000..5ab741ef18 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/Tokenizer.cs @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// A Tokenizer is a TokenStream whose input is a Reader. + ///

+ /// This is an abstract class; subclasses must override IncrementToken(). + ///

+ /// NOTE: Subclasses overriding IncrementToken() must call + /// ClearAttributes() before setting attributes. + ///

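A minimal sketch of a Tokenizer subclass that follows those rules; the class is hypothetical (it returns the whole input as one token and reports a final offset in End()), assuming only the members declared in this file:

using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

// Hypothetical tokenizer that returns the entire input as a single token.
public sealed class WholeInputTokenizer : Tokenizer
{
    private readonly ITermAttribute termAtt;
    private readonly IOffsetAttribute offsetAtt;
    private bool done;
    private int finalOffset;

    public WholeInputTokenizer(TextReader input) : base(input)
    {
        termAtt = AddAttribute<ITermAttribute>();
        offsetAtt = AddAttribute<IOffsetAttribute>();
    }

    public override bool IncrementToken()
    {
        if (done)
            return false;
        ClearAttributes();                       // required before setting attributes
        string text = input.ReadToEnd();
        termAtt.SetTermBuffer(text);
        finalOffset = CorrectOffset(text.Length);
        offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
        done = true;
        return true;
    }

    public override void End()
    {
        // report the final offset once the stream is exhausted
        offsetAtt.SetOffset(finalOffset, finalOffset);
    }

    public override void Reset(TextReader reader)
    {
        base.Reset(reader);
        done = false;
        finalOffset = 0;
    }
}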
+ + public abstract class Tokenizer:TokenStream + { + /// The text source for this Tokenizer. + protected internal System.IO.TextReader input; + + private bool isDisposed; + + /// Construct a tokenizer with null input. + protected internal Tokenizer() + { + } + + /// Construct a token stream processing the given input. + protected internal Tokenizer(System.IO.TextReader input) + { + this.input = CharReader.Get(input); + } + + /// Construct a tokenizer with null input using the given AttributeFactory. + protected internal Tokenizer(AttributeFactory factory):base(factory) + { + } + + /// Construct a token stream processing the given input using the given AttributeFactory. + protected internal Tokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory) + { + this.input = CharReader.Get(input); + } + + /// Construct a token stream processing the given input using the given AttributeSource. + protected internal Tokenizer(AttributeSource source):base(source) + { + } + + /// Construct a token stream processing the given input using the given AttributeSource. + protected internal Tokenizer(AttributeSource source, System.IO.TextReader input):base(source) + { + this.input = CharReader.Get(input); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (input != null) + { + input.Close(); + } + } + + // LUCENE-2387: don't hold onto Reader after close, so + // GC can reclaim + input = null; + isDisposed = true; + } + + /// Return the corrected offset. If is a subclass + /// this method calls , else returns currentOff. + /// + /// offset as seen in the output + /// + /// corrected offset based on the input + /// + /// + /// + protected internal int CorrectOffset(int currentOff) + { + return (input is CharStream)?((CharStream) input).CorrectOffset(currentOff):currentOff; + } + + /// Expert: Reset the tokenizer to a new reader. Typically, an + /// analyzer (in its reusableTokenStream method) will use + /// this to re-use a previously created tokenizer. + /// + public virtual void Reset(System.IO.TextReader input) + { + this.input = input; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/WhitespaceAnalyzer.cs b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceAnalyzer.cs new file mode 100644 index 0000000000..77dbaa3511 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceAnalyzer.cs @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Analysis +{ + + /// An Analyzer that uses . 
+ + public sealed class WhitespaceAnalyzer:Analyzer + { + public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) + { + return new WhitespaceTokenizer(reader); + } + + public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader) + { + var tokenizer = (Tokenizer) PreviousTokenStream; + if (tokenizer == null) + { + tokenizer = new WhitespaceTokenizer(reader); + PreviousTokenStream = tokenizer; + } + else + tokenizer.Reset(reader); + return tokenizer; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/WhitespaceTokenizer.cs b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceTokenizer.cs new file mode 100644 index 0000000000..c96ad50f94 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/WhitespaceTokenizer.cs @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Analysis +{ + + /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace. + /// Adjacent sequences of non-Whitespace characters form tokens. + /// + + public class WhitespaceTokenizer:CharTokenizer + { + /// Construct a new WhitespaceTokenizer. + public WhitespaceTokenizer(System.IO.TextReader @in) + : base(@in) + { + } + + /// Construct a new WhitespaceTokenizer using a given . + public WhitespaceTokenizer(AttributeSource source, System.IO.TextReader @in) + : base(source, @in) + { + } + + /// Construct a new WhitespaceTokenizer using a given . + public WhitespaceTokenizer(AttributeFactory factory, System.IO.TextReader @in) + : base(factory, @in) + { + } + + /// Collects only characters which do not satisfy + /// . + /// + protected internal override bool IsTokenChar(char c) + { + return !System.Char.IsWhiteSpace(c); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Analysis/WordlistLoader.cs b/external/Lucene.Net.Light/src/core/Analysis/WordlistLoader.cs new file mode 100644 index 0000000000..bfd1b07471 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Analysis/WordlistLoader.cs @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; + +namespace Lucene.Net.Analysis +{ + + /// Loader for text files that represent a list of stopwords. + public class WordlistLoader + { + + /// Loads a text file and adds every line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the file should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// + /// File containing the wordlist + /// A HashSet with the file's words + public static ISet GetWordSet(System.IO.FileInfo wordfile) + { + using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default)) + { + return GetWordSet(reader); + } + } + + /// Loads a text file and adds every non-comment line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the file should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// + /// File containing the wordlist + /// The comment string to ignore + /// A HashSet with the file's words + public static ISet GetWordSet(System.IO.FileInfo wordfile, System.String comment) + { + using (var reader = new System.IO.StreamReader(wordfile.FullName, System.Text.Encoding.Default)) + { + return GetWordSet(reader, comment); + } + } + + + /// Reads lines from a Reader and adds every line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the Reader should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// + /// Reader containing the wordlist + /// A HashSet with the reader's words + public static ISet GetWordSet(System.IO.TextReader reader) + { + var result = Support.Compatibility.SetFactory.CreateHashSet(); + + System.String word; + while ((word = reader.ReadLine()) != null) + { + result.Add(word.Trim()); + } + + return result; + } + + /// Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting + /// leading and trailing whitespace). Every line of the Reader should contain only + /// one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// + /// + /// Reader containing the wordlist + /// + /// The string representing a comment. + /// + /// A HashSet with the reader's words + /// + public static ISet GetWordSet(System.IO.TextReader reader, System.String comment) + { + var result = Support.Compatibility.SetFactory.CreateHashSet(); + + System.String word = null; + while ((word = reader.ReadLine()) != null) + { + if (word.StartsWith(comment) == false) + { + result.Add(word.Trim()); + } + } + + return result; + } + + + + /// Reads a stem dictionary. Each line contains: + /// word\tstem + /// (i.e. 
two tab seperated words) + /// + /// + /// stem dictionary that overrules the stemming algorithm + /// + /// IOException + public static Dictionary GetStemDict(System.IO.FileInfo wordstemfile) + { + if (wordstemfile == null) + throw new System.NullReferenceException("wordstemfile may not be null"); + var result = new Dictionary(); + System.IO.StreamReader br = null; + System.IO.StreamReader fr = null; + try + { + fr = new System.IO.StreamReader(wordstemfile.FullName, System.Text.Encoding.Default); + br = new System.IO.StreamReader(fr.BaseStream, fr.CurrentEncoding); + System.String line; + char[] tab = {'\t'}; + while ((line = br.ReadLine()) != null) + { + System.String[] wordstem = line.Split(tab, 2); + result[wordstem[0]] = wordstem[1]; + } + } + finally + { + if (fr != null) + fr.Close(); + if (br != null) + br.Close(); + } + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/AbstractField.cs b/external/Lucene.Net.Light/src/core/Document/AbstractField.cs new file mode 100644 index 0000000000..a526f1de2d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/AbstractField.cs @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
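Before the diff moves on to AbstractField, a hedged sketch of how the WordlistLoader shown above is typically called (the file name and its contents are hypothetical):

using System;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Analysis;

class WordlistSketch
{
    static void Main()
    {
        // One lowercase word per line; lines starting with "#" are treated as comments.
        ISet<string> stopWords = WordlistLoader.GetWordSet(new FileInfo("stopwords.txt"), "#");
        Console.WriteLine("loaded {0} stop words", stopWords.Count);
    }
}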
+ */ + +using System; +using System.IO; +using TokenStream = Lucene.Net.Analysis.TokenStream; +using StringHelper = Lucene.Net.Util.StringHelper; +using PhraseQuery = Lucene.Net.Search.PhraseQuery; +using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; + +namespace Lucene.Net.Documents +{ + /// + /// + /// + /// + [Serializable] + public abstract class AbstractField : IFieldable + { + + protected internal System.String internalName = "body"; + protected internal bool storeTermVector = false; + protected internal bool storeOffsetWithTermVector = false; + protected internal bool storePositionWithTermVector = false; + protected internal bool internalOmitNorms = false; + protected internal bool internalIsStored = false; + protected internal bool internalIsIndexed = true; + protected internal bool internalIsTokenized = true; + protected internal bool internalIsBinary = false; + protected internal bool lazy = false; + protected internal bool internalOmitTermFreqAndPositions = false; + protected internal float internalBoost = 1.0f; + // the data object for all different kind of field values + protected internal System.Object fieldsData = null; + // pre-analyzed tokenStream for indexed fields + protected internal TokenStream tokenStream; + // length/offset for all primitive types + protected internal int internalBinaryLength; + protected internal int internalbinaryOffset; + + protected internal AbstractField() + { + } + + protected internal AbstractField(System.String name, Field.Store store, Field.Index index, Field.TermVector termVector) + { + if (name == null) + throw new System.NullReferenceException("name cannot be null"); + this.internalName = StringHelper.Intern(name); // field names are interned + + this.internalIsStored = store.IsStored(); + this.internalIsIndexed = index.IsIndexed(); + this.internalIsTokenized = index.IsAnalyzed(); + this.internalOmitNorms = index.OmitNorms(); + + this.internalIsBinary = false; + + SetStoreTermVector(termVector); + } + + /// Gets or sets the boost factor for hits for this field. + /// + ///

The default value is 1.0. + /// + ///

Note: this value is not stored directly with the document in the index. + /// Documents returned from IndexReader.Document(int) and + /// Searcher.Doc(int) may thus not have the same value present as when + /// this field was indexed. + ///
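A hedged sketch of the index-time boost described above (the Field class and its Store/Index options appear later in this diff):

using Lucene.Net.Documents;

static class BoostSketch
{
    public static Field MakeTitleField(string text)
    {
        var title = new Field("title", text, Field.Store.YES, Field.Index.ANALYZED);
        title.Boost = 2.0f;   // folded into the field norm at index time, not stored verbatim
        return title;
    }
}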

+ public virtual float Boost + { + get { return internalBoost; } + set { this.internalBoost = value; } + } + + /// Returns the name of the field as an interned string. + /// For example "date", "title", "body", ... + /// + public virtual string Name + { + get { return internalName; } + } + + protected internal virtual void SetStoreTermVector(Field.TermVector termVector) + { + this.storeTermVector = termVector.IsStored(); + this.storePositionWithTermVector = termVector.WithPositions(); + this.storeOffsetWithTermVector = termVector.WithOffsets(); + } + + /// True iff the value of the field is to be stored in the index for return + /// with search hits. It is an error for this to be true if a field is + /// Reader-valued. + /// + public bool IsStored + { + get { return internalIsStored; } + } + + /// True iff the value of the field is to be indexed, so that it may be + /// searched on. + /// + public bool IsIndexed + { + get { return internalIsIndexed; } + } + + /// True iff the value of the field should be tokenized as text prior to + /// indexing. Un-tokenized fields are indexed as a single word and may not be + /// Reader-valued. + /// + public bool IsTokenized + { + get { return internalIsTokenized; } + } + + /// True iff the term or terms used to index this field are stored as a term + /// vector, available from . + /// These methods do not provide access to the original content of the field, + /// only to terms used to index it. If the original content must be + /// preserved, use the stored attribute instead. + /// + /// + /// + /// + public bool IsTermVectorStored + { + get { return storeTermVector; } + } + + /// True iff terms are stored as term vector together with their offsets + /// (start and end position in source text). + /// + public virtual bool IsStoreOffsetWithTermVector + { + get { return storeOffsetWithTermVector; } + } + + /// True iff terms are stored as term vector together with their token positions. + public virtual bool IsStorePositionWithTermVector + { + get { return storePositionWithTermVector; } + } + + /// True iff the value of the filed is stored as binary + public bool IsBinary + { + get { return internalIsBinary; } + } + + + /// Return the raw byte[] for the binary field. Note that + /// you must also call and + /// to know which range of bytes in this + /// returned array belong to the field. + /// + /// reference to the Field value as byte[]. + public virtual byte[] GetBinaryValue() + { + return GetBinaryValue(null); + } + + public virtual byte[] GetBinaryValue(byte[] result) + { + if (internalIsBinary || fieldsData is byte[]) + return (byte[]) fieldsData; + else + return null; + } + + /// Returns length of byte[] segment that is used as value, if Field is not binary + /// returned value is undefined + /// + /// length of byte[] segment that represents this Field value + public virtual int BinaryLength + { + get + { + if (internalIsBinary) + { + return internalBinaryLength; + } + return fieldsData is byte[] ? 
((byte[]) fieldsData).Length : 0; + } + } + + /// Returns offset into byte[] segment that is used as value, if Field is not binary + /// returned value is undefined + /// + /// index of the first character in byte[] segment that represents this Field value + public virtual int BinaryOffset + { + get { return internalbinaryOffset; } + } + + /// True if norms are omitted for this indexed field + public virtual bool OmitNorms + { + get { return internalOmitNorms; } + set { this.internalOmitNorms = value; } + } + + /// Expert: + /// + /// If set, omit term freq, positions and payloads from + /// postings for this field. + /// + ///

NOTE: While this option reduces storage space + /// required in the index, it also means any query + /// requiring positional information, such as + /// PhraseQuery or SpanQuery subclasses will + /// silently fail to find results. + ///
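A hedged sketch of the trade-off just described: a match-only field with term frequencies and positions omitted (phrase or span queries against it would then silently find nothing):

using Lucene.Net.Documents;

static class OmitTfSketch
{
    public static IFieldable MakeCategoryField(string category)
    {
        var f = new Field("category", category, Field.Store.NO, Field.Index.NOT_ANALYZED);
        f.OmitTermFreqAndPositions = true;   // postings keep only document IDs for this field
        return f;
    }
}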

+ public virtual bool OmitTermFreqAndPositions + { + set { this.internalOmitTermFreqAndPositions = value; } + get { return internalOmitTermFreqAndPositions; } + } + + public virtual bool IsLazy + { + get { return lazy; } + } + + /// Prints a Field for human consumption. + public override System.String ToString() + { + System.Text.StringBuilder result = new System.Text.StringBuilder(); + if (internalIsStored) + { + result.Append("stored"); + } + if (internalIsIndexed) + { + if (result.Length > 0) + result.Append(","); + result.Append("indexed"); + } + if (internalIsTokenized) + { + if (result.Length > 0) + result.Append(","); + result.Append("tokenized"); + } + if (storeTermVector) + { + if (result.Length > 0) + result.Append(","); + result.Append("termVector"); + } + if (storeOffsetWithTermVector) + { + if (result.Length > 0) + result.Append(","); + result.Append("termVectorOffsets"); + } + if (storePositionWithTermVector) + { + if (result.Length > 0) + result.Append(","); + result.Append("termVectorPosition"); + } + if (internalIsBinary) + { + if (result.Length > 0) + result.Append(","); + result.Append("binary"); + } + if (internalOmitNorms) + { + result.Append(",omitNorms"); + } + if (internalOmitTermFreqAndPositions) + { + result.Append(",omitTermFreqAndPositions"); + } + if (lazy) + { + result.Append(",lazy"); + } + result.Append('<'); + result.Append(internalName); + result.Append(':'); + + if (fieldsData != null && lazy == false) + { + result.Append(fieldsData); + } + + result.Append('>'); + return result.ToString(); + } + + public abstract TokenStream TokenStreamValue { get; } + public abstract TextReader ReaderValue { get; } + public abstract string StringValue { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/CompressionTools.cs b/external/Lucene.Net.Light/src/core/Document/CompressionTools.cs new file mode 100644 index 0000000000..400633f3aa --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/CompressionTools.cs @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// To enable compression support in Lucene.Net , +// you will need to define 'SHARP_ZIP_LIB' and reference the SharpLibZip +// library. The SharpLibZip library can be downloaded from: +// http://www.icsharpcode.net/OpenSource/SharpZipLib/ + +using System; +using Lucene.Net.Support; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Documents +{ + + /// Simple utility class providing static methods to + /// compress and decompress binary data for stored fields. + /// This class uses java.util.zip.Deflater and Inflater + /// classes to compress and decompress. 
+ /// + + public class CompressionTools + { + + // Export only static methods + private CompressionTools() + { + } + + /// Compresses the specified byte range using the + /// specified compressionLevel (constants are defined in + /// java.util.zip.Deflater). + /// + public static byte[] Compress(byte[] value_Renamed, int offset, int length, int compressionLevel) + { + /* Create an expandable byte array to hold the compressed data. + * You cannot use an array that's the same size as the orginal because + * there is no guarantee that the compressed data will be smaller than + * the uncompressed data. */ + System.IO.MemoryStream bos = new System.IO.MemoryStream(length); + + Deflater compressor = SharpZipLib.CreateDeflater(); + + try + { + compressor.SetLevel(compressionLevel); + compressor.SetInput(value_Renamed, offset, length); + compressor.Finish(); + + // Compress the data + byte[] buf = new byte[1024]; + while (!compressor.IsFinished) + { + int count = compressor.Deflate(buf); + bos.Write(buf, 0, count); + } + } + finally + { + } + + return bos.ToArray(); + } + + /// Compresses the specified byte range, with default BEST_COMPRESSION level + public static byte[] Compress(byte[] value_Renamed, int offset, int length) + { + return Compress(value_Renamed, offset, length, Deflater.BEST_COMPRESSION); + } + + /// Compresses all bytes in the array, with default BEST_COMPRESSION level + public static byte[] Compress(byte[] value_Renamed) + { + return Compress(value_Renamed, 0, value_Renamed.Length, Deflater.BEST_COMPRESSION); + } + + /// Compresses the String value, with default BEST_COMPRESSION level + public static byte[] CompressString(System.String value_Renamed) + { + return CompressString(value_Renamed, Deflater.BEST_COMPRESSION); + } + + /// Compresses the String value using the specified + /// compressionLevel (constants are defined in + /// java.util.zip.Deflater). 
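A hedged round-trip sketch for CompressionTools (per the comment at the top of the file, this assumes the build defines SHARP_ZIP_LIB and references SharpZipLib):

using System;
using Lucene.Net.Documents;

class CompressionSketch
{
    static void Main()
    {
        byte[] packed = CompressionTools.CompressString("some stored field value");
        string unpacked = CompressionTools.DecompressString(packed);
        Console.WriteLine(unpacked);   // "some stored field value"
    }
}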
+ /// + public static byte[] CompressString(System.String value_Renamed, int compressionLevel) + { + UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result(); + UnicodeUtil.UTF16toUTF8(value_Renamed, 0, value_Renamed.Length, result); + return Compress(result.result, 0, result.length, compressionLevel); + } + + /// Decompress the byte array previously returned by + /// compress + /// + public static byte[] Decompress(byte[] value_Renamed) + { + // Create an expandable byte array to hold the decompressed data + System.IO.MemoryStream bos = new System.IO.MemoryStream(value_Renamed.Length); + + Inflater decompressor = SharpZipLib.CreateInflater(); + + try + { + decompressor.SetInput(value_Renamed); + + // Decompress the data + byte[] buf = new byte[1024]; + while (!decompressor.IsFinished) + { + int count = decompressor.Inflate(buf); + bos.Write(buf, 0, count); + } + } + finally + { + } + + return bos.ToArray(); + } + + /// Decompress the byte array previously returned by + /// compressString back into a String + /// + public static System.String DecompressString(byte[] value_Renamed) + { + UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result(); + byte[] bytes = Decompress(value_Renamed); + UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.Length, result); + return new System.String(result.result, 0, result.length); + } + } +} + diff --git a/external/Lucene.Net.Light/src/core/Document/DateField.cs b/external/Lucene.Net.Light/src/core/Document/DateField.cs new file mode 100644 index 0000000000..6179f4c14d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/DateField.cs @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Search; +using Lucene.Net.Support; +using NumericUtils = Lucene.Net.Util.NumericUtils; +using PrefixQuery = Lucene.Net.Search.PrefixQuery; +using TermRangeQuery = Lucene.Net.Search.TermRangeQuery; +// for javadoc + +namespace Lucene.Net.Documents +{ + // for javadoc + + // do not remove in 3.0, needed for reading old indexes! + + /// Provides support for converting dates to strings and vice-versa. + /// The strings are structured so that lexicographic sorting orders by date, + /// which makes them suitable for use as field values and search terms. + /// + ///

Note that this class saves dates with millisecond granularity, + /// which is bad for TermRangeQuery and PrefixQuery, as those + /// queries are expanded to a BooleanQuery with a potentially large number + /// of terms when searching. Thus you might want to use + /// DateTools instead. + /// + ///

+ /// Note: dates before 1970 cannot be used, and therefore cannot be + /// indexed when using this class. See DateTools for an + /// alternative without such a limitation. + /// + ///

+ /// Another approach is NumericUtils, which provides + /// a sortable binary representation (prefix encoded) of numeric values, which + /// date/time are. + /// For indexing a DateTime, convert it to unix timestamp as + /// long and + /// index this as a numeric value with NumericField + /// and use NumericRangeQuery to query it. + /// + ///

+ /// If you build a new index, use or + /// instead. + /// This class is included for use with existing + /// indices and will be removed in a future (possibly Lucene 4.0) + /// + [Obsolete("If you build a new index, use DateTools or NumericField instead.This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")] + public class DateField + { + + private DateField() + { + } + + // make date strings long enough to last a millenium + private static int DATE_LEN = Number.ToString(1000L * 365 * 24 * 60 * 60 * 1000, Number.MAX_RADIX).Length; + + public static System.String MIN_DATE_STRING() + { + return TimeToString(0); + } + + public static System.String MAX_DATE_STRING() + { + char[] buffer = new char[DATE_LEN]; + char c = Character.ForDigit(Character.MAX_RADIX - 1, Character.MAX_RADIX); + for (int i = 0; i < DATE_LEN; i++) + buffer[i] = c; + return new System.String(buffer); + } + + /// Converts a Date to a string suitable for indexing. + /// RuntimeException if the date specified in the + /// method argument is before 1970 + /// + public static System.String DateToString(System.DateTime date) + { + TimeSpan ts = date.Subtract(new DateTime(1970, 1, 1)); + ts = ts.Subtract(TimeZone.CurrentTimeZone.GetUtcOffset(date)); + return TimeToString(ts.Ticks / TimeSpan.TicksPerMillisecond); + } + /// Converts a millisecond time to a string suitable for indexing. + /// RuntimeException if the time specified in the + /// method argument is negative, that is, before 1970 + /// + public static System.String TimeToString(long time) + { + if (time < 0) + throw new System.SystemException("time '" + time + "' is too early, must be >= 0"); + + System.String s = Number.ToString(time, Character.MAX_RADIX); + + if (s.Length > DATE_LEN) + throw new System.SystemException("time '" + time + "' is too late, length of string " + "representation must be <= " + DATE_LEN); + + // Pad with leading zeros + if (s.Length < DATE_LEN) + { + System.Text.StringBuilder sb = new System.Text.StringBuilder(s); + while (sb.Length < DATE_LEN) + sb.Insert(0, 0); + s = sb.ToString(); + } + + return s; + } + + /// Converts a string-encoded date into a millisecond time. + public static long StringToTime(System.String s) + { + return Number.Parse(s, Number.MAX_RADIX); + } + /// Converts a string-encoded date into a Date object. + public static System.DateTime StringToDate(System.String s) + { + long ticks = StringToTime(s) * TimeSpan.TicksPerMillisecond; + System.DateTime date = new System.DateTime(1970, 1, 1); + date = date.AddTicks(ticks); + date = date.Add(TimeZone.CurrentTimeZone.GetUtcOffset(date)); + return date; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/DateTools.cs b/external/Lucene.Net.Light/src/core/Document/DateTools.cs new file mode 100644 index 0000000000..8263df1c34 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/DateTools.cs @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Search; +using NumericUtils = Lucene.Net.Util.NumericUtils; + +namespace Lucene.Net.Documents +{ + + /// Provides support for converting dates to strings and vice-versa. + /// The strings are structured so that lexicographic sorting orders + /// them by date, which makes them suitable for use as field values + /// and search terms. + /// + ///

This class also helps you to limit the resolution of your dates. Do not + /// save dates with a finer resolution than you really need, as then + /// RangeQuery and PrefixQuery will require more memory and become slower. + /// + ///

Compared to DateField, the strings generated by the methods + /// in this class take slightly more space, unless your selected resolution + /// is set to Resolution.DAY or lower. + /// + ///

+ /// Another approach is NumericUtils, which provides + /// a sortable binary representation (prefix encoded) of numeric values, which + /// date/time are. + /// For indexing a DateTime, convert it to unix timestamp as + /// long and + /// index this as a numeric value with NumericField + /// and use NumericRangeQuery to query it. + ///
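A hedged sketch of the DateTools API that follows, truncating a date to day resolution and parsing it back:

using System;
using Lucene.Net.Documents;

class DateToolsSketch
{
    static void Main()
    {
        var when = new DateTime(2004, 9, 21, 13, 50, 11);
        string indexed = DateTools.DateToString(when, DateTools.Resolution.DAY);
        Console.WriteLine(indexed);                          // "20040921"
        Console.WriteLine(DateTools.StringToDate(indexed));  // 2004-09-21 00:00:00
    }
}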

+ public class DateTools + { + + private static readonly System.String YEAR_FORMAT = "yyyy"; + private static readonly System.String MONTH_FORMAT = "yyyyMM"; + private static readonly System.String DAY_FORMAT = "yyyyMMdd"; + private static readonly System.String HOUR_FORMAT = "yyyyMMddHH"; + private static readonly System.String MINUTE_FORMAT = "yyyyMMddHHmm"; + private static readonly System.String SECOND_FORMAT = "yyyyMMddHHmmss"; + private static readonly System.String MILLISECOND_FORMAT = "yyyyMMddHHmmssfff"; + + private static readonly System.Globalization.Calendar calInstance = new System.Globalization.GregorianCalendar(); + + // cannot create, the class has static methods only + private DateTools() + { + } + + /// Converts a Date to a string suitable for indexing. + /// + /// + /// the date to be converted + /// + /// the desired resolution, see + /// + /// + /// a string in format yyyyMMddHHmmssSSS or shorter, + /// depending on resolution; using GMT as timezone + /// + public static System.String DateToString(System.DateTime date, Resolution resolution) + { + return TimeToString(date.Ticks / TimeSpan.TicksPerMillisecond, resolution); + } + + /// Converts a millisecond time to a string suitable for indexing. + /// + /// + /// the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT + /// + /// the desired resolution, see + /// + /// + /// a string in format yyyyMMddHHmmssSSS or shorter, + /// depending on resolution; using GMT as timezone + /// + public static System.String TimeToString(long time, Resolution resolution) + { + System.DateTime date = new System.DateTime(Round(time, resolution)); + + if (resolution == Resolution.YEAR) + { + return date.ToString(YEAR_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + else if (resolution == Resolution.MONTH) + { + return date.ToString(MONTH_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + else if (resolution == Resolution.DAY) + { + return date.ToString(DAY_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + else if (resolution == Resolution.HOUR) + { + return date.ToString(HOUR_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + else if (resolution == Resolution.MINUTE) + { + return date.ToString(MINUTE_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + else if (resolution == Resolution.SECOND) + { + return date.ToString(SECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + else if (resolution == Resolution.MILLISECOND) + { + return date.ToString(MILLISECOND_FORMAT, System.Globalization.CultureInfo.InvariantCulture); + } + + throw new System.ArgumentException("unknown resolution " + resolution); + } + + /// Converts a string produced by timeToString or + /// DateToString back to a time, represented as the + /// number of milliseconds since January 1, 1970, 00:00:00 GMT. + /// + /// + /// the date string to be converted + /// + /// the number of milliseconds since January 1, 1970, 00:00:00 GMT + /// + /// ParseException if dateString is not in the + /// expected format + /// + public static long StringToTime(System.String dateString) + { + return StringToDate(dateString).Ticks; + } + + /// Converts a string produced by timeToString or + /// DateToString back to a time, represented as a + /// Date object. 
+ /// + /// + /// the date string to be converted + /// + /// the parsed time as a Date object + /// + /// ParseException if dateString is not in the + /// expected format + /// + public static System.DateTime StringToDate(System.String dateString) + { + System.DateTime date; + if (dateString.Length == 4) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + 1, 1, 0, 0, 0, 0); + } + else if (dateString.Length == 6) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + Convert.ToInt16(dateString.Substring(4, 2)), + 1, 0, 0, 0, 0); + } + else if (dateString.Length == 8) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + Convert.ToInt16(dateString.Substring(4, 2)), + Convert.ToInt16(dateString.Substring(6, 2)), + 0, 0, 0, 0); + } + else if (dateString.Length == 10) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + Convert.ToInt16(dateString.Substring(4, 2)), + Convert.ToInt16(dateString.Substring(6, 2)), + Convert.ToInt16(dateString.Substring(8, 2)), + 0, 0, 0); + } + else if (dateString.Length == 12) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + Convert.ToInt16(dateString.Substring(4, 2)), + Convert.ToInt16(dateString.Substring(6, 2)), + Convert.ToInt16(dateString.Substring(8, 2)), + Convert.ToInt16(dateString.Substring(10, 2)), + 0, 0); + } + else if (dateString.Length == 14) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + Convert.ToInt16(dateString.Substring(4, 2)), + Convert.ToInt16(dateString.Substring(6, 2)), + Convert.ToInt16(dateString.Substring(8, 2)), + Convert.ToInt16(dateString.Substring(10, 2)), + Convert.ToInt16(dateString.Substring(12, 2)), + 0); + } + else if (dateString.Length == 17) + { + date = new System.DateTime(Convert.ToInt16(dateString.Substring(0, 4)), + Convert.ToInt16(dateString.Substring(4, 2)), + Convert.ToInt16(dateString.Substring(6, 2)), + Convert.ToInt16(dateString.Substring(8, 2)), + Convert.ToInt16(dateString.Substring(10, 2)), + Convert.ToInt16(dateString.Substring(12, 2)), + Convert.ToInt16(dateString.Substring(14, 3))); + } + else + { + throw new System.FormatException("Input is not valid date string: " + dateString); + } + return date; + } + + /// Limit a date's resolution. For example, the date 2004-09-21 13:50:11 + /// will be changed to 2004-09-01 00:00:00 when using + /// Resolution.MONTH. + /// + /// + /// + /// The desired resolution of the date to be returned + /// + /// the date with all values more precise than resolution + /// set to 0 or 1 + /// + public static System.DateTime Round(System.DateTime date, Resolution resolution) + { + return new System.DateTime(Round(date.Ticks / TimeSpan.TicksPerMillisecond, resolution)); + } + + /// Limit a date's resolution. For example, the date 1095767411000 + /// (which represents 2004-09-21 13:50:11) will be changed to + /// 1093989600000 (2004-09-01 00:00:00) when using + /// Resolution.MONTH. + /// + /// + /// The time in milliseconds (not ticks). 
+ /// The desired resolution of the date to be returned + /// + /// the date with all values more precise than resolution + /// set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT + /// + public static long Round(long time, Resolution resolution) + { + System.DateTime dt = new System.DateTime(time * TimeSpan.TicksPerMillisecond); + + if (resolution == Resolution.YEAR) + { + dt = dt.AddMonths(1 - dt.Month); + dt = dt.AddDays(1 - dt.Day); + dt = dt.AddHours(0 - dt.Hour); + dt = dt.AddMinutes(0 - dt.Minute); + dt = dt.AddSeconds(0 - dt.Second); + dt = dt.AddMilliseconds(0 - dt.Millisecond); + } + else if (resolution == Resolution.MONTH) + { + dt = dt.AddDays(1 - dt.Day); + dt = dt.AddHours(0 - dt.Hour); + dt = dt.AddMinutes(0 - dt.Minute); + dt = dt.AddSeconds(0 - dt.Second); + dt = dt.AddMilliseconds(0 - dt.Millisecond); + } + else if (resolution == Resolution.DAY) + { + dt = dt.AddHours(0 - dt.Hour); + dt = dt.AddMinutes(0 - dt.Minute); + dt = dt.AddSeconds(0 - dt.Second); + dt = dt.AddMilliseconds(0 - dt.Millisecond); + } + else if (resolution == Resolution.HOUR) + { + dt = dt.AddMinutes(0 - dt.Minute); + dt = dt.AddSeconds(0 - dt.Second); + dt = dt.AddMilliseconds(0 - dt.Millisecond); + } + else if (resolution == Resolution.MINUTE) + { + dt = dt.AddSeconds(0 - dt.Second); + dt = dt.AddMilliseconds(0 - dt.Millisecond); + } + else if (resolution == Resolution.SECOND) + { + dt = dt.AddMilliseconds(0 - dt.Millisecond); + } + else if (resolution == Resolution.MILLISECOND) + { + // don't cut off anything + } + else + { + throw new System.ArgumentException("unknown resolution " + resolution); + } + return dt.Ticks; + } + + /// Specifies the time granularity. + public class Resolution + { + + public static readonly Resolution YEAR = new Resolution("year"); + public static readonly Resolution MONTH = new Resolution("month"); + public static readonly Resolution DAY = new Resolution("day"); + public static readonly Resolution HOUR = new Resolution("hour"); + public static readonly Resolution MINUTE = new Resolution("minute"); + public static readonly Resolution SECOND = new Resolution("second"); + public static readonly Resolution MILLISECOND = new Resolution("millisecond"); + + private System.String resolution; + + internal Resolution() + { + } + + internal Resolution(System.String resolution) + { + this.resolution = resolution; + } + + public override System.String ToString() + { + return resolution; + } + } + static DateTools() + { + { + // times need to be normalized so the value doesn't depend on the + // location the index is created/used: + // {{Aroush-2.1}} + /* + YEAR_FORMAT.setTimeZone(GMT); + MONTH_FORMAT.setTimeZone(GMT); + DAY_FORMAT.setTimeZone(GMT); + HOUR_FORMAT.setTimeZone(GMT); + MINUTE_FORMAT.setTimeZone(GMT); + SECOND_FORMAT.setTimeZone(GMT); + MILLISECOND_FORMAT.setTimeZone(GMT); + */ + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/Document.cs b/external/Lucene.Net.Light/src/core/Document/Document.cs new file mode 100644 index 0000000000..f24a46aea0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/Document.cs @@ -0,0 +1,382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +// for javadoc +using IndexReader = Lucene.Net.Index.IndexReader; +using ScoreDoc = Lucene.Net.Search.ScoreDoc; +using Searcher = Lucene.Net.Search.Searcher; + +namespace Lucene.Net.Documents +{ + + /// Documents are the unit of indexing and search. + /// + /// A Document is a set of fields. Each field has a name and a textual value. + /// A field may be stored with the document, in which + /// case it is returned with search hits on the document. Thus each document + /// should typically contain one or more stored fields which uniquely identify + /// it. + /// + ///

Note that fields which are not stored are + /// not available in documents retrieved from the index, e.g. with ScoreDoc.Doc, + /// Searcher.Doc(int) or IndexReader.Document(int). + ///

+ + [Serializable] + public sealed class Document + { + private class AnonymousClassEnumeration : System.Collections.IEnumerator + { + public AnonymousClassEnumeration(Document enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(Document enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + iter = Enclosing_Instance.fields.GetEnumerator(); + } + private System.Object tempAuxObj; + public bool MoveNext() + { + bool result = HasMoreElements(); + if (result) + { + tempAuxObj = NextElement(); + } + return result; + } + public void Reset() + { + tempAuxObj = null; + } + public System.Object Current + { + get + { + return tempAuxObj; + } + + } + private Document enclosingInstance; + public Document Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal System.Collections.IEnumerator iter; + public bool HasMoreElements() + { + return iter.MoveNext(); + } + public System.Object NextElement() + { + return iter.Current; + } + } + internal System.Collections.Generic.IList fields = new System.Collections.Generic.List(); + private float boost = 1.0f; + + /// Constructs a new document with no fields. + public Document() + { + } + + + /// Gets or sets, at indexing time, the boost factor. + /// + /// The default is 1.0 + /// + ///

Note that once a document is indexed this value is no longer available + /// from the index. At search time, for retrieved documents, this method always + /// returns 1. This however does not mean that the boost value set at indexing + /// time was ignored - it was just combined with other indexing time factors and + /// stored elsewhere, for better indexing and search performance. (For more + /// information see the "norm(t,d)" part of the scoring formula in + /// Similarity.) + ///
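A hedged sketch of the document-level boost described above; as the comment says, reading Boost back from a retrieved document always yields 1.0:

using Lucene.Net.Documents;

static class DocBoostSketch
{
    public static Document MakePromoted(string title)
    {
        var doc = new Document();
        doc.Add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
        doc.Boost = 1.5f;   // combined into index-time norms, not stored as-is
        return doc;
    }
}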

+ public float Boost + { + get { return boost; } + set { this.boost = value; } + } + + ///

Adds a field to a document. Several fields may be added with + /// the same name. In this case, if the fields are indexed, their text is + /// treated as though appended for the purposes of search.

+ ///

Note that the add method, like the removeField(s) methods, only makes sense + /// prior to adding a document to an index. These methods cannot + /// be used to change the content of an existing index! In order to achieve this, + /// a document has to be deleted from an index and a new changed version of that + /// document has to be added.

+ ///

+ public void Add(IFieldable field) + { + fields.Add(field); + } + + ///
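Putting the Document API together, a hedged sketch that adds a few fields and reads them back (the Field constructors and the GetValues/RemoveFields members used here appear further down in this diff):

using System;
using Lucene.Net.Documents;

class DocumentSketch
{
    static void Main()
    {
        var doc = new Document();
        doc.Add(new Field("title", "Mono documentation", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("tag", "monodoc", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("tag", "lucene", Field.Store.YES, Field.Index.NOT_ANALYZED));

        Console.WriteLine(doc.Get("title"));                        // first "title" value
        Console.WriteLine(string.Join(",", doc.GetValues("tag")));  // monodoc,lucene

        doc.RemoveFields("tag");    // removes every field named "tag"
        Console.WriteLine(doc);     // e.g. Document<stored,indexed,tokenized<title:...>>
    }
}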

Removes field with the specified name from the document. + /// If multiple fields exist with this name, this method removes the first field that has been added. + /// If there is no field with the specified name, the document remains unchanged.

+ ///

Note that the removeField(s) methods, like the add method, only make sense + /// prior to adding a document to an index. These methods cannot + /// be used to change the content of an existing index! In order to achieve this, + /// a document has to be deleted from an index and a new changed version of that + /// document has to be added.

+ ///

+ public void RemoveField(System.String name) + { + System.Collections.Generic.IEnumerator it = fields.GetEnumerator(); + while (it.MoveNext()) + { + IFieldable field = it.Current; + if (field.Name.Equals(name)) + { + fields.Remove(field); + return ; + } + } + } + + ///

Removes all fields with the given name from the document. + /// If there is no field with the specified name, the document remains unchanged.

+ ///

Note that the removeField(s) methods, like the add method, only make sense + /// prior to adding a document to an index. These methods cannot + /// be used to change the content of an existing index! In order to achieve this, + /// a document has to be deleted from an index and a new changed version of that + /// document has to be added.

+ ///

+ public void RemoveFields(System.String name) + { + for (int i = fields.Count - 1; i >= 0; i--) + { + IFieldable field = fields[i]; + if (field.Name.Equals(name)) + { + fields.RemoveAt(i); + } + } + } + + /// Returns a field with the given name if any exist in this document, or + /// null. If multiple fields exists with this name, this method returns the + /// first value added. + /// Do not use this method with lazy loaded fields. + /// + public Field GetField(System.String name) + { + return (Field) GetFieldable(name); + } + + + /// Returns a field with the given name if any exist in this document, or + /// null. If multiple fields exists with this name, this method returns the + /// first value added. + /// + public IFieldable GetFieldable(System.String name) + { + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name)) + return field; + } + return null; + } + + /// Returns the string value of the field with the given name if any exist in + /// this document, or null. If multiple fields exist with this name, this + /// method returns the first value added. If only binary fields with this name + /// exist, returns null. + /// + public System.String Get(System.String name) + { + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name) && (!field.IsBinary)) + return field.StringValue; + } + return null; + } + + /// Returns a List of all the fields in a document. + ///

Note that fields which are not stored are + /// not available in documents retrieved from the + /// index, e.g. Searcher.Doc(int) or IndexReader.Document(int). + ///

+ public System.Collections.Generic.IList GetFields() + { + return fields; + } + + private static readonly Field[] NO_FIELDS = new Field[0]; + + /// Returns an array of s with the given name. + /// Do not use with lazy loaded fields. + /// This method returns an empty array when there are no + /// matching fields. It never returns null. + /// + /// + /// the name of the field + /// + /// a Field[] array + /// + public Field[] GetFields(System.String name) + { + var result = new System.Collections.Generic.List(); + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name)) + { + result.Add((Field)field); + } + } + + if (result.Count == 0) + return NO_FIELDS; + + return result.ToArray(); + } + + + private static readonly IFieldable[] NO_FIELDABLES = new IFieldable[0]; + + /// Returns an array of s with the given name. + /// This method returns an empty array when there are no + /// matching fields. It never returns null. + /// + /// + /// the name of the field + /// + /// a Fieldable[] array + /// + public IFieldable[] GetFieldables(System.String name) + { + var result = new System.Collections.Generic.List(); + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name)) + { + result.Add(field); + } + } + + if (result.Count == 0) + return NO_FIELDABLES; + + return result.ToArray(); + } + + + private static readonly System.String[] NO_STRINGS = new System.String[0]; + + /// Returns an array of values of the field specified as the method parameter. + /// This method returns an empty array when there are no + /// matching fields. It never returns null. + /// + /// the name of the field + /// + /// a String[] of field values + /// + public System.String[] GetValues(System.String name) + { + var result = new System.Collections.Generic.List(); + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name) && (!field.IsBinary)) + result.Add(field.StringValue); + } + + if (result.Count == 0) + return NO_STRINGS; + + return result.ToArray(); + } + + private static readonly byte[][] NO_BYTES = new byte[0][]; + + /// Returns an array of byte arrays for of the fields that have the name specified + /// as the method parameter. This method returns an empty + /// array when there are no matching fields. It never + /// returns null. + /// + /// + /// the name of the field + /// + /// a byte[][] of binary field values + /// + public byte[][] GetBinaryValues(System.String name) + { + var result = new System.Collections.Generic.List(); + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name) && (field.IsBinary)) + result.Add(field.GetBinaryValue()); + } + + if (result.Count == 0) + return NO_BYTES; + + return result.ToArray(); + } + + /// Returns an array of bytes for the first (or only) field that has the name + /// specified as the method parameter. This method will return null + /// if no binary fields with the specified name are available. + /// There may be non-binary fields with the same name. + /// + /// + /// the name of the field. + /// + /// a byte[] containing the binary field value or null + /// + public byte[] GetBinaryValue(System.String name) + { + foreach(IFieldable field in fields) + { + if (field.Name.Equals(name) && (field.IsBinary)) + return field.GetBinaryValue(); + } + return null; + } + + /// Prints the fields of a document for human consumption. 
+ public override System.String ToString() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("Document<"); + for (int i = 0; i < fields.Count; i++) + { + IFieldable field = fields[i]; + buffer.Append(field.ToString()); + if (i != fields.Count - 1) + buffer.Append(" "); + } + buffer.Append(">"); + return buffer.ToString(); + } + + public System.Collections.Generic.IList fields_ForNUnit + { + get { return fields; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/Field.cs b/external/Lucene.Net.Light/src/core/Document/Field.cs new file mode 100644 index 0000000000..d39d9f476b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/Field.cs @@ -0,0 +1,667 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; +using TokenStream = Lucene.Net.Analysis.TokenStream; +using IndexWriter = Lucene.Net.Index.IndexWriter; +using StringHelper = Lucene.Net.Util.StringHelper; + +namespace Lucene.Net.Documents +{ + + /// A field is a section of a Document. Each field has two parts, a name and a + /// value. Values may be free text, provided as a String or as a Reader, or they + /// may be atomic keywords, which are not further processed. Such keywords may + /// be used to represent dates, urls, etc. Fields are optionally stored in the + /// index, so that they may be returned with hits on the document. + /// + + [Serializable] + public sealed class Field:AbstractField, IFieldable + { + /// Specifies whether and how a field should be stored. + public enum Store + { + /// Store the original field value in the index. This is useful for short texts + /// like a document's title which should be displayed with the results. The + /// value is stored in its original form, i.e. no analyzer is used before it is + /// stored. + /// + YES, + + /// Do not store the field value in the index. + NO + } + + /// Specifies whether and how a field should be indexed. + + public enum Index + { + /// Do not index the field value. This field can thus not be searched, + /// but one can still access its contents provided it is + /// stored. + /// + NO, + + /// Index the tokens produced by running the field's + /// value through an Analyzer. This is useful for + /// common text. + /// + ANALYZED, + + /// Index the field's value without using an Analyzer, so it can be searched. + /// As no analyzer is used the value will be stored as a single term. This is + /// useful for unique Ids like product numbers. + /// + NOT_ANALYZED, + + /// Expert: Index the field's value without an Analyzer, + /// and also disable the storing of norms. Note that you + /// can also separately enable/disable norms by setting + /// . 
No norms means that + /// index-time field and document boosting and field + /// length normalization are disabled. The benefit is + /// less memory usage as norms take up one byte of RAM + /// per indexed field for every document in the index, + /// during searching. Note that once you index a given + /// field with norms enabled, disabling norms will + /// have no effect. In other words, for this to have the + /// above described effect on a field, all instances of + /// that field must be indexed with NOT_ANALYZED_NO_NORMS + /// from the beginning. + /// + NOT_ANALYZED_NO_NORMS, + + /// Expert: Index the tokens produced by running the + /// field's value through an Analyzer, and also + /// separately disable the storing of norms. See + /// for what norms are + /// and why you may want to disable them. + /// + ANALYZED_NO_NORMS, + } + + /// Specifies whether and how a field should have term vectors. + public enum TermVector + { + /// Do not store term vectors. + NO, + + /// Store the term vectors of each document. A term vector is a list + /// of the document's terms and their number of occurrences in that document. + /// + YES, + + /// Store the term vector + token position information + /// + /// + /// + /// + WITH_POSITIONS, + + /// Store the term vector + Token offset information + /// + /// + /// + /// + WITH_OFFSETS, + + /// Store the term vector + Token position and offset information + /// + /// + /// + /// + /// + /// + /// + /// + WITH_POSITIONS_OFFSETS, + } + + + /// The value of the field as a String, or null. If null, the Reader value or + /// binary value is used. Exactly one of stringValue(), + /// readerValue(), and getBinaryValue() must be set. + /// + public override string StringValue + { + get { return fieldsData is System.String ? (System.String) fieldsData : null; } + } + + /// The value of the field as a Reader, or null. If null, the String value or + /// binary value is used. Exactly one of stringValue(), + /// readerValue(), and getBinaryValue() must be set. + /// + public override TextReader ReaderValue + { + get { return fieldsData is System.IO.TextReader ? (System.IO.TextReader) fieldsData : null; } + } + + /// The TokesStream for this field to be used when indexing, or null. If null, the Reader value + /// or String value is analyzed to produce the indexed tokens. + /// + public override TokenStream TokenStreamValue + { + get { return tokenStream; } + } + + + ///

Expert: change the value of this field. This can + /// be used during indexing to re-use a single Field + /// instance to improve indexing speed by avoiding GC cost + /// of new'ing and reclaiming Field instances. Typically + /// a single Document instance is re-used as + /// well. This helps most on small documents.

+ /// + ///

Each Field instance should only be used once + /// within a single Document instance. See ImproveIndexingSpeed + /// for details.

+ ///
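A hedged sketch of the re-use pattern just described (the SetValue overloads it relies on follow immediately below; writer is assumed to be an already-open Lucene.Net IndexWriter, which is not part of this hunk):

using Lucene.Net.Documents;
using Lucene.Net.Index;

static class ReuseSketch
{
    public static void IndexAll(IndexWriter writer, string[] bodies)
    {
        var doc = new Document();
        var body = new Field("body", "placeholder", Field.Store.NO, Field.Index.ANALYZED);
        doc.Add(body);

        foreach (string text in bodies)
        {
            body.SetValue(text);        // swap the value instead of allocating a new Field
            writer.AddDocument(doc);    // re-use the same Document instance each time
        }
    }
}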

+ public void SetValue(System.String value) + { + if (internalIsBinary) + { + throw new System.ArgumentException("cannot set a String value on a binary field"); + } + fieldsData = value; + } + + /// Expert: change the value of this field. See setValue(String). + public void SetValue(System.IO.TextReader value) + { + if (internalIsBinary) + { + throw new System.ArgumentException("cannot set a Reader value on a binary field"); + } + if (internalIsStored) + { + throw new System.ArgumentException("cannot set a Reader value on a stored field"); + } + fieldsData = value; + } + + /// Expert: change the value of this field. See setValue(String). + public void SetValue(byte[] value) + { + if (!internalIsBinary) + { + throw new System.ArgumentException("cannot set a byte[] value on a non-binary field"); + } + fieldsData = value; + internalBinaryLength = value.Length; + internalbinaryOffset = 0; + } + + /// Expert: change the value of this field. See setValue(String). + public void SetValue(byte[] value, int offset, int length) + { + if (!internalIsBinary) + { + throw new System.ArgumentException("cannot set a byte[] value on a non-binary field"); + } + fieldsData = value; + internalBinaryLength = length; + internalbinaryOffset = offset; + } + + /// Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. + /// May be combined with stored values from stringValue() or GetBinaryValue() + /// + public void SetTokenStream(TokenStream tokenStream) + { + this.internalIsIndexed = true; + this.internalIsTokenized = true; + this.tokenStream = tokenStream; + } + + /// Create a field by specifying its name, value and how it will + /// be saved in the index. Term vectors will not be stored in the index. + /// + /// + /// The name of the field + /// + /// The string to process + /// + /// Whether value should be stored in the index + /// + /// Whether the field should be indexed, and if so, if it should + /// be tokenized before indexing + /// + /// NullPointerException if name or value is null + /// IllegalArgumentException if the field is neither stored nor indexed + public Field(System.String name, System.String value, Store store, Index index) + : this(name, value, store, index, TermVector.NO) + { + } + + /// Create a field by specifying its name, value and how it will + /// be saved in the index. + /// + /// + /// The name of the field + /// + /// The string to process + /// + /// Whether value should be stored in the index + /// + /// Whether the field should be indexed, and if so, if it should + /// be tokenized before indexing + /// + /// Whether term vector should be stored + /// + /// NullPointerException if name or value is null + /// IllegalArgumentException in any of the following situations: + /// + /// the field is neither stored nor indexed + /// the field is not indexed but termVector is TermVector.YES + /// + /// + public Field(System.String name, System.String value, Store store, Index index, TermVector termVector) + : this(name, true, value, store, index, termVector) + { + } + + /// Create a field by specifying its name, value and how it will + /// be saved in the index. 
+ /// + /// + /// The name of the field + /// + /// Whether to .intern() name or not + /// + /// The string to process + /// + /// Whether value should be stored in the index + /// + /// Whether the field should be indexed, and if so, if it should + /// be tokenized before indexing + /// + /// Whether term vector should be stored + /// + /// NullPointerException if name or value is null + /// IllegalArgumentException in any of the following situations: + /// + /// the field is neither stored nor indexed + /// the field is not indexed but termVector is TermVector.YES + /// + /// + public Field(System.String name, bool internName, System.String value, Store store, Index index, TermVector termVector) + { + if (name == null) + throw new System.NullReferenceException("name cannot be null"); + if (value == null) + throw new System.NullReferenceException("value cannot be null"); + if (name.Length == 0 && value.Length == 0) + throw new System.ArgumentException("name and value cannot both be empty"); + if (index == Index.NO && store == Store.NO) + throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored"); + if (index == Index.NO && termVector != TermVector.NO) + throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed"); + + if (internName) + // field names are optionally interned + name = StringHelper.Intern(name); + + this.internalName = name; + + this.fieldsData = value; + + this.internalIsStored = store.IsStored(); + + this.internalIsIndexed = index.IsIndexed(); + this.internalIsTokenized = index.IsAnalyzed(); + this.internalOmitNorms = index.OmitNorms(); + + if (index == Index.NO) + { + this.internalOmitTermFreqAndPositions = false; + } + + this.internalIsBinary = false; + + SetStoreTermVector(termVector); + } + + /// Create a tokenized and indexed field that is not stored. Term vectors will + /// not be stored. The Reader is read only when the Document is added to the index, + /// i.e. you may not close the Reader until + /// has been called. + /// + /// + /// The name of the field + /// + /// The reader with the content + /// + /// NullPointerException if name or reader is null + public Field(System.String name, System.IO.TextReader reader):this(name, reader, TermVector.NO) + { + } + + /// Create a tokenized and indexed field that is not stored, optionally with + /// storing term vectors. The Reader is read only when the Document is added to the index, + /// i.e. you may not close the Reader until + /// has been called. + /// + /// + /// The name of the field + /// + /// The reader with the content + /// + /// Whether term vector should be stored + /// + /// NullPointerException if name or reader is null + public Field(System.String name, System.IO.TextReader reader, TermVector termVector) + { + if (name == null) + throw new System.NullReferenceException("name cannot be null"); + if (reader == null) + throw new System.NullReferenceException("reader cannot be null"); + + this.internalName = StringHelper.Intern(name); // field names are interned + this.fieldsData = reader; + + this.internalIsStored = false; + + this.internalIsIndexed = true; + this.internalIsTokenized = true; + + this.internalIsBinary = false; + + SetStoreTermVector(termVector); + } + + /// Create a tokenized and indexed field that is not stored. Term vectors will + /// not be stored. This is useful for pre-analyzed fields. + /// The TokenStream is read only when the Document is added to the index, + /// i.e. 
you may not close the TokenStream until + /// has been called. + /// + /// + /// The name of the field + /// + /// The TokenStream with the content + /// + /// NullPointerException if name or tokenStream is null + public Field(System.String name, TokenStream tokenStream):this(name, tokenStream, TermVector.NO) + { + } + + /// Create a tokenized and indexed field that is not stored, optionally with + /// storing term vectors. This is useful for pre-analyzed fields. + /// The TokenStream is read only when the Document is added to the index, + /// i.e. you may not close the TokenStream until + /// has been called. + /// + /// + /// The name of the field + /// + /// The TokenStream with the content + /// + /// Whether term vector should be stored + /// + /// NullPointerException if name or tokenStream is null + public Field(System.String name, TokenStream tokenStream, TermVector termVector) + { + if (name == null) + throw new System.NullReferenceException("name cannot be null"); + if (tokenStream == null) + throw new System.NullReferenceException("tokenStream cannot be null"); + + this.internalName = StringHelper.Intern(name); // field names are interned + this.fieldsData = null; + this.tokenStream = tokenStream; + + this.internalIsStored = false; + + this.internalIsIndexed = true; + this.internalIsTokenized = true; + + this.internalIsBinary = false; + + SetStoreTermVector(termVector); + } + + + /// Create a stored field with binary value. Optionally the value may be compressed. + /// + /// + /// The name of the field + /// + /// The binary value + /// + /// How value should be stored (compressed or not) + /// + /// IllegalArgumentException if store is Store.NO + public Field(System.String name, byte[] value_Renamed, Store store):this(name, value_Renamed, 0, value_Renamed.Length, store) + { + } + + /// Create a stored field with binary value. Optionally the value may be compressed. 
+ /// + /// + /// The name of the field + /// + /// The binary value + /// + /// Starting offset in value where this Field's bytes are + /// + /// Number of bytes to use for this Field, starting at offset + /// + /// How value should be stored (compressed or not) + /// + /// IllegalArgumentException if store is Store.NO + public Field(System.String name, byte[] value_Renamed, int offset, int length, Store store) + { + + if (name == null) + throw new System.ArgumentException("name cannot be null"); + if (value_Renamed == null) + throw new System.ArgumentException("value cannot be null"); + + this.internalName = StringHelper.Intern(name); // field names are interned + fieldsData = value_Renamed; + + if (store == Store.NO) + throw new System.ArgumentException("binary values can't be unstored"); + + internalIsStored = store.IsStored(); + internalIsIndexed = false; + internalIsTokenized = false; + internalOmitTermFreqAndPositions = false; + internalOmitNorms = true; + + internalIsBinary = true; + internalBinaryLength = length; + internalbinaryOffset = offset; + + SetStoreTermVector(TermVector.NO); + } + } + + public static class FieldExtensions + { + public static bool IsStored(this Field.Store store) + { + switch(store) + { + case Field.Store.YES: + return true; + case Field.Store.NO: + return false; + default: + throw new ArgumentOutOfRangeException("store", "Invalid value for Field.Store"); + } + } + + public static bool IsIndexed(this Field.Index index) + { + switch(index) + { + case Field.Index.NO: + return false; + case Field.Index.ANALYZED: + case Field.Index.NOT_ANALYZED: + case Field.Index.NOT_ANALYZED_NO_NORMS: + case Field.Index.ANALYZED_NO_NORMS: + return true; + default: + throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index"); + } + } + + public static bool IsAnalyzed(this Field.Index index) + { + switch (index) + { + case Field.Index.NO: + case Field.Index.NOT_ANALYZED: + case Field.Index.NOT_ANALYZED_NO_NORMS: + return false; + case Field.Index.ANALYZED: + case Field.Index.ANALYZED_NO_NORMS: + return true; + default: + throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index"); + } + } + + public static bool OmitNorms(this Field.Index index) + { + switch (index) + { + case Field.Index.ANALYZED: + case Field.Index.NOT_ANALYZED: + return false; + case Field.Index.NO: + case Field.Index.NOT_ANALYZED_NO_NORMS: + case Field.Index.ANALYZED_NO_NORMS: + return true; + default: + throw new ArgumentOutOfRangeException("index", "Invalid value for Field.Index"); + } + } + + public static bool IsStored(this Field.TermVector tv) + { + switch(tv) + { + case Field.TermVector.NO: + return false; + case Field.TermVector.YES: + case Field.TermVector.WITH_OFFSETS: + case Field.TermVector.WITH_POSITIONS: + case Field.TermVector.WITH_POSITIONS_OFFSETS: + return true; + default: + throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector"); + } + } + + public static bool WithPositions(this Field.TermVector tv) + { + switch (tv) + { + case Field.TermVector.NO: + case Field.TermVector.YES: + case Field.TermVector.WITH_OFFSETS: + return false; + case Field.TermVector.WITH_POSITIONS: + case Field.TermVector.WITH_POSITIONS_OFFSETS: + return true; + default: + throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector"); + } + } + + public static bool WithOffsets(this Field.TermVector tv) + { + switch (tv) + { + case Field.TermVector.NO: + case Field.TermVector.YES: + case Field.TermVector.WITH_POSITIONS: + return 
false; + case Field.TermVector.WITH_OFFSETS: + case Field.TermVector.WITH_POSITIONS_OFFSETS: + return true; + default: + throw new ArgumentOutOfRangeException("tv", "Invalid value for Field.TermVector"); + } + } + + public static Field.Index ToIndex(bool indexed, bool analyed) + { + return ToIndex(indexed, analyed, false); + } + + public static Field.Index ToIndex(bool indexed, bool analyzed, bool omitNorms) + { + + // If it is not indexed nothing else matters + if (!indexed) + { + return Field.Index.NO; + } + + // typical, non-expert + if (!omitNorms) + { + if (analyzed) + { + return Field.Index.ANALYZED; + } + return Field.Index.NOT_ANALYZED; + } + + // Expert: Norms omitted + if (analyzed) + { + return Field.Index.ANALYZED_NO_NORMS; + } + return Field.Index.NOT_ANALYZED_NO_NORMS; + } + + /// + /// Get the best representation of a TermVector given the flags. + /// + public static Field.TermVector ToTermVector(bool stored, bool withOffsets, bool withPositions) + { + // If it is not stored, nothing else matters. + if (!stored) + { + return Field.TermVector.NO; + } + + if (withOffsets) + { + if (withPositions) + { + return Field.TermVector.WITH_POSITIONS_OFFSETS; + } + return Field.TermVector.WITH_OFFSETS; + } + + if (withPositions) + { + return Field.TermVector.WITH_POSITIONS; + } + return Field.TermVector.YES; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/FieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/FieldSelector.cs new file mode 100644 index 0000000000..f940f08500 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/FieldSelector.cs @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Documents +{ + /// Similar to a + /// java.io.FileFilter, the FieldSelector allows one to make decisions about + /// what Fields get loaded on a by + /// + public interface FieldSelector + { + + /// + /// the field to accept or reject + /// + /// an instance of + /// if the named fieldName should be loaded. + /// + FieldSelectorResult Accept(System.String fieldName); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/FieldSelectorResult.cs b/external/Lucene.Net.Light/src/core/Document/FieldSelectorResult.cs new file mode 100644 index 0000000000..7d3a889cb2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/FieldSelectorResult.cs @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using System.Runtime.InteropServices; + +namespace Lucene.Net.Documents +{ + /// Provides information about what should be done with this Field + public enum FieldSelectorResult + { + /// + /// + /// + INVALID, // TODO: This is kinda a kludgy workaround for the fact enums can't be null + + /// Load this every time the is loaded, reading in the data as it is encountered. + /// and should not return null. + ///

+ /// Document.Add(IFieldable) should be called by the Reader.
+ LOAD,
+
+ /// Lazily load this Field. This means the Field is valid, but it may not
+ /// actually contain its data until its value is requested.
+ /// Document.GetField(String) SHOULD NOT BE USED. Document.GetFieldable(String)
+ /// is safe to use and should return a valid instance of an IFieldable.
+ ///
+ /// Document.Add(IFieldable) should be called by the Reader.
+ LAZY_LOAD,
+
+ /// Do not load the Field. Document.GetField(String) and
+ /// Document.GetFieldable(String) should return null.
+ /// Document.Add(IFieldable) is not called.
+ ///
+ /// Document.Add(IFieldable) should not be called by the Reader.
+ NO_LOAD,
+
+ /// Load this field as in the LOAD case, but immediately return from loading
+ /// for the Document. Thus, the Document may not have its complete set of
+ /// Fields. Document.GetField(String) and Document.GetFieldable(String) should
+ /// both be valid for this Field.
+ ///
+ /// Document.Add(IFieldable) should be called by the Reader.

+ LOAD_AND_BREAK, + + /// Expert: Load the size of this rather than its value. + /// Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. + /// The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] + /// + SIZE, + + /// Expert: Like but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded + SIZE_AND_BREAK + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/Fieldable.cs b/external/Lucene.Net.Light/src/core/Document/Fieldable.cs new file mode 100644 index 0000000000..89d37d1c6a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/Fieldable.cs @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; +using TokenStream = Lucene.Net.Analysis.TokenStream; +using FieldInvertState = Lucene.Net.Index.FieldInvertState; + +namespace Lucene.Net.Documents +{ + /// Synonymous with . + /// + ///

WARNING: This interface may change within minor versions, despite Lucene's backward compatibility requirements. + /// This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards + /// compatibility promises remain intact. For example, Lucene can still + /// read and write indices created within the same major version. + ///

+ /// + /// + ///

+ public interface IFieldable
+ {
+ /// Gets or sets the boost factor for hits for this field. This value will be
+ /// multiplied into the score of all hits on this field of this
+ /// document.
+ ///
+ /// The boost is multiplied by the boost of the document
+ /// containing this field. If a document has multiple fields with the same
+ /// name, all such values are multiplied together. This product is then
+ /// used to compute the norm factor for the field. By
+ /// default, in the Similarity's norm computation, the boost value is multiplied
+ /// by the field's length norm and then rounded to a single byte before it is
+ /// stored in the index. One should attempt to ensure that this product does
+ /// not overflow the range of that encoding.
+ ///
+ /// The default value is 1.0.
+ ///
+ /// Note: this value is not stored directly with the document in the index.
+ /// Documents returned from the index reader or searcher may thus not have the
+ /// same value present as when this field was indexed.
+ ///

+ /// + /// + /// + /// + /// + /// + float Boost { get; set; } + + /// Returns the name of the field as an interned string. + /// For example "date", "title", "body", ... + /// + string Name { get; } + + /// The value of the field as a String, or null. + ///

+ /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value + /// unless isBinary()==true, in which case GetBinaryValue() will be used. + /// + /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. + /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null, + /// else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens. + ///

+ string StringValue { get; } + + /// The value of the field as a Reader, which can be used at index time to generate indexed tokens. + /// + /// + TextReader ReaderValue { get; } + + /// The TokenStream for this field to be used when indexing, or null. + /// + /// + TokenStream TokenStreamValue { get; } + + /// True if the value of the field is to be stored in the index for return + /// with search hits. + /// + bool IsStored { get; } + + /// True if the value of the field is to be indexed, so that it may be + /// searched on. + /// + bool IsIndexed { get; } + + /// True if the value of the field should be tokenized as text prior to + /// indexing. Un-tokenized fields are indexed as a single word and may not be + /// Reader-valued. + /// + bool IsTokenized { get; } + + /// True if the term or terms used to index this field are stored as a term + /// vector, available from . + /// These methods do not provide access to the original content of the field, + /// only to terms used to index it. If the original content must be + /// preserved, use the stored attribute instead. + /// + /// + /// + /// + bool IsTermVectorStored { get; } + + /// True if terms are stored as term vector together with their offsets + /// (start and end positon in source text). + /// + bool IsStoreOffsetWithTermVector { get; } + + /// True if terms are stored as term vector together with their token positions. + bool IsStorePositionWithTermVector { get; } + + /// True if the value of the field is stored as binary + bool IsBinary { get; } + + /// + /// True if norms are omitted for this indexed field. + /// + /// Expert: + /// If set, omit normalization factors associated with this indexed field. + /// This effectively disables indexing boosts and length normalization for this field. + /// + /// + bool OmitNorms { get; set; } + + + /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving + /// it's values via or is only valid as long as the that + /// retrieved the is still open. + /// + /// + /// true if this field can be loaded lazily + bool IsLazy { get; } + + /// Returns offset into byte[] segment that is used as value, if Field is not binary + /// returned value is undefined + /// + /// index of the first character in byte[] segment that represents this Field value + int BinaryOffset { get; } + + /// Returns length of byte[] segment that is used as value, if Field is not binary + /// returned value is undefined + /// + /// length of byte[] segment that represents this Field value + int BinaryLength { get; } + + /// Return the raw byte[] for the binary field. Note that + /// you must also call and + /// to know which range of bytes in this + /// returned array belong to the field. + /// + /// reference to the Field value as byte[]. + byte[] GetBinaryValue(); + + /// Return the raw byte[] for the binary field. Note that + /// you must also call and + /// to know which range of bytes in this + /// returned array belong to the field.

+ /// About reuse: if you pass in the result byte[] and it is
+ /// used, likely the underlying implementation will hold
+ /// onto this byte[] and return it in future calls to
+ /// GetBinaryValue() or GetBinaryValue(byte[]).
+ /// So if you subsequently re-use the same byte[] elsewhere
+ /// it will alter this Fieldable's value.
+ ///
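A cautious consumer-side sketch (the fieldable variable is a stand-in for any IFieldable and is not part of this patch): copy the addressed range out instead of holding on to the shared array:

    using System;
    using Lucene.Net.Documents;

    static class BinaryValueSketch
    {
        public static byte[] CopyBinary(IFieldable fieldable)
        {
            byte[] shared = fieldable.GetBinaryValue();   // may be a re-used buffer
            var copy = new byte[fieldable.BinaryLength];
            Array.Copy(shared, fieldable.BinaryOffset, copy, 0, fieldable.BinaryLength);
            return copy;
        }
    }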

+ /// User defined buffer that will be used if + /// possible. If this is null or not large enough, a new + /// buffer is allocated + /// + /// reference to the Field value as byte[]. + /// + byte[] GetBinaryValue(byte[] result); + + /// Expert: + /// + /// If set, omit term freq, positions and payloads from + /// postings for this field. + /// + /// + /// NOTE: While this option reduces storage space + /// required in the index, it also means any query + /// requiring positional information, such as + /// or + /// + /// subclasses will silently fail to find results. + /// + bool OmitTermFreqAndPositions { set; get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/LoadFirstFieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/LoadFirstFieldSelector.cs new file mode 100644 index 0000000000..4f353f6f8d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/LoadFirstFieldSelector.cs @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +namespace Lucene.Net.Documents +{ + + /// Load the First field and break. + ///

+ /// See FieldSelectorResult.LOAD_AND_BREAK.
+ ///
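For illustration only, a hand-rolled selector in the same spirit; the field names are made up, and such a selector would typically be handed to an IndexReader.Document overload that accepts a FieldSelector:

    using System;
    using Lucene.Net.Documents;

    [Serializable]
    class TitleEagerBodyLazySelector : FieldSelector
    {
        public FieldSelectorResult Accept(string fieldName)
        {
            if (fieldName == "title") return FieldSelectorResult.LOAD;      // read eagerly
            if (fieldName == "body") return FieldSelectorResult.LAZY_LOAD;  // defer until accessed
            return FieldSelectorResult.NO_LOAD;                             // skip everything else
        }
    }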

+ [Serializable] + public class LoadFirstFieldSelector : FieldSelector + { + + public virtual FieldSelectorResult Accept(System.String fieldName) + { + return FieldSelectorResult.LOAD_AND_BREAK; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/MapFieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/MapFieldSelector.cs new file mode 100644 index 0000000000..92a8959abe --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/MapFieldSelector.cs @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Documents +{ + /// A based on a Map of field names to s + [Serializable] + public class MapFieldSelector : FieldSelector + { + internal IDictionary fieldSelections; + + /// Create a a MapFieldSelector + /// maps from field names (String) to s + /// + public MapFieldSelector(IDictionary fieldSelections) + { + this.fieldSelections = fieldSelections; + } + + /// Create a a MapFieldSelector + /// fields to LOAD. List of Strings. All other fields are NO_LOAD. + /// + public MapFieldSelector(IList fields) + { + fieldSelections = new HashMap(fields.Count * 5 / 3); + foreach(var field in fields) + fieldSelections[field] = FieldSelectorResult.LOAD; + } + + /// Create a a MapFieldSelector + /// fields to LOAD. All other fields are NO_LOAD. + /// + public MapFieldSelector(params System.String[] fields) + : this(fields.ToList()) // TODO: this is slow + { + } + + /// Load field according to its associated value in fieldSelections + /// a field name + /// + /// the fieldSelections value that field maps to or NO_LOAD if none. + /// + public virtual FieldSelectorResult Accept(System.String field) + { + FieldSelectorResult selection = fieldSelections[field]; + return selection != FieldSelectorResult.INVALID ? selection : FieldSelectorResult.NO_LOAD; // TODO: See FieldSelectorResult + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/NumberTools.cs b/external/Lucene.Net.Light/src/core/Document/NumberTools.cs new file mode 100644 index 0000000000..f877120841 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/NumberTools.cs @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Search; +using NumericUtils = Lucene.Net.Util.NumericUtils; + +namespace Lucene.Net.Documents +{ + + // do not remove this class in 3.0, it may be needed to decode old indexes! + + /// Provides support for converting longs to Strings, and back again. The strings + /// are structured so that lexicographic sorting order is preserved. + /// + ///

+ /// That is, if l1 is less than l2 for any two longs l1 and l2, then + /// NumberTools.longToString(l1) is lexicographically less than + /// NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) + /// + ///

+ /// This class handles all long values (unlike the older DateField helper).
+ ///
+ ///
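A small sketch of the guarantee described above, using the LongToString/StringToLong pair defined below (illustrative only):

    using System;
    using Lucene.Net.Documents;

    static class NumberToolsSketch
    {
        public static void Demo()
        {
            string a = NumberTools.LongToString(42L);
            string b = NumberTools.LongToString(1000L);
            // Lexicographic order of the encoded strings matches numeric order.
            Console.WriteLine(string.CompareOrdinal(a, b) < 0);    // True
            // The encoding round-trips.
            Console.WriteLine(NumberTools.StringToLong(a) == 42L); // True
        }
    }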

+ /// For new indexes use instead, which + /// provides a sortable binary representation (prefix encoded) of numeric + /// values. + /// To index and efficiently query numeric values use + /// and . + /// This class is included for use with existing + /// indices and will be removed in a future release (possibly Lucene 4.0). + /// + [Obsolete("For new indexes use NumericUtils instead, which provides a sortable binary representation (prefix encoded) of numeric values. To index and efficiently query numeric values use NumericField and NumericRangeQuery. This class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0).")] + public class NumberTools + { + + private const int RADIX = 36; + + private const char NEGATIVE_PREFIX = '-'; + + // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX + private const char POSITIVE_PREFIX = '0'; + + //NB: this must be less than + /// Equivalent to longToString(Long.MIN_VALUE) +#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE + public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000"; +#else + public static readonly System.String MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000000"; +#endif + + /// Equivalent to longToString(Long.MAX_VALUE) +#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE + public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7"; +#else + public static readonly System.String MAX_STRING_VALUE = POSITIVE_PREFIX + "7fffffffffffffff"; +#endif + + /// The length of (all) strings returned by + public static readonly int STR_SIZE = MIN_STRING_VALUE.Length; + + /// Converts a long to a String suitable for indexing. + public static System.String LongToString(long l) + { + + if (l == System.Int64.MinValue) + { + // special case, because long is not symmetric around zero + return MIN_STRING_VALUE; + } + + System.Text.StringBuilder buf = new System.Text.StringBuilder(STR_SIZE); + + if (l < 0) + { + buf.Append(NEGATIVE_PREFIX); + l = System.Int64.MaxValue + l + 1; + } + else + { + buf.Append(POSITIVE_PREFIX); + } +#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE + System.String num = ToString(l); +#else + System.String num = System.Convert.ToString(l, RADIX); +#endif + + int padLen = STR_SIZE - num.Length - buf.Length; + while (padLen-- > 0) + { + buf.Append('0'); + } + buf.Append(num); + + return buf.ToString(); + } + + /// Converts a String that was returned by back to a + /// long. + /// + /// + /// IllegalArgumentException + /// if the input is null + /// + /// NumberFormatException + /// if the input does not parse (it was not a String returned by + /// longToString()). 
+ /// + public static long StringToLong(System.String str) + { + if (str == null) + { + throw new System.NullReferenceException("string cannot be null"); + } + if (str.Length != STR_SIZE) + { + throw new System.FormatException("string is the wrong size"); + } + + if (str.Equals(MIN_STRING_VALUE)) + { + return System.Int64.MinValue; + } + + char prefix = str[0]; +#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE + long l = ToLong(str.Substring(1)); +#else + long l = System.Convert.ToInt64(str.Substring(1), RADIX); +#endif + + if (prefix == POSITIVE_PREFIX) + { + // nop + } + else if (prefix == NEGATIVE_PREFIX) + { + l = l - System.Int64.MaxValue - 1; + } + else + { + throw new System.FormatException("string does not begin with the correct prefix"); + } + + return l; + } + +#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE + #region BASE36 OPS + static System.String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; + static long[] powersOf36 = + { + 1L, + 36L, + 36L*36L, + 36L*36L*36L, + 36L*36L*36L*36L, + 36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L, + 36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L*36L + }; + + public static System.String ToString(long lval) + { + if (lval == 0) + { + return "0"; + } + + int maxStrLen = powersOf36.Length; + long curval = lval; + + char[] tb = new char[maxStrLen]; + int outpos = 0; + for (int i = 0; i < maxStrLen; i++) + { + long pval = powersOf36[maxStrLen - i - 1]; + int pos = (int)(curval / pval); + tb[outpos++] = digits.Substring(pos, 1).ToCharArray()[0]; + curval = curval % pval; + } + if (outpos == 0) + tb[outpos++] = '0'; + return new System.String(tb, 0, outpos).TrimStart('0'); + } + + public static long ToLong(System.String t) + { + long ival = 0; + char[] tb = t.ToCharArray(); + for (int i = 0; i < tb.Length; i++) + { + ival += powersOf36[i] * digits.IndexOf(tb[tb.Length - i - 1]); + } + return ival; + } + #endregion +#endif + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/NumericField.cs b/external/Lucene.Net.Light/src/core/Document/NumericField.cs new file mode 100644 index 0000000000..e77dee47d5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/NumericField.cs @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.IO; +using Lucene.Net.Search; +using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream; +using TokenStream = Lucene.Net.Analysis.TokenStream; +using NumericUtils = Lucene.Net.Util.NumericUtils; +using FieldCache = Lucene.Net.Search.FieldCache; +using SortField = Lucene.Net.Search.SortField; + +namespace Lucene.Net.Documents +{ + // javadocs + + ///

This class provides a that enables indexing + /// of numeric values for efficient range filtering and + /// sorting. Here's an example usage, adding an int value: + /// + /// document.add(new NumericField(name).setIntValue(value)); + /// + /// + /// For optimal performance, re-use the + /// NumericField and instance for more than + /// one document: + /// + /// + /// NumericField field = new NumericField(name); + /// Document document = new Document(); + /// document.add(field); + /// + /// for(all documents) { + /// ... + /// field.setIntValue(value) + /// writer.addDocument(document); + /// ... + /// } + /// + /// + ///

The .Net native types int, long,
 + /// float and double are
 + /// directly supported. However, any value that can be
 + /// converted into these native types can also be indexed.
 + /// For example, date/time values can be translated into a long
 + /// value (the equivalent of java.util.Date.getTime in Java). If you
 + /// don't need millisecond precision, you can quantize the
 + /// value, either by dividing that millisecond value or by using the
 + /// separate getters (for year, month, etc.) to construct an int or
 + /// long value.
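For instance, a date might be quantized to whole days before being handed to a NumericField; the epoch choice and field name here are illustrative assumptions, not part of this patch:

    using System;
    using Lucene.Net.Documents;

    static class DayStampSketch
    {
        static readonly DateTime Epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

        public static NumericField MakeDayStamp(DateTime when)
        {
            // Day resolution is enough for this hypothetical field, so drop the time part.
            long days = (long)(when.ToUniversalTime() - Epoch).TotalDays;
            return new NumericField("dayStamp", Field.Store.NO, true).SetLongValue(days);
        }
    }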

+ /// + ///

To perform range querying or filtering against a
 + /// NumericField, use NumericRangeQuery or NumericRangeFilter.
 + /// To sort according to a
 + /// NumericField, use the normal numeric sort types, eg SortField.INT.
 + /// NumericField values
 + /// can also be loaded directly from the FieldCache.

+ /// + ///

By default, a NumericField's value is not stored but
 + /// is indexed for range filtering and sorting. You can use
 + /// the NumericField(String, Field.Store, bool) constructor
 + /// if you need to change these defaults.
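A rough C# equivalent of the usage sketched earlier in this comment, storing the plain-text form as well as indexing the value (the surrounding IndexWriter code is assumed):

    using Lucene.Net.Documents;

    static class NumericFieldSketch
    {
        public static Document MakePriceDoc(double price)
        {
            var doc = new Document();
            // Field.Store.YES keeps the string form; the final 'true' indexes it for range/sort.
            doc.Add(new NumericField("price", Field.Store.YES, true).SetDoubleValue(price));
            return doc;
        }
    }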

+ /// + ///

You may add the same field name as a NumericField to + /// the same document more than once. Range querying and + /// filtering will be the logical OR of all values; so a range query + /// will hit all documents that have at least one value in + /// the range. However sort behavior is not defined. If you need to sort, + /// you should separately index a single-valued NumericField.

+ /// + ///

A NumericField will consume somewhat more disk space + /// in the index than an ordinary single-valued field. + /// However, for a typical index that includes substantial + /// textual content per document, this increase will likely + /// be in the noise.

+ /// + ///

Within Lucene, each numeric value is indexed as a
 + /// trie structure, where each term is logically
 + /// assigned to larger and larger pre-defined brackets (which
 + /// are simply lower-precision representations of the value).
 + /// The step size between each successive bracket is called the
 + /// precisionStep, measured in bits. Smaller
 + /// precisionStep values result in a larger number
 + /// of brackets, which consumes more disk space in the index
 + /// but may result in faster range search performance. The
 + /// default value, 4, was selected for a reasonable tradeoff
 + /// of disk space consumption versus performance. You can
 + /// use the expert NumericField(String, int, Field.Store, bool) constructor if you'd
 + /// like to change the value. Note that you must also
 + /// specify a congruent value when creating NumericRangeQuery or NumericRangeFilter.
 + /// For low cardinality fields larger precision steps are good.
 + /// If the cardinality is < 100, it is fair
 + /// to use int.MaxValue, which produces one
 + /// term per value.
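To make the congruence requirement concrete, a hedged sketch: the same precisionStep must be used on the indexing and query sides. NumericRangeQuery is referenced here on the assumption that the full search API is available, which may not hold for this trimmed copy:

    using Lucene.Net.Documents;
    using Lucene.Net.Search;

    static class PrecisionStepSketch
    {
        const int Step = 8; // coarser than the default of 4: fewer terms per value, slower ranges

        public static NumericField MakeField(int views)
        {
            return new NumericField("views", Step, Field.Store.NO, true).SetIntValue(views);
        }

        public static Query MakeRange(int min, int max)
        {
            // Must pass the same precisionStep that was used at index time.
            return NumericRangeQuery.NewIntRange("views", Step, min, max, true, true);
        }
    }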

For more information on the internals of numeric trie
 + /// indexing, including the precisionStep
 + /// configuration, see NumericRangeQuery. The format of
 + /// indexed values is described in NumericUtils.

If you only need to sort by numeric value, and never
 + /// run range querying/filtering, you can index using a
 + /// precisionStep of int.MaxValue.
 + /// This will minimize disk space consumed.

+ /// + ///

More advanced users can instead use
 + /// NumericTokenStream directly, when indexing numbers. This
 + /// class is a wrapper around this token stream type for
 + /// easier, more intuitive usage.

+ /// + ///

NOTE: This class is only used during
 + /// indexing. When retrieving the stored field value from a
 + /// Document instance after search, you will get a
 + /// conventional Field instance where the numeric
 + /// values are returned as strings (according to
 + /// toString(value) of the used data type).
 + ///
 + ///

NOTE: This API is + /// experimental and might change in incompatible ways in the + /// next release. + /// + ///

+ /// 2.9 + /// + [Serializable] + public sealed class NumericField:AbstractField + { + + new private readonly NumericTokenStream tokenStream; + + /// Creates a field for numeric values using the default precisionStep + /// (4). The instance is not yet initialized with + /// a numeric value, before indexing a document containing this field, + /// set a value using the various set???Value() methods. + /// This constructor creates an indexed, but not stored field. + /// + /// the field name + /// + public NumericField(System.String name):this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true) + { + } + + /// Creates a field for numeric values using the default precisionStep + /// (4). The instance is not yet initialized with + /// a numeric value, before indexing a document containing this field, + /// set a value using the various set???Value() methods. + /// + /// the field name + /// + /// if the field should be stored in plain text form + /// (according to toString(value) of the used data type) + /// + /// if the field should be indexed using + /// + public NumericField(System.String name, Field.Store store, bool index):this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index) + { + } + + /// Creates a field for numeric values with the specified + /// precisionStep. The instance is not yet initialized with + /// a numeric value, before indexing a document containing this field, + /// set a value using the various set???Value() methods. + /// This constructor creates an indexed, but not stored field. + /// + /// the field name + /// + /// the used precision step + /// + public NumericField(System.String name, int precisionStep):this(name, precisionStep, Field.Store.NO, true) + { + } + + /// Creates a field for numeric values with the specified + /// precisionStep. The instance is not yet initialized with + /// a numeric value, before indexing a document containing this field, + /// set a value using the various set???Value() methods. + /// + /// the field name + /// + /// the used precision step + /// + /// if the field should be stored in plain text form + /// (according to toString(value) of the used data type) + /// + /// if the field should be indexed using + /// + public NumericField(System.String name, int precisionStep, Field.Store store, bool index):base(name, store, index?Field.Index.ANALYZED_NO_NORMS:Field.Index.NO, Field.TermVector.NO) + { + OmitTermFreqAndPositions = true; + tokenStream = new NumericTokenStream(precisionStep); + } + + /// Returns a for indexing the numeric value. + public override TokenStream TokenStreamValue + { + get { return IsIndexed ? tokenStream : null; } + } + + /// Returns always null for numeric fields + public override byte[] GetBinaryValue(byte[] result) + { + return null; + } + + /// Returns always null for numeric fields + public override TextReader ReaderValue + { + get { return null; } + } + + /// Returns the numeric value as a string (how it is stored, when is chosen). + public override string StringValue + { + get { return (fieldsData == null) ? null : fieldsData.ToString(); } + } + + /// Returns the current numeric value as a subclass of , null if not yet initialized. + public ValueType NumericValue + { + get { return (System.ValueType) fieldsData; } + } + + /// Initializes the field with the supplied long value. 
+ /// the numeric value + /// + /// this instance, because of this you can use it the following way: + /// document.add(new NumericField(name, precisionStep).SetLongValue(value)) + /// + public NumericField SetLongValue(long value_Renamed) + { + tokenStream.SetLongValue(value_Renamed); + fieldsData = value_Renamed; + return this; + } + + /// Initializes the field with the supplied int value. + /// the numeric value + /// + /// this instance, because of this you can use it the following way: + /// document.add(new NumericField(name, precisionStep).setIntValue(value)) + /// + public NumericField SetIntValue(int value_Renamed) + { + tokenStream.SetIntValue(value_Renamed); + fieldsData = value_Renamed; + return this; + } + + /// Initializes the field with the supplied double value. + /// the numeric value + /// + /// this instance, because of this you can use it the following way: + /// document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + /// + public NumericField SetDoubleValue(double value_Renamed) + { + tokenStream.SetDoubleValue(value_Renamed); + fieldsData = value_Renamed; + return this; + } + + /// Initializes the field with the supplied float value. + /// the numeric value + /// + /// this instance, because of this you can use it the following way: + /// document.add(new NumericField(name, precisionStep).setFloatValue(value)) + /// + public NumericField SetFloatValue(float value_Renamed) + { + tokenStream.SetFloatValue(value_Renamed); + fieldsData = value_Renamed; + return this; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Document/SetBasedFieldSelector.cs b/external/Lucene.Net.Light/src/core/Document/SetBasedFieldSelector.cs new file mode 100644 index 0000000000..14e3e02cd8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Document/SetBasedFieldSelector.cs @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Documents +{ + /// Declare what fields to load normally and what fields to load lazily + /// + /// + /// + [Serializable] + public class SetBasedFieldSelector : FieldSelector + { + private ISet fieldsToLoad; + private ISet lazyFieldsToLoad; + + /// Pass in the Set of names to load and the Set of names to load lazily. If both are null, the + /// Document will not have any on it. + /// + /// A Set of field names to load. May be empty, but not null + /// + /// A Set of field names to load lazily. May be empty, but not null + /// + public SetBasedFieldSelector(ISet fieldsToLoad, ISet lazyFieldsToLoad) + { + this.fieldsToLoad = fieldsToLoad; + this.lazyFieldsToLoad = lazyFieldsToLoad; + } + + /// Indicate whether to load the field with the given name or not. 
If the is not in either of the + /// initializing Sets, then is returned. If a Field name + /// is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. + /// + /// + /// The name to check + /// + /// The + /// + public virtual FieldSelectorResult Accept(System.String fieldName) + { + FieldSelectorResult result = FieldSelectorResult.NO_LOAD; + if (fieldsToLoad.Contains(fieldName) == true) + { + result = FieldSelectorResult.LOAD; + } + if (lazyFieldsToLoad.Contains(fieldName) == true) + { + result = FieldSelectorResult.LAZY_LOAD; + } + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/AbstractAllTermDocs.cs b/external/Lucene.Net.Light/src/core/Index/AbstractAllTermDocs.cs new file mode 100644 index 0000000000..935b7faad7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/AbstractAllTermDocs.cs @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + /// + /// Base class for enumerating all but deleted docs. + /// + ///

NOTE: this class is meant only to be used internally
 + /// by Lucene; it's only public so it can be shared across
 + /// packages. This means the API is freely subject to
 + /// change, and the class could be removed entirely, in any
 + /// Lucene release. Use directly at your own risk!
 + ///

+ public abstract class AbstractAllTermDocs : TermDocs + { + protected int maxDoc; + protected int internalDoc = -1; + + protected AbstractAllTermDocs(int maxDoc) + { + this.maxDoc = maxDoc; + } + + public void Seek(Term term) + { + if (term == null) + { + internalDoc = -1; + } + else + { + throw new NotSupportedException(); + } + } + + public void Seek(TermEnum termEnum) + { + throw new NotSupportedException(); + } + + public int Doc + { + get { return internalDoc; } + } + + public int Freq + { + get { return 1; } + } + + public bool Next() + { + return SkipTo(internalDoc + 1); + } + + public int Read(int[] docs, int[] freqs) + { + int length = docs.Length; + int i = 0; + while (i < length && internalDoc < maxDoc) + { + if (!IsDeleted(internalDoc)) + { + docs[i] = internalDoc; + freqs[i] = 1; + ++i; + } + internalDoc++; + } + return i; + } + + public bool SkipTo(int target) + { + internalDoc = target; + while (internalDoc < maxDoc) + { + if (!IsDeleted(internalDoc)) + { + return true; + } + internalDoc++; + } + return false; + } + + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + + public abstract bool IsDeleted(int doc); + } +} diff --git a/external/Lucene.Net.Light/src/core/Index/AllTermDocs.cs b/external/Lucene.Net.Light/src/core/Index/AllTermDocs.cs new file mode 100644 index 0000000000..da5f16dd56 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/AllTermDocs.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using BitVector = Lucene.Net.Util.BitVector; + +namespace Lucene.Net.Index +{ + + class AllTermDocs : AbstractAllTermDocs + { + protected internal BitVector deletedDocs; + + protected internal AllTermDocs(SegmentReader parent) : base(parent.MaxDoc) + { + lock (parent) + { + this.deletedDocs = parent.deletedDocs; + } + } + + protected override void Dispose(bool disposing) + { + // Do nothing. + } + + public override bool IsDeleted(int doc) + { + return deletedDocs != null && deletedDocs.Get(doc); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/BufferedDeletes.cs b/external/Lucene.Net.Light/src/core/Index/BufferedDeletes.cs new file mode 100644 index 0000000000..52ef1dfdd1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/BufferedDeletes.cs @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Lucene.Net.Search; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// Holds buffered deletes, by docID, term or query. We + /// hold two instances of this class: one for the deletes + /// prior to the last flush, the other for deletes after + /// the last flush. This is so if we need to abort + /// (discard all buffered docs) we can also discard the + /// buffered deletes yet keep the deletes done during + /// previously flushed segments. + /// + class BufferedDeletes + { + internal int numTerms; + internal IDictionary terms = null; + internal IDictionary queries = new HashMap(); + internal List docIDs = new List(); + internal long bytesUsed; + internal bool doTermSort; + + public BufferedDeletes(bool doTermSort) + { + this.doTermSort = doTermSort; + if (doTermSort) + { + //TODO: Used in place of TreeMap + terms = new SortedDictionary(); + } + else + { + terms = new HashMap(); + } + } + + + // Number of documents a delete term applies to. + internal sealed class Num + { + internal int num; + + internal Num(int num) + { + this.num = num; + } + + internal int GetNum() + { + return num; + } + + internal void SetNum(int num) + { + // Only record the new number if it's greater than the + // current one. This is important because if multiple + // threads are replacing the same doc at nearly the + // same time, it's possible that one thread that got a + // higher docID is scheduled before the other + // threads. 
+ if (num > this.num) + this.num = num; + } + } + + internal virtual int Size() + { + // We use numTerms not terms.size() intentionally, so + // that deletes by the same term multiple times "count", + // ie if you ask to flush every 1000 deletes then even + // dup'd terms are counted towards that 1000 + return numTerms + queries.Count + docIDs.Count; + } + + internal virtual void Update(BufferedDeletes @in) + { + numTerms += @in.numTerms; + bytesUsed += @in.bytesUsed; + foreach (KeyValuePair term in @in.terms) + { + terms[term.Key] = term.Value; + } + foreach (KeyValuePair term in @in.queries) + { + queries[term.Key] = term.Value; + } + + docIDs.AddRange(@in.docIDs); + @in.Clear(); + } + + internal virtual void Clear() + { + terms.Clear(); + queries.Clear(); + docIDs.Clear(); + numTerms = 0; + bytesUsed = 0; + } + + internal virtual void AddBytesUsed(long b) + { + bytesUsed += b; + } + + internal virtual bool Any() + { + return terms.Count > 0 || docIDs.Count > 0 || queries.Count > 0; + } + + // Remaps all buffered deletes based on a completed + // merge + internal virtual void Remap(MergeDocIDRemapper mapper, SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) + { + lock (this) + { + IDictionary newDeleteTerms; + + // Remap delete-by-term + if (terms.Count > 0) + { + if (doTermSort) + { + newDeleteTerms = new SortedDictionary(); + } + else + { + newDeleteTerms = new HashMap(); + } + foreach(var entry in terms) + { + Num num = entry.Value; + newDeleteTerms[entry.Key] = new Num(mapper.Remap(num.GetNum())); + } + } + else + newDeleteTerms = null; + + // Remap delete-by-docID + List newDeleteDocIDs; + + if (docIDs.Count > 0) + { + newDeleteDocIDs = new List(docIDs.Count); + foreach(int num in docIDs) + { + newDeleteDocIDs.Add(mapper.Remap(num)); + } + } + else + newDeleteDocIDs = null; + + // Remap delete-by-query + HashMap newDeleteQueries; + + if (queries.Count > 0) + { + newDeleteQueries = new HashMap(queries.Count); + foreach(var entry in queries) + { + int num = entry.Value; + newDeleteQueries[entry.Key] = mapper.Remap(num); + } + } + else + newDeleteQueries = null; + + if (newDeleteTerms != null) + terms = newDeleteTerms; + if (newDeleteDocIDs != null) + docIDs = newDeleteDocIDs; + if (newDeleteQueries != null) + queries = newDeleteQueries; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ByteBlockPool.cs b/external/Lucene.Net.Light/src/core/Index/ByteBlockPool.cs new file mode 100644 index 0000000000..041c756fbc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ByteBlockPool.cs @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +/* Class that Posting and PostingVector use to write byte +* streams into shared fixed-size byte[] arrays. The idea +* is to allocate slices of increasing lengths For +* example, the first slice is 5 bytes, the next slice is +* 14, etc. We start by writing our bytes into the first +* 5 bytes. When we hit the end of the slice, we allocate +* the next slice and then write the address of the new +* slice into the last 4 bytes of the previous slice (the +* "forwarding address"). +* +* Each slice is filled with 0's initially, and we mark +* the end with a non-zero byte. This way the methods +* that are writing into the slice don't need to record +* its length and instead allocate a new slice once they +* hit a non-zero byte. */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + sealed public class ByteBlockPool + { + private void InitBlock() + { + byteUpto = DocumentsWriter.BYTE_BLOCK_SIZE; + } + + public /*internal*/ abstract class Allocator + { + public /*internal*/ abstract void RecycleByteBlocks(byte[][] blocks, int start, int end); + public /*internal*/ abstract void RecycleByteBlocks(IList blocks); + public /*internal*/ abstract byte[] GetByteBlock(bool trackAllocations); + } + + public byte[][] buffers = new byte[10][]; + + internal int bufferUpto = - 1; // Which buffer we are upto + public int byteUpto; // Where we are in head buffer + + public byte[] buffer; // Current head buffer + public int byteOffset = - DocumentsWriter.BYTE_BLOCK_SIZE; // Current head offset + + private readonly bool trackAllocations; + private readonly Allocator allocator; + + public ByteBlockPool(Allocator allocator, bool trackAllocations) + { + InitBlock(); + this.allocator = allocator; + this.trackAllocations = trackAllocations; + } + + public void Reset() + { + if (bufferUpto != - 1) + { + // We allocated at least one buffer + + for (int i = 0; i < bufferUpto; i++) + // Fully zero fill buffers that we fully used + System.Array.Clear(buffers[i], 0, buffers[i].Length); + + // Partial zero fill the final buffer + System.Array.Clear(buffers[bufferUpto], 0, byteUpto); + + if (bufferUpto > 0) + // Recycle all but the first buffer + allocator.RecycleByteBlocks(buffers, 1, 1 + bufferUpto); + + // Re-use the first buffer + bufferUpto = 0; + byteUpto = 0; + byteOffset = 0; + buffer = buffers[0]; + } + } + + public void NextBuffer() + { + if (1 + bufferUpto == buffers.Length) + { + var newBuffers = new byte[(int) (buffers.Length * 1.5)][]; + Array.Copy(buffers, 0, newBuffers, 0, buffers.Length); + buffers = newBuffers; + } + buffer = buffers[1 + bufferUpto] = allocator.GetByteBlock(trackAllocations); + bufferUpto++; + + byteUpto = 0; + byteOffset += DocumentsWriter.BYTE_BLOCK_SIZE; + } + + public int NewSlice(int size) + { + if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE - size) + NextBuffer(); + int upto = byteUpto; + byteUpto += size; + buffer[byteUpto - 1] = 16; + return upto; + } + + // Size of each slice. These arrays should be at most 16 + // elements (index is encoded with 4 bits). First array + // is just a compact way to encode X+1 with a max. Second + // array is the length of each slice, ie first slice is 5 + // bytes, next slice is 14 bytes, etc. 
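As a stand-alone illustration of the growth policy encoded in the two arrays declared just below (the values are copied from them), the slice sizes a chain passes through are:

    using System;

    static class SliceSizeSketch
    {
        public static void Demo()
        {
            int[] nextLevel = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 9 };            // copy of nextLevelArray
            int[] levelSize = { 5, 14, 20, 30, 40, 40, 80, 80, 120, 200 }; // copy of levelSizeArray
            int level = 0;
            for (int slice = 0; slice < 12; slice++)
            {
                Console.WriteLine("slice #" + slice + ": " + levelSize[level] + " bytes");
                level = nextLevel[level]; // the level saturates at 9, so later slices stay at 200 bytes
            }
            // Prints 5, 14, 20, 30, 40, 40, 80, 80, 120, 200, 200, 200.
        }
    }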
+ internal static readonly int[] nextLevelArray = new int[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; + internal static readonly int[] levelSizeArray = new int[]{5, 14, 20, 30, 40, 40, 80, 80, 120, 200}; + internal static readonly int FIRST_LEVEL_SIZE = levelSizeArray[0]; + public readonly static int FIRST_LEVEL_SIZE_For_NUnit_Test = levelSizeArray[0]; + + public int AllocSlice(byte[] slice, int upto) + { + + int level = slice[upto] & 15; + int newLevel = nextLevelArray[level]; + int newSize = levelSizeArray[newLevel]; + + // Maybe allocate another block + if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE - newSize) + NextBuffer(); + + int newUpto = byteUpto; + int offset = newUpto + byteOffset; + byteUpto += newSize; + + // Copy forward the past 3 bytes (which we are about + // to overwrite with the forwarding address): + buffer[newUpto] = slice[upto - 3]; + buffer[newUpto + 1] = slice[upto - 2]; + buffer[newUpto + 2] = slice[upto - 1]; + + // Write forwarding address at end of last slice: + slice[upto - 3] = (byte) (Number.URShift(offset, 24)); + slice[upto - 2] = (byte) (Number.URShift(offset, 16)); + slice[upto - 1] = (byte) (Number.URShift(offset, 8)); + slice[upto] = (byte) offset; + + // Write new level: + buffer[byteUpto - 1] = (byte) (16 | newLevel); + + return newUpto + 3; + } + + public static int FIRST_LEVEL_SIZE_ForNUnit + { + get { return FIRST_LEVEL_SIZE; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ByteSliceReader.cs b/external/Lucene.Net.Light/src/core/Index/ByteSliceReader.cs new file mode 100644 index 0000000000..8b672fe37f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ByteSliceReader.cs @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Index +{ + + /* IndexInput that knows how to read the byte slices written + * by Posting and PostingVector. 
We read the bytes in + * each slice until we hit the end of that slice at which + * point we read the forwarding address of the next slice + * and then jump to it.*/ + public sealed class ByteSliceReader : IndexInput + { + internal ByteBlockPool pool; + internal int bufferUpto; + internal byte[] buffer; + public int upto; + internal int limit; + internal int level; + public int bufferOffset; + + public int endIndex; + + public void Init(ByteBlockPool pool, int startIndex, int endIndex) + { + + System.Diagnostics.Debug.Assert(endIndex - startIndex >= 0); + System.Diagnostics.Debug.Assert(startIndex >= 0); + System.Diagnostics.Debug.Assert(endIndex >= 0); + + this.pool = pool; + this.endIndex = endIndex; + + level = 0; + bufferUpto = startIndex / DocumentsWriter.BYTE_BLOCK_SIZE; + bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE; + buffer = pool.buffers[bufferUpto]; + upto = startIndex & DocumentsWriter.BYTE_BLOCK_MASK; + + int firstSize = ByteBlockPool.levelSizeArray[0]; + + if (startIndex + firstSize >= endIndex) + { + // There is only this one slice to read + limit = endIndex & DocumentsWriter.BYTE_BLOCK_MASK; + } + else + limit = upto + firstSize - 4; + } + + public bool Eof() + { + System.Diagnostics.Debug.Assert(upto + bufferOffset <= endIndex); + return upto + bufferOffset == endIndex; + } + + public override byte ReadByte() + { + System.Diagnostics.Debug.Assert(!Eof()); + System.Diagnostics.Debug.Assert(upto <= limit); + if (upto == limit) + NextSlice(); + return buffer[upto++]; + } + + public long WriteTo(IndexOutput @out) + { + long size = 0; + while (true) + { + if (limit + bufferOffset == endIndex) + { + System.Diagnostics.Debug.Assert(endIndex - bufferOffset >= upto); + @out.WriteBytes(buffer, upto, limit - upto); + size += limit - upto; + break; + } + else + { + @out.WriteBytes(buffer, upto, limit - upto); + size += limit - upto; + NextSlice(); + } + } + + return size; + } + + public void NextSlice() + { + + // Skip to our next slice + int nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + ((buffer[2 + limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff); + + level = ByteBlockPool.nextLevelArray[level]; + int newSize = ByteBlockPool.levelSizeArray[level]; + + bufferUpto = nextIndex / DocumentsWriter.BYTE_BLOCK_SIZE; + bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE; + + buffer = pool.buffers[bufferUpto]; + upto = nextIndex & DocumentsWriter.BYTE_BLOCK_MASK; + + if (nextIndex + newSize >= endIndex) + { + // We are advancing to the final slice + System.Diagnostics.Debug.Assert(endIndex - nextIndex > 0); + limit = endIndex - bufferOffset; + } + else + { + // This is not the final slice (subtract 4 for the + // forwarding address at the end of this new slice) + limit = upto + newSize - 4; + } + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + while (len > 0) + { + int numLeft = limit - upto; + if (numLeft < len) + { + // Read entire slice + Array.Copy(buffer, upto, b, offset, numLeft); + offset += numLeft; + len -= numLeft; + NextSlice(); + } + else + { + // This slice is the last one + Array.Copy(buffer, upto, b, offset, len); + upto += len; + break; + } + } + } + + public override long FilePointer + { + get { throw new NotImplementedException(); } + } + + public override long Length() + { + throw new NotImplementedException(); + } + public override void Seek(long pos) + { + throw new NotImplementedException(); + } + + protected override void Dispose(bool disposing) + { + // Do nothing... 
+ } + + override public Object Clone() + { + System.Diagnostics.Debug.Fail("Port issue:", "Let see if we need this ByteSliceReader.Clone()"); // {{Aroush-2.9}} + return null; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ByteSliceWriter.cs b/external/Lucene.Net.Light/src/core/Index/ByteSliceWriter.cs new file mode 100644 index 0000000000..86bbca0e73 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ByteSliceWriter.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + /// Class to write byte streams into slices of shared + /// byte[]. This is used by DocumentsWriter to hold the + /// posting list for many terms in RAM. + /// + public sealed class ByteSliceWriter + { + private byte[] slice; + private int upto; + private readonly ByteBlockPool pool; + + internal int offset0; + + public ByteSliceWriter(ByteBlockPool pool) + { + this.pool = pool; + } + + /// Set up the writer to write at address. + public void Init(int address) + { + slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT]; + System.Diagnostics.Debug.Assert(slice != null); + upto = address & DocumentsWriter.BYTE_BLOCK_MASK; + offset0 = address; + System.Diagnostics.Debug.Assert(upto < slice.Length); + } + + /// Write byte into byte slice stream + public void WriteByte(byte b) + { + System.Diagnostics.Debug.Assert(slice != null); + if (slice[upto] != 0) + { + upto = pool.AllocSlice(slice, upto); + slice = pool.buffer; + offset0 = pool.byteOffset; + System.Diagnostics.Debug.Assert(slice != null); + } + slice[upto++] = b; + System.Diagnostics.Debug.Assert(upto != slice.Length); + } + + public void WriteBytes(byte[] b, int offset, int len) + { + int offsetEnd = offset + len; + while (offset < offsetEnd) + { + if (slice[upto] != 0) + { + // End marker + upto = pool.AllocSlice(slice, upto); + slice = pool.buffer; + offset0 = pool.byteOffset; + } + + slice[upto++] = b[offset++]; + System.Diagnostics.Debug.Assert(upto != slice.Length); + } + } + + public int Address + { + get { return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); } + } + + public void WriteVInt(int i) + { + while ((i & ~ 0x7F) != 0) + { + WriteByte((byte) ((i & 0x7f) | 0x80)); + i = Number.URShift(i, 7); + } + WriteByte((byte) i); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/CharBlockPool.cs b/external/Lucene.Net.Light/src/core/Index/CharBlockPool.cs new file mode 100644 index 0000000000..0631fe0e92 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/CharBlockPool.cs @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class CharBlockPool + { + private void InitBlock() + { + charUpto = DocumentsWriter.CHAR_BLOCK_SIZE; + } + + public char[][] buffers = new char[10][]; + internal int numBuffer; + + internal int bufferUpto = - 1; // Which buffer we are upto + public int charUpto; // Where we are in head buffer + + public char[] buffer; // Current head buffer + public int charOffset = - DocumentsWriter.CHAR_BLOCK_SIZE; // Current head offset + private readonly DocumentsWriter docWriter; + + public CharBlockPool(DocumentsWriter docWriter) + { + InitBlock(); + this.docWriter = docWriter; + } + + public void Reset() + { + docWriter.RecycleCharBlocks(buffers, 1 + bufferUpto); + bufferUpto = - 1; + charUpto = DocumentsWriter.CHAR_BLOCK_SIZE; + charOffset = - DocumentsWriter.CHAR_BLOCK_SIZE; + } + + public void NextBuffer() + { + if (1 + bufferUpto == buffers.Length) + { + var newBuffers = new char[(int) (buffers.Length * 1.5)][]; + Array.Copy(buffers, 0, newBuffers, 0, buffers.Length); + buffers = newBuffers; + } + buffer = buffers[1 + bufferUpto] = docWriter.GetCharBlock(); + bufferUpto++; + + charUpto = 0; + charOffset += DocumentsWriter.CHAR_BLOCK_SIZE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/CheckIndex.cs b/external/Lucene.Net.Light/src/core/Index/CheckIndex.cs new file mode 100644 index 0000000000..89179036a6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/CheckIndex.cs @@ -0,0 +1,1017 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; +using System.IO; +using Lucene.Net.Support; +using AbstractField = Lucene.Net.Documents.AbstractField; +using Document = Lucene.Net.Documents.Document; +using Directory = Lucene.Net.Store.Directory; +using FSDirectory = Lucene.Net.Store.FSDirectory; +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + + /// Basic tool and API to check the health of an index and + /// write a new segments file that removes reference to + /// problematic segments. + /// + ///

As this tool checks every byte in the index, on a large + /// index it can take quite a long time to run. + /// + ///

WARNING: this tool and API are new and + /// experimental and are subject to sudden change in the + /// next release. Please make a complete backup of your + /// index before using this to fix your index! + ///
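+ /// + /// A minimal usage sketch (the index path is illustrative): + /// var checker = new CheckIndex(FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index"))); + /// Status status = checker.CheckIndex_Renamed_Method(); + /// if (!status.clean) checker.FixIndex(status); // rewrites segments_N, dropping broken segments; back up the index first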

+ public class CheckIndex + { + private StreamWriter infoStream; + private readonly Directory dir; + + /// Returned from CheckIndex_Renamed_Method(), detailing the health and status of the index. + /// + ///

WARNING: this API is new and experimental and is + /// subject to sudden change in the next release. + /// + ///

+ + public class Status + { + + /// True if no problems were found with the index. + public bool clean; + + /// True if we were unable to locate and load the segments_N file. + public bool missingSegments; + + /// True if we were unable to open the segments_N file. + public bool cantOpenSegments; + + /// True if we were unable to read the version number from segments_N file. + public bool missingSegmentVersion; + + /// Name of latest segments_N file in the index. + public System.String segmentsFileName; + + /// Number of segments in the index. + public int numSegments; + + /// String description of the version of the index. + public System.String segmentFormat; + + /// Empty unless you passed specific segments list to check as optional 3rd argument. + /// + /// CheckIndex.CheckIndex_Renamed_Method(System.Collections.IList) + /// + public List segmentsChecked = new List(); + + /// True if the index was created with a newer version of Lucene than the CheckIndex tool. + public bool toolOutOfDate; + + /// List of instances, detailing status of each segment. + public IList segmentInfos = new List(); + + /// Directory index is in. + public Directory dir; + + /// SegmentInfos instance containing only segments that + /// had no problems (this is used with the + /// method to repair the index. + /// + internal SegmentInfos newSegments; + + /// How many documents will be lost to bad segments. + public int totLoseDocCount; + + /// How many bad segments were found. + public int numBadSegments; + + /// True if we checked only specific segments () + /// was called with non-null + /// argument). + /// + public bool partial; + + /// Holds the userData of the last commit in the index + public IDictionary userData; + + /// Holds the status of each segment in the index. + /// See . + /// + ///

WARNING: this API is new and experimental and is + /// subject to sudden change in the next release. + ///

+ public class SegmentInfoStatus + { + /// Name of the segment. + public System.String name; + + /// Document count (does not take deletions into account). + public int docCount; + + /// True if segment is compound file format. + public bool compound; + + /// Number of files referenced by this segment. + public int numFiles; + + /// Net size (MB) of the files referenced by this + /// segment. + /// + public double sizeMB; + + /// Doc store offset, if this segment shares the doc + /// store files (stored fields and term vectors) with + /// other segments. This is -1 if it does not share. + /// + public int docStoreOffset = - 1; + + /// String of the shared doc store segment, or null if + /// this segment does not share the doc store files. + /// + public System.String docStoreSegment; + + /// True if the shared doc store files are compound file + /// format. + /// + public bool docStoreCompoundFile; + + /// True if this segment has pending deletions. + public bool hasDeletions; + + /// Name of the current deletions file name. + public System.String deletionsFileName; + + /// Number of deleted documents. + public int numDeleted; + + /// True if we were able to open a SegmentReader on this + /// segment. + /// + public bool openReaderPassed; + + /// Number of fields in this segment. + internal int numFields; + + /// True if at least one of the fields in this segment + /// does not omitTermFreqAndPositions. + /// + /// + /// + public bool hasProx; + + /// Map<String, String> that includes certain + /// debugging details that IndexWriter records into + /// each segment it creates + /// + public IDictionary diagnostics; + + /// Status for testing of field norms (null if field norms could not be tested). + public FieldNormStatus fieldNormStatus; + + /// Status for testing of indexed terms (null if indexed terms could not be tested). + public TermIndexStatus termIndexStatus; + + /// Status for testing of stored fields (null if stored fields could not be tested). + public StoredFieldStatus storedFieldStatus; + + /// Status for testing of term vectors (null if term vectors could not be tested). + public TermVectorStatus termVectorStatus; + } + + /// Status from testing field norms. + public sealed class FieldNormStatus + { + /// Number of fields successfully tested + public long totFields = 0L; + + /// Exception thrown during term index test (null on success) + public System.Exception error = null; + } + + /// Status from testing term index. + public sealed class TermIndexStatus + { + /// Total term count + public long termCount = 0L; + + /// Total frequency across all terms. + public long totFreq = 0L; + + /// Total number of positions. + public long totPos = 0L; + + /// Exception thrown during term index test (null on success) + public System.Exception error = null; + } + + /// Status from testing stored fields. + public sealed class StoredFieldStatus + { + + /// Number of documents tested. + public int docCount = 0; + + /// Total number of stored fields tested. + public long totFields = 0; + + /// Exception thrown during stored fields test (null on success) + public System.Exception error = null; + } + + /// Status from testing stored fields. + public sealed class TermVectorStatus + { + + /// Number of documents tested. + public int docCount = 0; + + /// Total number of term vectors tested. + public long totVectors = 0; + + /// Exception thrown during term vector test (null on success) + public System.Exception error = null; + } + } + + /// Create a new CheckIndex on the directory. 
+ public CheckIndex(Directory dir) + { + this.dir = dir; + infoStream = null; + } + + /// Set infoStream where messages should go. If null, no + /// messages are printed + /// + public virtual void SetInfoStream(StreamWriter @out) + { + infoStream = @out; + } + + private void Msg(System.String msg) + { + if (infoStream != null) + infoStream.WriteLine(msg); + } + + private class MySegmentTermDocs:SegmentTermDocs + { + + internal int delCount; + + internal MySegmentTermDocs(SegmentReader p):base(p) + { + } + + public override void Seek(Term term) + { + base.Seek(term); + delCount = 0; + } + + protected internal override void SkippingDoc() + { + delCount++; + } + } + + /// Returns a instance detailing + /// the state of the index. + /// + ///

As this method checks every byte in the index, on a large + /// index it can take quite a long time to run. + /// + ///

WARNING: make sure + /// you only call this when the index is not opened by any + /// writer. + ///

+ public virtual Status CheckIndex_Renamed_Method() + { + return CheckIndex_Renamed_Method(null); + } + + /// Returns a Status instance detailing + /// the state of the index. + /// + /// + /// list of specific segment names to check + /// + ///

As this method checks every byte in the specified + /// segments, on a large index it can take quite a long + /// time to run. + /// + ///

WARNING: make sure + /// you only call this when the index is not opened by any + /// writer. + /// + public virtual Status CheckIndex_Renamed_Method(List onlySegments) + { + System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; + SegmentInfos sis = new SegmentInfos(); + Status result = new Status(); + result.dir = dir; + try + { + sis.Read(dir); + } + catch (System.Exception t) + { + Msg("ERROR: could not read any segments file in directory"); + result.missingSegments = true; + if (infoStream != null) + infoStream.WriteLine(t.StackTrace); + return result; + } + + int numSegments = sis.Count; + var segmentsFileName = sis.GetCurrentSegmentFileName(); + IndexInput input = null; + try + { + input = dir.OpenInput(segmentsFileName); + } + catch (System.Exception t) + { + Msg("ERROR: could not open segments file in directory"); + if (infoStream != null) + infoStream.WriteLine(t.StackTrace); + result.cantOpenSegments = true; + return result; + } + int format = 0; + try + { + format = input.ReadInt(); + } + catch (System.Exception t) + { + Msg("ERROR: could not read segment file version in directory"); + if (infoStream != null) + infoStream.WriteLine(t.StackTrace); + result.missingSegmentVersion = true; + return result; + } + finally + { + if (input != null) + input.Close(); + } + + System.String sFormat = ""; + bool skip = false; + + if (format == SegmentInfos.FORMAT) + sFormat = "FORMAT [Lucene Pre-2.1]"; + if (format == SegmentInfos.FORMAT_LOCKLESS) + sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; + else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) + sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; + else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) + sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; + else + { + if (format == SegmentInfos.FORMAT_CHECKSUM) + sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; + else if (format == SegmentInfos.FORMAT_DEL_COUNT) + sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; + else if (format == SegmentInfos.FORMAT_HAS_PROX) + sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; + else if (format == SegmentInfos.FORMAT_USER_DATA) + sFormat = "FORMAT_USER_DATA [Lucene 2.9]"; + else if (format == SegmentInfos.FORMAT_DIAGNOSTICS) + sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]"; + else if (format < SegmentInfos.CURRENT_FORMAT) + { + sFormat = "int=" + format + " [newer version of Lucene than this tool]"; + skip = true; + } + else + { + sFormat = format + " [Lucene 1.3 or prior]"; + } + } + + result.segmentsFileName = segmentsFileName; + result.numSegments = numSegments; + result.segmentFormat = sFormat; + result.userData = sis.UserData; + System.String userDataString; + if (sis.UserData.Count > 0) + { + userDataString = " userData=" + CollectionsHelper.CollectionToString(sis.UserData); + } + else + { + userDataString = ""; + } + + Msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString); + + if (onlySegments != null) + { + result.partial = true; + if (infoStream != null) + infoStream.Write("\nChecking only these segments:"); + foreach(string s in onlySegments) + { + if (infoStream != null) + { + infoStream.Write(" " + s); + } + } + result.segmentsChecked.AddRange(onlySegments); + Msg(":"); + } + + if (skip) + { + Msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); + result.toolOutOfDate = true; + return result; + } + + + result.newSegments = 
(SegmentInfos) sis.Clone(); + result.newSegments.Clear(); + + for (int i = 0; i < numSegments; i++) + { + SegmentInfo info = sis.Info(i); + if (onlySegments != null && !onlySegments.Contains(info.name)) + continue; + var segInfoStat = new Status.SegmentInfoStatus(); + result.segmentInfos.Add(segInfoStat); + Msg(" " + (1 + i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); + segInfoStat.name = info.name; + segInfoStat.docCount = info.docCount; + + int toLoseDocCount = info.docCount; + + SegmentReader reader = null; + + try + { + Msg(" compound=" + info.GetUseCompoundFile()); + segInfoStat.compound = info.GetUseCompoundFile(); + Msg(" hasProx=" + info.HasProx); + segInfoStat.hasProx = info.HasProx; + Msg(" numFiles=" + info.Files().Count); + segInfoStat.numFiles = info.Files().Count; + Msg(System.String.Format(nf, " size (MB)={0:f}", new System.Object[] { (info.SizeInBytes() / (1024.0 * 1024.0)) })); + segInfoStat.sizeMB = info.SizeInBytes() / (1024.0 * 1024.0); + IDictionary diagnostics = info.Diagnostics; + segInfoStat.diagnostics = diagnostics; + if (diagnostics.Count > 0) + { + Msg(" diagnostics = " + CollectionsHelper.CollectionToString(diagnostics)); + } + + int docStoreOffset = info.DocStoreOffset; + if (docStoreOffset != - 1) + { + Msg(" docStoreOffset=" + docStoreOffset); + segInfoStat.docStoreOffset = docStoreOffset; + Msg(" docStoreSegment=" + info.DocStoreSegment); + segInfoStat.docStoreSegment = info.DocStoreSegment; + Msg(" docStoreIsCompoundFile=" + info.DocStoreIsCompoundFile); + segInfoStat.docStoreCompoundFile = info.DocStoreIsCompoundFile; + } + System.String delFileName = info.GetDelFileName(); + if (delFileName == null) + { + Msg(" no deletions"); + segInfoStat.hasDeletions = false; + } + else + { + Msg(" has deletions [delFileName=" + delFileName + "]"); + segInfoStat.hasDeletions = true; + segInfoStat.deletionsFileName = delFileName; + } + if (infoStream != null) + infoStream.Write(" test: open reader........."); + reader = SegmentReader.Get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR); + + segInfoStat.openReaderPassed = true; + + int numDocs = reader.NumDocs(); + toLoseDocCount = numDocs; + if (reader.HasDeletions) + { + if (reader.deletedDocs.Count() != info.GetDelCount()) + { + throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); + } + if (reader.deletedDocs.Count() > reader.MaxDoc) + { + throw new System.SystemException("too many deleted docs: MaxDoc=" + reader.MaxDoc + " vs deletedDocs.count()=" + reader.deletedDocs.Count()); + } + if (info.docCount - numDocs != info.GetDelCount()) + { + throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); + } + segInfoStat.numDeleted = info.docCount - numDocs; + Msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); + } + else + { + if (info.GetDelCount() != 0) + { + throw new System.SystemException("delete count mismatch: info=" + info.GetDelCount() + " vs reader=" + (info.docCount - numDocs)); + } + Msg("OK"); + } + if (reader.MaxDoc != info.docCount) + throw new System.SystemException("SegmentReader.MaxDoc " + reader.MaxDoc + " != SegmentInfos.docCount " + info.docCount); + + // Test getFieldNames() + if (infoStream != null) + { + infoStream.Write(" test: fields.............."); + } + ICollection fieldNames = reader.GetFieldNames(IndexReader.FieldOption.ALL); + Msg("OK [" + fieldNames.Count + " fields]"); + 
segInfoStat.numFields = fieldNames.Count; + + // Test Field Norms + segInfoStat.fieldNormStatus = TestFieldNorms(fieldNames, reader); + + // Test the Term Index + segInfoStat.termIndexStatus = TestTermIndex(info, reader); + + // Test Stored Fields + segInfoStat.storedFieldStatus = TestStoredFields(info, reader, nf); + + // Test Term Vectors + segInfoStat.termVectorStatus = TestTermVectors(info, reader, nf); + + // Rethrow the first exception we encountered + // This will cause stats for failed segments to be incremented properly + if (segInfoStat.fieldNormStatus.error != null) + { + throw new SystemException("Field Norm test failed"); + } + else if (segInfoStat.termIndexStatus.error != null) + { + throw new SystemException("Term Index test failed"); + } + else if (segInfoStat.storedFieldStatus.error != null) + { + throw new SystemException("Stored Field test failed"); + } + else if (segInfoStat.termVectorStatus.error != null) + { + throw new System.SystemException("Term Vector test failed"); + } + + Msg(""); + } + catch (System.Exception t) + { + Msg("FAILED"); + const string comment = "fixIndex() would remove reference to this segment"; + Msg(" WARNING: " + comment + "; full exception:"); + if (infoStream != null) + infoStream.WriteLine(t.StackTrace); + Msg(""); + result.totLoseDocCount += toLoseDocCount; + result.numBadSegments++; + continue; + } + finally + { + if (reader != null) + reader.Close(); + } + + // Keeper + result.newSegments.Add((SegmentInfo)info.Clone()); + } + + if (0 == result.numBadSegments) + { + result.clean = true; + Msg("No problems were detected with this index.\n"); + } + else + Msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected"); + + return result; + } + + ///

Test field norms. + private Status.FieldNormStatus TestFieldNorms(IEnumerable fieldNames, SegmentReader reader) + { + var status = new Status.FieldNormStatus(); + + try + { + // Test Field Norms + if (infoStream != null) + { + infoStream.Write(" test: field norms........."); + } + + var b = new byte[reader.MaxDoc]; + foreach(string fieldName in fieldNames) + { + if (reader.HasNorms(fieldName)) + { + reader.Norms(fieldName, b, 0); + ++status.totFields; + } + } + + Msg("OK [" + status.totFields + " fields]"); + } + catch (System.Exception e) + { + Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); + status.error = e; + if (infoStream != null) + { + infoStream.WriteLine(e.StackTrace); + } + } + + return status; + } + + /// Test the term index. + private Status.TermIndexStatus TestTermIndex(SegmentInfo info, SegmentReader reader) + { + var status = new Status.TermIndexStatus(); + + try + { + if (infoStream != null) + { + infoStream.Write(" test: terms, freq, prox..."); + } + + TermEnum termEnum = reader.Terms(); + TermPositions termPositions = reader.TermPositions(); + + // Used only to count up # deleted docs for this term + var myTermDocs = new MySegmentTermDocs(reader); + + int maxDoc = reader.MaxDoc; + + while (termEnum.Next()) + { + status.termCount++; + Term term = termEnum.Term; + int docFreq = termEnum.DocFreq(); + termPositions.Seek(term); + int lastDoc = - 1; + int freq0 = 0; + status.totFreq += docFreq; + while (termPositions.Next()) + { + freq0++; + int doc = termPositions.Doc; + int freq = termPositions.Freq; + if (doc <= lastDoc) + { + throw new System.SystemException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); + } + if (doc >= maxDoc) + { + throw new System.SystemException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); + } + + lastDoc = doc; + if (freq <= 0) + { + throw new System.SystemException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + } + + int lastPos = - 1; + status.totPos += freq; + for (int j = 0; j < freq; j++) + { + int pos = termPositions.NextPosition(); + if (pos < - 1) + { + throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds"); + } + if (pos < lastPos) + { + throw new System.SystemException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos); + } + lastPos = pos; + } + } + + // Now count how many deleted docs occurred in + // this term: + int delCount; + if (reader.HasDeletions) + { + myTermDocs.Seek(term); + while (myTermDocs.Next()) + { + } + delCount = myTermDocs.delCount; + } + else + { + delCount = 0; + } + + if (freq0 + delCount != docFreq) + { + throw new System.SystemException("term " + term + " docFreq=" + docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount); + } + } + + Msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); + } + catch (System.Exception e) + { + Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); + status.error = e; + if (infoStream != null) + { + infoStream.WriteLine(e.StackTrace); + } + } + + return status; + } + + /// Test stored fields for a segment. 
+ private Status.StoredFieldStatus TestStoredFields(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format) + { + var status = new Status.StoredFieldStatus(); + + try + { + if (infoStream != null) + { + infoStream.Write(" test: stored fields......."); + } + + // Scan stored fields for all documents + for (int j = 0; j < info.docCount; ++j) + { + if (!reader.IsDeleted(j)) + { + status.docCount++; + Document doc = reader.Document(j); + status.totFields += doc.GetFields().Count; + } + } + + // Validate docCount + if (status.docCount != reader.NumDocs()) + { + throw new System.SystemException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs"); + } + + Msg(string.Format(format, "OK [{0:d} total field count; avg {1:f} fields per doc]", new object[] { status.totFields, (((float) status.totFields) / status.docCount) })); + } + catch (System.Exception e) + { + Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); + status.error = e; + if (infoStream != null) + { + infoStream.WriteLine(e.StackTrace); + } + } + + return status; + } + + /// Test term vectors for a segment. + private Status.TermVectorStatus TestTermVectors(SegmentInfo info, SegmentReader reader, System.Globalization.NumberFormatInfo format) + { + var status = new Status.TermVectorStatus(); + + try + { + if (infoStream != null) + { + infoStream.Write(" test: term vectors........"); + } + + for (int j = 0; j < info.docCount; ++j) + { + if (!reader.IsDeleted(j)) + { + status.docCount++; + ITermFreqVector[] tfv = reader.GetTermFreqVectors(j); + if (tfv != null) + { + status.totVectors += tfv.Length; + } + } + } + + Msg(System.String.Format(format, "OK [{0:d} total vector count; avg {1:f} term/freq vector fields per doc]", new object[] { status.totVectors, (((float) status.totVectors) / status.docCount) })); + } + catch (System.Exception e) + { + Msg("ERROR [" + System.Convert.ToString(e.Message) + "]"); + status.error = e; + if (infoStream != null) + { + infoStream.WriteLine(e.StackTrace); + } + } + + return status; + } + + /// Repairs the index using previously returned result + /// from . Note that this does not + /// remove any of the unreferenced files after it's done; + /// you must separately open an , which + /// deletes unreferenced files when it's created. + /// + ///

WARNING: this writes a + /// new segments file into the index, effectively removing + /// all documents in broken segments from the index. + /// BE CAREFUL. + /// + ///

WARNING: Make sure you only call this when the + /// index is not opened by any writer. + ///

+ public virtual void FixIndex(Status result) + { + if (result.partial) + throw new System.ArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); + result.newSegments.Commit(result.dir); + } + + private static bool assertsOn; + + private static bool TestAsserts() + { + assertsOn = true; + return true; + } + + private static bool AssertsOn() + { + System.Diagnostics.Debug.Assert(TestAsserts()); + return assertsOn; + } + + /// Command-line interface to check and fix an index. + ///

+ /// Run it like this: + /// + /// java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] + /// + /// + /// -fix: actually write a new segments_N file, removing any problematic segments + /// -segment X: only check the specified + /// segment(s). This can be specified multiple times, + /// to check more than one segment, eg -segment _2 + /// -segment _a. You can't use this with the -fix + /// option. + /// + ///
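+ /// For example, java -ea:Lucene.Net... Lucene.Net.Index.CheckIndex pathToIndex -segment _2 -segment _a + /// checks only segments _2 and _a and, because -fix is not given, leaves the index untouched.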

WARNING: -fix should only be used on an emergency basis as it will cause + /// documents (perhaps many) to be permanently removed from the index. Always make + /// a backup copy of your index before running this! Do not run this tool on an index + /// that is actively being written to. You have been warned! + ///

Run without -fix, this tool will open the index, report version information + /// and report any exceptions it hits and what action it would take if -fix were + /// specified. With -fix, this tool will remove any segments that have issues and + /// write a new segments_N file. This means all documents contained in the affected + /// segments will be removed. + ///

+ /// This tool exits with exit code 1 if the index cannot be opened or has any + /// corruption, else 0. + ///

+ [STAThread] + public static void Main(System.String[] args) + { + + bool doFix = false; + var onlySegments = new List(); + System.String indexPath = null; + int i = 0; + while (i < args.Length) + { + if (args[i].Equals("-fix")) + { + doFix = true; + i++; + } + else if (args[i].Equals("-segment")) + { + if (i == args.Length - 1) + { + System.Console.Out.WriteLine("ERROR: missing name for -segment option"); + System.Environment.Exit(1); + } + onlySegments.Add(args[i + 1]); + i += 2; + } + else + { + if (indexPath != null) + { + System.Console.Out.WriteLine("ERROR: unexpected extra argument '" + args[i] + "'"); + System.Environment.Exit(1); + } + indexPath = args[i]; + i++; + } + } + + if (indexPath == null) + { + System.Console.Out.WriteLine("\nERROR: index path not specified"); + System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! Do not run this tool on an index\n" + "that is actively being written to. You have been warned!\n" + "\n" + "Run without -fix, this tool will open the index, report version information\n" + "and report any exceptions it hits and what action it would take if -fix were\n" + "specified. With -fix, this tool will remove any segments that have issues and\n" + "write a new segments_N file. 
This means all documents contained in the affected\n" + "segments will be removed.\n" + "\n" + "This tool exits with exit code 1 if the index cannot be opened or has any\n" + "corruption, else 0.\n"); + System.Environment.Exit(1); + } + + if (!AssertsOn()) + System.Console.Out.WriteLine("\nNOTE: testing will be more thorough if you run java with '-ea:Lucene.Net...', so assertions are enabled"); + + if (onlySegments.Count == 0) + onlySegments = null; + else if (doFix) + { + System.Console.Out.WriteLine("ERROR: cannot specify both -fix and -segment"); + System.Environment.Exit(1); + } + + System.Console.Out.WriteLine("\nOpening index @ " + indexPath + "\n"); + Directory dir = null; + try + { + dir = FSDirectory.Open(new System.IO.DirectoryInfo(indexPath)); + } + catch (Exception t) + { + Console.Out.WriteLine("ERROR: could not open directory \"" + indexPath + "\"; exiting"); + Console.Out.WriteLine(t.StackTrace); + Environment.Exit(1); + } + + var checker = new CheckIndex(dir); + var tempWriter = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding) + {AutoFlush = true}; + checker.SetInfoStream(tempWriter); + + Status result = checker.CheckIndex_Renamed_Method(onlySegments); + if (result.missingSegments) + { + System.Environment.Exit(1); + } + + if (!result.clean) + { + if (!doFix) + { + System.Console.Out.WriteLine("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n"); + } + else + { + Console.Out.WriteLine("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); + Console.Out.WriteLine("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); + for (var s = 0; s < 5; s++) + { + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 1000)); + System.Console.Out.WriteLine(" " + (5 - s) + "..."); + } + Console.Out.WriteLine("Writing..."); + checker.FixIndex(result); + Console.Out.WriteLine("OK"); + Console.Out.WriteLine("Wrote new segments file \"" + result.newSegments.GetCurrentSegmentFileName() + "\""); + } + } + System.Console.Out.WriteLine(""); + + int exitCode; + if (result != null && result.clean == true) + exitCode = 0; + else + exitCode = 1; + System.Environment.Exit(exitCode); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/CompoundFileReader.cs b/external/Lucene.Net.Light/src/core/Index/CompoundFileReader.cs new file mode 100644 index 0000000000..74f4fb4d2e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/CompoundFileReader.cs @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System.Linq; +using Lucene.Net.Support; +using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using Lock = Lucene.Net.Store.Lock; + +namespace Lucene.Net.Index +{ + + + /// Class for accessing a compound stream. + /// This class implements a directory, but is limited to only read operations. + /// Directory methods that would normally modify data throw an exception. + /// + public class CompoundFileReader : Directory + { + + private readonly int readBufferSize; + + private sealed class FileEntry + { + internal long offset; + internal long length; + } + + private bool isDisposed; + + // Base info + private readonly Directory directory; + private readonly System.String fileName; + + private IndexInput stream; + private HashMap entries = new HashMap(); + + + public CompoundFileReader(Directory dir, System.String name):this(dir, name, BufferedIndexInput.BUFFER_SIZE) + { + } + + public CompoundFileReader(Directory dir, System.String name, int readBufferSize) + { + directory = dir; + fileName = name; + this.readBufferSize = readBufferSize; + + bool success = false; + + try + { + stream = dir.OpenInput(name, readBufferSize); + + // read the directory and init files + int count = stream.ReadVInt(); + FileEntry entry = null; + for (int i = 0; i < count; i++) + { + long offset = stream.ReadLong(); + System.String id = stream.ReadString(); + + if (entry != null) + { + // set length of the previous entry + entry.length = offset - entry.offset; + } + + entry = new FileEntry {offset = offset}; + entries[id] = entry; + } + + // set the length of the final entry + if (entry != null) + { + entry.length = stream.Length() - entry.offset; + } + + success = true; + } + finally + { + if (!success && (stream != null)) + { + try + { + stream.Close(); + } + catch (System.IO.IOException) + { + } + } + } + } + + public virtual Directory Directory + { + get { return directory; } + } + + public virtual string Name + { + get { return fileName; } + } + + protected override void Dispose(bool disposing) + { + lock (this) + { + if (isDisposed) return; + if (disposing) + { + if (entries != null) + { + entries.Clear(); + } + if (stream != null) + { + stream.Close(); + } + } + + entries = null; + stream = null; + isDisposed = true; + } + } + + public override IndexInput OpenInput(System.String id) + { + lock (this) + { + // Default to readBufferSize passed in when we were opened + return OpenInput(id, readBufferSize); + } + } + + public override IndexInput OpenInput(System.String id, int readBufferSize) + { + lock (this) + { + if (stream == null) + throw new System.IO.IOException("Stream closed"); + + FileEntry entry = entries[id]; + if (entry == null) + throw new System.IO.IOException("No sub-file with id " + id + " found"); + + return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize); + } + } + + /// Returns an array of strings, one for each file in the directory. + public override System.String[] ListAll() + { + return entries.Keys.ToArray(); + } + + /// Returns true iff a file with the given name exists. + public override bool FileExists(System.String name) + { + return entries.ContainsKey(name); + } + + /// Returns the time the compound file was last modified. + public override long FileModified(System.String name) + { + return directory.FileModified(fileName); + } + + /// Set the modified time of the compound file to now. 
+ public override void TouchFile(System.String name) + { + directory.TouchFile(fileName); + } + + /// Not implemented + /// UnsupportedOperationException + public override void DeleteFile(System.String name) + { + throw new System.NotSupportedException(); + } + + /// Not implemented + /// UnsupportedOperationException + public void RenameFile(System.String from, System.String to) + { + throw new System.NotSupportedException(); + } + + /// Returns the length of a file in the directory. + /// IOException if the file does not exist + public override long FileLength(System.String name) + { + FileEntry e = entries[name]; + if (e == null) + throw new System.IO.IOException("File " + name + " does not exist"); + return e.length; + } + + /// Not implemented + /// UnsupportedOperationException + public override IndexOutput CreateOutput(System.String name) + { + throw new System.NotSupportedException(); + } + + /// Not implemented + /// UnsupportedOperationException + public override Lock MakeLock(System.String name) + { + throw new System.NotSupportedException(); + } + + /// Implementation of an IndexInput that reads from a portion of the + /// compound file. The visibility is left as "package" *only* because + /// this helps with testing since JUnit test cases in a different class + /// can then access package fields of this class. + /// + public /*internal*/ sealed class CSIndexInput : BufferedIndexInput + { + internal IndexInput base_Renamed; + internal long fileOffset; + internal long length; + + private bool isDisposed; + + internal CSIndexInput(IndexInput @base, long fileOffset, long length):this(@base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE) + { + } + + internal CSIndexInput(IndexInput @base, long fileOffset, long length, int readBufferSize):base(readBufferSize) + { + this.base_Renamed = (IndexInput) @base.Clone(); + this.fileOffset = fileOffset; + this.length = length; + } + + public override System.Object Clone() + { + var clone = (CSIndexInput) base.Clone(); + clone.base_Renamed = (IndexInput) base_Renamed.Clone(); + clone.fileOffset = fileOffset; + clone.length = length; + return clone; + } + + /// Expert: implements buffer refill. Reads bytes from the current + /// position in the input. + /// + /// the array to read bytes into + /// + /// the offset in the array to start storing bytes + /// + /// the number of bytes to read + /// + public override void ReadInternal(byte[] b, int offset, int len) + { + long start = FilePointer; + if (start + len > length) + throw new System.IO.IOException("read past EOF"); + base_Renamed.Seek(fileOffset + start); + base_Renamed.ReadBytes(b, offset, len, false); + } + + /// Expert: implements seek. Sets current position in this file, where + /// the next will occur. 
+ /// + /// + /// + public override void SeekInternal(long pos) + { + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (base_Renamed != null) + { + base_Renamed.Close(); + } + } + + isDisposed = true; + } + + public override long Length() + { + return length; + } + + public IndexInput base_Renamed_ForNUnit + { + get { return base_Renamed; } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/CompoundFileWriter.cs b/external/Lucene.Net.Light/src/core/Index/CompoundFileWriter.cs new file mode 100644 index 0000000000..e2905e121b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/CompoundFileWriter.cs @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Index +{ + + + /// Combines multiple files into a single compound file. + /// The file format:
+ /// + /// VInt fileCount + /// {Directory} + /// fileCount entries with the following structure: + /// + /// long dataOffset + /// String fileName + /// + /// {File Data} + /// fileCount entries with the raw data of the corresponding file + /// + /// + /// The fileCount integer indicates how many files are contained in this compound + /// file. The {directory} that follows has that many entries. Each directory entry + /// contains a long pointer to the start of this file's data section, and a String + /// with that file's name. + ///
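+ /// For example, a compound file holding two sub-files begins with fileCount=2, followed by two + /// (dataOffset, fileName) directory entries and then the raw bytes of each sub-file in order. + /// A minimal usage sketch (the file names are illustrative): + /// var cfw = new CompoundFileWriter(dir, "_1.cfs"); + /// cfw.AddFile("_1.fnm"); + /// cfw.AddFile("_1.fdt"); + /// cfw.Dispose(); // writes the directory, copies each file's data, then patches the offsets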
+ public sealed class CompoundFileWriter : IDisposable + { + + private sealed class FileEntry + { + /// source file + internal System.String file; + + /// temporary holder for the start of directory entry for this file + internal long directoryOffset; + + /// temporary holder for the start of this file's data section + internal long dataOffset; + } + + + private readonly Directory directory; + private readonly String fileName; + private readonly HashSet ids; + private readonly LinkedList entries; + private bool merged = false; + private readonly SegmentMerger.CheckAbort checkAbort; + + /// Create the compound stream in the specified file. The file name is the + /// entire name (no extensions are added). + /// + /// NullPointerException if dir or name is null + public CompoundFileWriter(Directory dir, System.String name):this(dir, name, null) + { + } + + internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort) + { + if (dir == null) + throw new ArgumentNullException("dir"); + if (name == null) + throw new ArgumentNullException("name"); + this.checkAbort = checkAbort; + directory = dir; + fileName = name; + ids = new HashSet(); + entries = new LinkedList(); + } + + /// Returns the directory of the compound file. + public Directory Directory + { + get { return directory; } + } + + /// Returns the name of the compound file. + public string Name + { + get { return fileName; } + } + + /// Add a source stream. file is the string by which the + /// sub-stream will be known in the compound stream. + /// + /// + /// IllegalStateException if this writer is closed + /// NullPointerException if file is null + /// IllegalArgumentException if a file with the same name + /// has been added already + /// + public void AddFile(String file) + { + if (merged) + throw new InvalidOperationException("Can't add extensions after merge has been called"); + + if (file == null) + throw new ArgumentNullException("file"); + + try + { + ids.Add(file); + } + catch (Exception) + { + throw new ArgumentException("File " + file + " already added"); + } + + var entry = new FileEntry {file = file}; + entries.AddLast(entry); + } + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + /// Merge files with the extensions added up to now. + /// All files with these extensions are combined sequentially into the + /// compound stream. After successful merge, the source files + /// are deleted. + /// + /// IllegalStateException if close() had been called before or + /// if no file has been added to this object + /// + public void Dispose() + { + // Extract into protected method if class ever becomes unsealed + + // TODO: Dispose shouldn't throw exceptions! + if (merged) + throw new SystemException("Merge already performed"); + + if ((entries.Count == 0)) + throw new SystemException("No entries to merge have been defined"); + + merged = true; + + // open the compound stream + IndexOutput os = null; + try + { + os = directory.CreateOutput(fileName); + + // Write the number of entries + os.WriteVInt(entries.Count); + + // Write the directory with all offsets at 0. 
+ // Remember the positions of directory entries so that we can + // adjust the offsets later + long totalSize = 0; + foreach (FileEntry fe in entries) + { + fe.directoryOffset = os.FilePointer; + os.WriteLong(0); // for now + os.WriteString(fe.file); + totalSize += directory.FileLength(fe.file); + } + + // Pre-allocate size of file as optimization -- + // this can potentially help IO performance as + // we write the file and also later during + // searching. It also uncovers a disk-full + // situation earlier and hopefully without + // actually filling disk to 100%: + long finalLength = totalSize + os.FilePointer; + os.SetLength(finalLength); + + // Open the files and copy their data into the stream. + // Remember the locations of each file's data section. + var buffer = new byte[16384]; + foreach (FileEntry fe in entries) + { + fe.dataOffset = os.FilePointer; + CopyFile(fe, os, buffer); + } + + // Write the data offsets into the directory of the compound stream + foreach (FileEntry fe in entries) + { + os.Seek(fe.directoryOffset); + os.WriteLong(fe.dataOffset); + } + + System.Diagnostics.Debug.Assert(finalLength == os.Length); + + // Close the output stream. Set the os to null before trying to + // close so that if an exception occurs during the close, the + // finally clause below will not attempt to close the stream + // the second time. + IndexOutput tmp = os; + os = null; + tmp.Close(); + } + finally + { + if (os != null) + try + { + os.Close(); + } + catch (System.IO.IOException) + { + } + } + } + + + /// Copy the contents of the file with specified extension into the + /// provided output stream. Use the provided buffer for moving data + /// to reduce memory allocation. + /// + private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer) + { + IndexInput isRenamed = null; + try + { + long startPtr = os.FilePointer; + + isRenamed = directory.OpenInput(source.file); + long length = isRenamed.Length(); + long remainder = length; + int chunk = buffer.Length; + + while (remainder > 0) + { + var len = (int) Math.Min(chunk, remainder); + isRenamed.ReadBytes(buffer, 0, len, false); + os.WriteBytes(buffer, len); + remainder -= len; + if (checkAbort != null) + // Roughly every 2 MB we will check if + // it's time to abort + checkAbort.Work(80); + } + + // Verify that remainder is 0 + if (remainder != 0) + throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")"); + + // Verify that the output length diff is equal to original file + long endPtr = os.FilePointer; + long diff = endPtr - startPtr; + if (diff != length) + throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length); + } + finally + { + if (isRenamed != null) + isRenamed.Close(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ConcurrentMergeScheduler.cs b/external/Lucene.Net.Light/src/core/Index/ConcurrentMergeScheduler.cs new file mode 100644 index 0000000000..8b8a300820 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ConcurrentMergeScheduler.cs @@ -0,0 +1,504 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Lucene.Net.Support; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + /// A that runs each merge using a + /// separate thread, up until a maximum number of threads + /// () at which when a merge is + /// needed, the thread(s) that are updating the index will + /// pause until one or more merges completes. This is a + /// simple way to use concurrency in the indexing process + /// without having to create and manage application level + /// threads. + /// + + public class ConcurrentMergeScheduler:MergeScheduler + { + + private int mergeThreadPriority = - 1; + + protected internal IList mergeThreads = new List(); + + // Max number of threads allowed to be merging at once + private int _maxThreadCount = 1; + + protected internal Directory dir; + + private bool closed; + protected internal IndexWriter writer; + protected internal int mergeThreadCount; + + public ConcurrentMergeScheduler() + { + if (allInstances != null) + { + // Only for testing + AddMyself(); + } + } + + /// Gets or sets the max # simultaneous threads that may be + /// running. If a merge is necessary yet we already have + /// this many threads running, the incoming thread (that + /// is calling add/updateDocument) will block until + /// a merge thread has completed. + /// + public virtual int MaxThreadCount + { + set + { + if (value < 1) + throw new System.ArgumentException("count should be at least 1"); + _maxThreadCount = value; + } + get { return _maxThreadCount; } + } + + /// Return the priority that merge threads run at. By + /// default the priority is 1 plus the priority of (ie, + /// slightly higher priority than) the first thread that + /// calls merge. + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual int GetMergeThreadPriority() + { + lock (this) + { + InitMergeThreadPriority(); + return mergeThreadPriority; + } + } + + /// Set the priority that merge threads run at. + public virtual void SetMergeThreadPriority(int pri) + { + lock (this) + { + if (pri > (int) System.Threading.ThreadPriority.Highest || pri < (int) System.Threading.ThreadPriority.Lowest) + throw new System.ArgumentException("priority must be in range " + (int) System.Threading.ThreadPriority.Lowest + " .. 
" + (int) System.Threading.ThreadPriority.Highest + " inclusive"); + mergeThreadPriority = pri; + + int numThreads = MergeThreadCount(); + for (int i = 0; i < numThreads; i++) + { + MergeThread merge = mergeThreads[i]; + merge.SetThreadPriority(pri); + } + } + } + + private bool Verbose() + { + return writer != null && writer.Verbose; + } + + private void Message(System.String message) + { + if (Verbose()) + writer.Message("CMS: " + message); + } + + private void InitMergeThreadPriority() + { + lock (this) + { + if (mergeThreadPriority == - 1) + { + // Default to slightly higher priority than our + // calling thread + mergeThreadPriority = 1 + (System.Int32) ThreadClass.Current().Priority; + if (mergeThreadPriority > (int) System.Threading.ThreadPriority.Highest) + mergeThreadPriority = (int) System.Threading.ThreadPriority.Highest; + } + } + } + + protected override void Dispose(bool disposing) + { + //if (disposing) + //{ + closed = true; + //} + } + + public virtual void Sync() + { + lock (this) + { + while (MergeThreadCount() > 0) + { + if (Verbose()) + Message("now wait for threads; currently " + mergeThreads.Count + " still running"); + int count = mergeThreads.Count; + if (Verbose()) + { + for (int i = 0; i < count; i++) + Message(" " + i + ": " + mergeThreads[i]); + } + + System.Threading.Monitor.Wait(this); + + } + } + } + + private int MergeThreadCount() + { + lock (this) + { + int count = 0; + int numThreads = mergeThreads.Count; + for (int i = 0; i < numThreads; i++) + { + if (mergeThreads[i].IsAlive) + { + count++; + } + } + return count; + } + } + + public override void Merge(IndexWriter writer) + { + // TODO: .NET doesn't support this + // assert !Thread.holdsLock(writer); + + this.writer = writer; + + InitMergeThreadPriority(); + + dir = writer.Directory; + + // First, quickly run through the newly proposed merges + // and add any orthogonal merges (ie a merge not + // involving segments already pending to be merged) to + // the queue. If we are way behind on merging, many of + // these newly proposed merges will likely already be + // registered. + + if (Verbose()) + { + Message("now merge"); + Message(" index: " + writer.SegString()); + } + + // Iterate, pulling from the IndexWriter's queue of + // pending merges, until it's empty: + while (true) + { + // TODO: we could be careful about which merges to do in + // the BG (eg maybe the "biggest" ones) vs FG, which + // merges to do first (the easiest ones?), etc. 
+ + MergePolicy.OneMerge merge = writer.GetNextMerge(); + if (merge == null) + { + if (Verbose()) + Message(" no more merges pending; now return"); + return ; + } + + // We do this w/ the primary thread to keep + // deterministic assignment of segment names + writer.MergeInit(merge); + + bool success = false; + try + { + lock (this) + { + while (MergeThreadCount() >= _maxThreadCount) + { + if (Verbose()) + Message(" too many merge threads running; stalling..."); + + System.Threading.Monitor.Wait(this); + + + } + + if (Verbose()) + Message(" consider merge " + merge.SegString(dir)); + + System.Diagnostics.Debug.Assert(MergeThreadCount() < _maxThreadCount); + + // OK to spawn a new merge thread to handle this + // merge: + MergeThread merger = GetMergeThread(writer, merge); + mergeThreads.Add(merger); + if (Verbose()) + Message(" launch new thread [" + merger.Name + "]"); + + merger.Start(); + success = true; + } + } + finally + { + if (!success) + { + writer.MergeFinish(merge); + } + } + } + } + + /// Does the actual merge, by calling + protected internal virtual void DoMerge(MergePolicy.OneMerge merge) + { + writer.Merge(merge); + } + + /// Create and return a new MergeThread + protected internal virtual MergeThread GetMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) + { + lock (this) + { + var thread = new MergeThread(this, writer, merge); + thread.SetThreadPriority(mergeThreadPriority); + thread.IsBackground = true; + thread.Name = "Lucene Merge Thread #" + mergeThreadCount++; + return thread; + } + } + + public /*protected internal*/ class MergeThread:ThreadClass + { + private void InitBlock(ConcurrentMergeScheduler enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ConcurrentMergeScheduler enclosingInstance; + public ConcurrentMergeScheduler Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + internal IndexWriter writer; + internal MergePolicy.OneMerge startMerge; + internal MergePolicy.OneMerge runningMerge; + + public MergeThread(ConcurrentMergeScheduler enclosingInstance, IndexWriter writer, MergePolicy.OneMerge startMerge) + { + InitBlock(enclosingInstance); + this.writer = writer; + this.startMerge = startMerge; + } + + public virtual void SetRunningMerge(MergePolicy.OneMerge merge) + { + lock (this) + { + runningMerge = merge; + } + } + + public virtual MergePolicy.OneMerge RunningMerge + { + get + { + lock (this) + { + return runningMerge; + } + } + } + + public virtual void SetThreadPriority(int pri) + { + try + { + Priority = (System.Threading.ThreadPriority) pri; + } + catch (System.NullReferenceException) + { + // Strangely, Sun's JDK 1.5 on Linux sometimes + // throws NPE out of here... 
+ } + catch (System.Security.SecurityException) + { + // Ignore this because we will still run fine with + // normal thread priority + } + } + + override public void Run() + { + + // First time through the while loop we do the merge + // that we were started with: + MergePolicy.OneMerge merge = this.startMerge; + + try + { + + if (Enclosing_Instance.Verbose()) + Enclosing_Instance.Message(" merge thread: start"); + + while (true) + { + SetRunningMerge(merge); + Enclosing_Instance.DoMerge(merge); + + // Subsequent times through the loop we do any new + // merge that writer says is necessary: + merge = writer.GetNextMerge(); + if (merge != null) + { + writer.MergeInit(merge); + if (Enclosing_Instance.Verbose()) + Enclosing_Instance.Message(" merge thread: do another merge " + merge.SegString(Enclosing_Instance.dir)); + } + else + break; + } + + if (Enclosing_Instance.Verbose()) + Enclosing_Instance.Message(" merge thread: done"); + } + catch (System.Exception exc) + { + // Ignore the exception if it was due to abort: + if (!(exc is MergePolicy.MergeAbortedException)) + { + if (!Enclosing_Instance.suppressExceptions) + { + // suppressExceptions is normally only set during + // testing. + Lucene.Net.Index.ConcurrentMergeScheduler.anyExceptions = true; + Enclosing_Instance.HandleMergeException(exc); + } + } + } + finally + { + lock (Enclosing_Instance) + { + System.Threading.Monitor.PulseAll(Enclosing_Instance); + Enclosing_Instance.mergeThreads.Remove(this); + bool removed = !Enclosing_Instance.mergeThreads.Contains(this); + System.Diagnostics.Debug.Assert(removed); + } + } + } + + public override System.String ToString() + { + MergePolicy.OneMerge merge = RunningMerge ?? startMerge; + return "merge thread: " + merge.SegString(Enclosing_Instance.dir); + } + } + + /// Called when an exception is hit in a background merge + /// thread + /// + protected internal virtual void HandleMergeException(System.Exception exc) + { + // When an exception is hit during merge, IndexWriter + // removes any partial files and then allows another + // merge to run. 
If whatever caused the error is not + // transient then the exception will keep happening, + // so, we sleep here to avoid saturating CPU in such + // cases: + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 250)); + + throw new MergePolicy.MergeException(exc, dir); + } + + internal static bool anyExceptions = false; + + /// Used for testing + public static bool AnyUnhandledExceptions() + { + if (allInstances == null) + { + throw new System.SystemException("setTestMode() was not called; often this is because your test case's setUp method fails to call super.setUp in LuceneTestCase"); + } + lock (allInstances) + { + int count = allInstances.Count; + // Make sure all outstanding threads are done so we see + // any exceptions they may produce: + for (int i = 0; i < count; i++) + allInstances[i].Sync(); + bool v = anyExceptions; + anyExceptions = false; + return v; + } + } + + public static void ClearUnhandledExceptions() + { + lock (allInstances) + { + anyExceptions = false; + } + } + + /// Used for testing + private void AddMyself() + { + lock (allInstances) + { + int size = allInstances.Count; + int upto = 0; + for (int i = 0; i < size; i++) + { + ConcurrentMergeScheduler other = allInstances[i]; + if (!(other.closed && 0 == other.MergeThreadCount())) + // Keep this one for now: it still has threads or + // may spawn new threads + allInstances[upto++] = other; + } + allInstances.RemoveRange(upto, allInstances.Count - upto); + allInstances.Add(this); + } + } + + private bool suppressExceptions; + + /// Used for testing + public /*internal*/ virtual void SetSuppressExceptions() + { + suppressExceptions = true; + } + + /// Used for testing + public /*internal*/ virtual void ClearSuppressExceptions() + { + suppressExceptions = false; + } + + /// Used for testing + private static List allInstances; + public static void SetTestMode() + { + allInstances = new List(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/CorruptIndexException.cs b/external/Lucene.Net.Light/src/core/Index/CorruptIndexException.cs new file mode 100644 index 0000000000..d846cb38ff --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/CorruptIndexException.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// This exception is thrown when Lucene detects + /// an inconsistency in the index. 
+ /// + [Serializable] + public class CorruptIndexException:System.IO.IOException + { + public CorruptIndexException(String message):base(message) + { + } + public CorruptIndexException(String message, Exception exp):base(message, exp) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DefaultSkipListReader.cs b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListReader.cs new file mode 100644 index 0000000000..a1cdddec36 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListReader.cs @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using Lucene.Net.Support; +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + + /// Implements the skip list reader for the default posting list format + /// that stores positions and payloads. + /// + /// + class DefaultSkipListReader:MultiLevelSkipListReader + { + private bool currentFieldStoresPayloads; + private readonly long[] freqPointer; + private readonly long[] proxPointer; + private readonly int[] payloadLength; + + private long lastFreqPointer; + private long lastProxPointer; + private int lastPayloadLength; + + + internal DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval):base(skipStream, maxSkipLevels, skipInterval) + { + freqPointer = new long[maxSkipLevels]; + proxPointer = new long[maxSkipLevels]; + payloadLength = new int[maxSkipLevels]; + } + + internal virtual void Init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, bool storesPayloads) + { + base.Init(skipPointer, df); + this.currentFieldStoresPayloads = storesPayloads; + lastFreqPointer = freqBasePointer; + lastProxPointer = proxBasePointer; + + for (int i = 0; i < freqPointer.Length; i++) freqPointer[i] = freqBasePointer; + for (int i = 0; i < proxPointer.Length; i++) proxPointer[i] = proxBasePointer; + for (int i = 0; i < payloadLength.Length; i++) payloadLength[i] = 0; + } + + /// Returns the freq pointer of the doc to which the last call of + /// has skipped. + /// + internal virtual long GetFreqPointer() + { + return lastFreqPointer; + } + + /// Returns the prox pointer of the doc to which the last call of + /// has skipped. + /// + internal virtual long GetProxPointer() + { + return lastProxPointer; + } + + /// Returns the payload length of the payload stored just before + /// the doc to which the last call of + /// has skipped. 
+ /// + internal virtual int GetPayloadLength() + { + return lastPayloadLength; + } + + protected internal override void SeekChild(int level) + { + base.SeekChild(level); + freqPointer[level] = lastFreqPointer; + proxPointer[level] = lastProxPointer; + payloadLength[level] = lastPayloadLength; + } + + protected internal override void SetLastSkipData(int level) + { + base.SetLastSkipData(level); + lastFreqPointer = freqPointer[level]; + lastProxPointer = proxPointer[level]; + lastPayloadLength = payloadLength[level]; + } + + + protected internal override int ReadSkipData(int level, IndexInput skipStream) + { + int delta; + if (currentFieldStoresPayloads) + { + // the current field stores payloads. + // if the doc delta is odd then we have + // to read the current payload length + // because it differs from the length of the + // previous payload + delta = skipStream.ReadVInt(); + if ((delta & 1) != 0) + { + payloadLength[level] = skipStream.ReadVInt(); + } + delta = Number.URShift(delta, 1); + } + else + { + delta = skipStream.ReadVInt(); + } + freqPointer[level] += skipStream.ReadVInt(); + proxPointer[level] += skipStream.ReadVInt(); + + return delta; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DefaultSkipListWriter.cs b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListWriter.cs new file mode 100644 index 0000000000..77412af81d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DefaultSkipListWriter.cs @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Index +{ + + + /// Implements the skip list writer for the default posting list format + /// that stores positions and payloads. 
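// Editor's note: illustrative sketch, not part of the vendored Lucene.Net source. When a field
// stores payloads, ReadSkipData above and WriteSkipData below pack the doc delta and a
// "payload length follows" flag into a single VInt: the writer emits delta*2+1 plus the new
// length when the payload length changed, or delta*2 when it did not. For example (hypothetical
// values), a doc delta of 3 with a changed payload length is written as 7 followed by the length;
// the reader sees the low bit set, reads the length, and recovers 7 >> 1 == 3. A standalone
// decoder for that case:
static int DecodeSkipDatum(int raw, System.Func<int> readVInt, out int payloadLength)
{
    payloadLength = -1;              // -1 means "unchanged since the previous skip point"
    if ((raw & 1) != 0)              // low bit set: a new payload length follows as a VInt
        payloadLength = readVInt();
    return raw >> 1;                 // remaining bits are the doc delta (raw is non-negative)
}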
+ /// + /// + class DefaultSkipListWriter:MultiLevelSkipListWriter + { + private int[] lastSkipDoc; + private int[] lastSkipPayloadLength; + private long[] lastSkipFreqPointer; + private long[] lastSkipProxPointer; + + private IndexOutput freqOutput; + private IndexOutput proxOutput; + + private int curDoc; + private bool curStorePayloads; + private int curPayloadLength; + private long curFreqPointer; + private long curProxPointer; + + internal DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput):base(skipInterval, numberOfSkipLevels, docCount) + { + this.freqOutput = freqOutput; + this.proxOutput = proxOutput; + + lastSkipDoc = new int[numberOfSkipLevels]; + lastSkipPayloadLength = new int[numberOfSkipLevels]; + lastSkipFreqPointer = new long[numberOfSkipLevels]; + lastSkipProxPointer = new long[numberOfSkipLevels]; + } + + internal virtual void SetFreqOutput(IndexOutput freqOutput) + { + this.freqOutput = freqOutput; + } + + internal virtual void SetProxOutput(IndexOutput proxOutput) + { + this.proxOutput = proxOutput; + } + + /// Sets the values for the current skip data. + internal virtual void SetSkipData(int doc, bool storePayloads, int payloadLength) + { + this.curDoc = doc; + this.curStorePayloads = storePayloads; + this.curPayloadLength = payloadLength; + this.curFreqPointer = freqOutput.FilePointer; + if (proxOutput != null) + this.curProxPointer = proxOutput.FilePointer; + } + + protected internal override void ResetSkip() + { + base.ResetSkip(); + for (int i = 0; i < lastSkipDoc.Length; i++) lastSkipDoc[i] = 0; + for (int i = 0; i < lastSkipPayloadLength.Length; i++) lastSkipPayloadLength[i] = -1; // we don't have to write the first length in the skip list + for (int i = 0; i < lastSkipFreqPointer.Length; i++) lastSkipFreqPointer[i] = freqOutput.FilePointer; + if (proxOutput != null) + for (int i = 0; i < lastSkipProxPointer.Length; i++) lastSkipProxPointer[i] = proxOutput.FilePointer; + } + + protected internal override void WriteSkipData(int level, IndexOutput skipBuffer) + { + // To efficiently store payloads in the posting lists we do not store the length of + // every payload. Instead we omit the length for a payload if the previous payload had + // the same length. + // However, in order to support skipping the payload length at every skip point must be known. + // So we use the same length encoding that we use for the posting lists for the skip data as well: + // Case 1: current field does not store payloads + // SkipDatum --> DocSkip, FreqSkip, ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // DocSkip records the document number before every SkipInterval th document in TermFreqs. + // Document numbers are represented as differences from the previous value in the sequence. + // Case 2: current field stores payloads + // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // PayloadLength --> VInt + // In this case DocSkip/2 is the difference between + // the current and the previous value. 
If DocSkip + // is odd, then a PayloadLength encoded as VInt follows, + // if DocSkip is even, then it is assumed that the + // current payload length equals the length at the previous + // skip point + if (curStorePayloads) + { + int delta = curDoc - lastSkipDoc[level]; + if (curPayloadLength == lastSkipPayloadLength[level]) + { + // the current payload length equals the length at the previous skip point, + // so we don't store the length again + skipBuffer.WriteVInt(delta * 2); + } + else + { + // the payload length is different from the previous one. We shift the DocSkip, + // set the lowest bit and store the current payload length as VInt. + skipBuffer.WriteVInt(delta * 2 + 1); + skipBuffer.WriteVInt(curPayloadLength); + lastSkipPayloadLength[level] = curPayloadLength; + } + } + else + { + // current field does not store payloads + skipBuffer.WriteVInt(curDoc - lastSkipDoc[level]); + } + skipBuffer.WriteVInt((int) (curFreqPointer - lastSkipFreqPointer[level])); + skipBuffer.WriteVInt((int) (curProxPointer - lastSkipProxPointer[level])); + + lastSkipDoc[level] = curDoc; + //System.out.println("write doc at level " + level + ": " + curDoc); + + lastSkipFreqPointer[level] = curFreqPointer; + lastSkipProxPointer[level] = curProxPointer; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DirectoryReader.cs b/external/Lucene.Net.Light/src/core/Index/DirectoryReader.cs new file mode 100644 index 0000000000..574448ddf8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DirectoryReader.cs @@ -0,0 +1,1548 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Support; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using Directory = Lucene.Net.Store.Directory; +using Lock = Lucene.Net.Store.Lock; +using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException; +using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity; + +namespace Lucene.Net.Index +{ + + /// An IndexReader which reads indexes with multiple segments. 
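// Editor's note: illustrative sketch, not part of the vendored Lucene.Net source. The reader below
// keeps a "starts" array in which starts[i] is the first global doc number served by sub-reader i
// and starts[subReaders.Length] equals MaxDoc; the ReaderIndex binary search further down maps a
// global doc number to its owning segment. A linear-scan equivalent, with hypothetical values:
//   starts = { 0, 50, 120, 200 }  ->  global doc 57 lives in segment 1 as local doc 57 - 50 = 7.
static int FindSegment(int docId, int[] starts)
{
    for (int i = starts.Length - 2; i >= 0; i--)   // last entry of starts is maxDoc, not a segment
        if (docId >= starts[i])
            return i;
    throw new System.ArgumentOutOfRangeException("docId");
}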
+ public class DirectoryReader:IndexReader + { + /*new*/ private class AnonymousClassFindSegmentsFile:SegmentInfos.FindSegmentsFile + { + private void InitBlock(bool readOnly, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor) + { + this.readOnly = readOnly; + this.deletionPolicy = deletionPolicy; + this.termInfosIndexDivisor = termInfosIndexDivisor; + } + private bool readOnly; + private IndexDeletionPolicy deletionPolicy; + private int termInfosIndexDivisor; + internal AnonymousClassFindSegmentsFile(bool readOnly, Lucene.Net.Index.IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor, Lucene.Net.Store.Directory Param1):base(Param1) + { + InitBlock(readOnly, deletionPolicy, termInfosIndexDivisor); + } + public /*protected internal*/ override System.Object DoBody(System.String segmentFileName) + { + var infos = new SegmentInfos(); + infos.Read(directory, segmentFileName); + if (readOnly) + return new ReadOnlyDirectoryReader(directory, infos, deletionPolicy, termInfosIndexDivisor); + else + return new DirectoryReader(directory, infos, deletionPolicy, false, termInfosIndexDivisor); + } + } + private class AnonymousClassFindSegmentsFile1:SegmentInfos.FindSegmentsFile + { + private void InitBlock(bool openReadOnly, DirectoryReader enclosingInstance) + { + this.openReadOnly = openReadOnly; + this.enclosingInstance = enclosingInstance; + } + private bool openReadOnly; + private DirectoryReader enclosingInstance; + public DirectoryReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFindSegmentsFile1(bool openReadOnly, DirectoryReader enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1) + { + InitBlock(openReadOnly, enclosingInstance); + } + public /*protected internal*/ override System.Object DoBody(System.String segmentFileName) + { + var infos = new SegmentInfos(); + infos.Read(directory, segmentFileName); + return Enclosing_Instance.DoReopen(infos, false, openReadOnly); + } + } + protected internal Directory internalDirectory; + protected internal bool readOnly; + + internal IndexWriter writer; + + private IndexDeletionPolicy deletionPolicy; + private readonly HashSet synced = new HashSet(); + private Lock writeLock; + private readonly SegmentInfos segmentInfos; + private readonly SegmentInfos segmentInfosStart; + private bool stale; + private readonly int termInfosIndexDivisor; + + private bool rollbackHasChanges; + + private SegmentReader[] subReaders; + private int[] starts; // 1st docno for each segment + private System.Collections.Generic.IDictionary normsCache = new HashMap(); + private int maxDoc = 0; + private int numDocs = - 1; + private bool hasDeletions = false; + + // Max version in index as of when we opened; this can be + // > our current segmentInfos version in case we were + // opened on a past IndexCommit: + private long maxIndexVersion; + + internal static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor) + { + return (IndexReader) new AnonymousClassFindSegmentsFile(readOnly, deletionPolicy, termInfosIndexDivisor, directory).Run(commit); + } + + /// Construct reading the named set of readers. 
+ internal DirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor) + { + internalDirectory = directory; + this.readOnly = readOnly; + this.segmentInfos = sis; + this.deletionPolicy = deletionPolicy; + this.termInfosIndexDivisor = termInfosIndexDivisor; + + if (!readOnly) + { + // We assume that this segments_N was previously + // properly sync'd: + synced.UnionWith(sis.Files(directory, true)); + } + + // To reduce the chance of hitting FileNotFound + // (and having to retry), we open segments in + // reverse because IndexWriter merges & deletes + // the newest segments first. + + var readers = new SegmentReader[sis.Count]; + for (int i = sis.Count - 1; i >= 0; i--) + { + bool success = false; + try + { + readers[i] = SegmentReader.Get(readOnly, sis.Info(i), termInfosIndexDivisor); + success = true; + } + finally + { + if (!success) + { + // Close all readers we had opened: + for (i++; i < sis.Count; i++) + { + try + { + readers[i].Close(); + } + catch (System.Exception) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + + Initialize(readers); + } + + // Used by near real-time search + internal DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor) + { + this.internalDirectory = writer.Directory; + this.readOnly = true; + segmentInfos = infos; + segmentInfosStart = (SegmentInfos) infos.Clone(); + this.termInfosIndexDivisor = termInfosIndexDivisor; + if (!readOnly) + { + // We assume that this segments_N was previously + // properly sync'd: + synced.UnionWith(infos.Files(internalDirectory, true)); + } + + // IndexWriter synchronizes externally before calling + // us, which ensures infos will not change; so there's + // no need to process segments in reverse order + int numSegments = infos.Count; + var readers = new SegmentReader[numSegments]; + Directory dir = writer.Directory; + int upto = 0; + + for (int i = 0; i < numSegments; i++) + { + bool success = false; + try + { + SegmentInfo info = infos.Info(i); + if (info.dir == dir) + { + readers[upto++] = writer.readerPool.GetReadOnlyClone(info, true, termInfosIndexDivisor); + } + success = true; + } + finally + { + if (!success) + { + // Close all readers we had opened: + for (upto--; upto >= 0; upto--) + { + try + { + readers[upto].Close(); + } + catch (System.Exception) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + + this.writer = writer; + + if (upto < readers.Length) + { + // This means some segments were in a foreign Directory + var newReaders = new SegmentReader[upto]; + Array.Copy(readers, 0, newReaders, 0, upto); + readers = newReaders; + } + + Initialize(readers); + } + + /// This constructor is only used for + internal DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, + IEnumerable> oldNormsCache, bool readOnly, bool doClone, int termInfosIndexDivisor) + { + this.internalDirectory = directory; + this.readOnly = readOnly; + this.segmentInfos = infos; + this.termInfosIndexDivisor = termInfosIndexDivisor; + if (!readOnly) + { + // We assume that this segments_N was previously + // properly sync'd: + synced.UnionWith(infos.Files(directory, true)); + } + + // we put the old SegmentReaders in a map, that allows us + // to lookup a reader using its segment name + IDictionary segmentReaders = new HashMap(); + + if (oldReaders != null) + { + // create a Map SegmentName->SegmentReader + for (int i = 0; i < 
oldReaders.Length; i++) + { + segmentReaders[oldReaders[i].SegmentName] = i; + } + } + + var newReaders = new SegmentReader[infos.Count]; + + // remember which readers are shared between the old and the re-opened + // DirectoryReader - we have to incRef those readers + var readerShared = new bool[infos.Count]; + + for (int i = infos.Count - 1; i >= 0; i--) + { + // find SegmentReader for this segment + if (!segmentReaders.ContainsKey(infos.Info(i).name)) + { + // this is a new segment, no old SegmentReader can be reused + newReaders[i] = null; + } + else + { + // there is an old reader for this segment - we'll try to reopen it + newReaders[i] = oldReaders[segmentReaders[infos.Info(i).name]]; + } + + bool success = false; + try + { + SegmentReader newReader; + if (newReaders[i] == null || infos.Info(i).GetUseCompoundFile() != newReaders[i].SegmentInfo.GetUseCompoundFile()) + { + + // We should never see a totally new segment during cloning + System.Diagnostics.Debug.Assert(!doClone); + + // this is a new reader; in case we hit an exception we can close it safely + newReader = SegmentReader.Get(readOnly, infos.Info(i), termInfosIndexDivisor); + } + else + { + newReader = newReaders[i].ReopenSegment(infos.Info(i), doClone, readOnly); + } + if (newReader == newReaders[i]) + { + // this reader will be shared between the old and the new one, + // so we must incRef it + readerShared[i] = true; + newReader.IncRef(); + } + else + { + readerShared[i] = false; + newReaders[i] = newReader; + } + success = true; + } + finally + { + if (!success) + { + for (i++; i < infos.Count; i++) + { + if (newReaders[i] != null) + { + try + { + if (!readerShared[i]) + { + // this is a new subReader that is not used by the old one, + // we can close it + newReaders[i].Close(); + } + else + { + // this subReader is also used by the old reader, so instead + // closing we must decRef it + newReaders[i].DecRef(); + } + } + catch (System.IO.IOException) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + } + + // initialize the readers to calculate maxDoc before we try to reuse the old normsCache + Initialize(newReaders); + + // try to copy unchanged norms from the old normsCache to the new one + if (oldNormsCache != null) + { + foreach(var entry in oldNormsCache) + { + String field = entry.Key; + if (!HasNorms(field)) + { + continue; + } + + byte[] oldBytes = entry.Value; + + var bytes = new byte[MaxDoc]; + + for (int i = 0; i < subReaders.Length; i++) + { + int oldReaderIndex = segmentReaders[subReaders[i].SegmentName]; + + // this SegmentReader was not re-opened, we can copy all of its norms + if (segmentReaders.ContainsKey(subReaders[i].SegmentName) && + (oldReaders[oldReaderIndex] == subReaders[i] + || oldReaders[oldReaderIndex].norms[field] == subReaders[i].norms[field])) + { + // we don't have to synchronize here: either this constructor is called from a SegmentReader, + // in which case no old norms cache is present, or it is called from MultiReader.reopen(), + // which is synchronized + Array.Copy(oldBytes, oldStarts[oldReaderIndex], bytes, starts[i], starts[i + 1] - starts[i]); + } + else + { + subReaders[i].Norms(field, bytes, starts[i]); + } + } + + normsCache[field] = bytes; // update cache + } + } + } + + private void Initialize(SegmentReader[] subReaders) + { + this.subReaders = subReaders; + starts = new int[subReaders.Length + 1]; // build starts array + for (int i = 0; i < subReaders.Length; i++) + { + starts[i] = maxDoc; + maxDoc += subReaders[i].MaxDoc; // compute 
maxDocs + + if (subReaders[i].HasDeletions) + hasDeletions = true; + } + starts[subReaders.Length] = maxDoc; + + if (!readOnly) + { + maxIndexVersion = SegmentInfos.ReadCurrentVersion(internalDirectory); + } + } + + public override Object Clone() + { + lock (this) + { + try + { + return Clone(readOnly); // Preserve current readOnly + } + catch (Exception ex) + { + throw new SystemException(ex.Message, ex); // TODO: why rethrow this way? + } + } + } + + public override IndexReader Clone(bool openReadOnly) + { + lock (this) + { + DirectoryReader newReader = DoReopen((SegmentInfos) segmentInfos.Clone(), true, openReadOnly); + + if (this != newReader) + { + newReader.deletionPolicy = deletionPolicy; + } + newReader.writer = writer; + // If we're cloning a non-readOnly reader, move the + // writeLock (if there is one) to the new reader: + if (!openReadOnly && writeLock != null) + { + // In near real-time search, reader is always readonly + System.Diagnostics.Debug.Assert(writer == null); + newReader.writeLock = writeLock; + newReader.hasChanges = hasChanges; + newReader.hasDeletions = hasDeletions; + writeLock = null; + hasChanges = false; + } + + return newReader; + } + } + + public override IndexReader Reopen() + { + // Preserve current readOnly + return DoReopen(readOnly, null); + } + + public override IndexReader Reopen(bool openReadOnly) + { + return DoReopen(openReadOnly, null); + } + + public override IndexReader Reopen(IndexCommit commit) + { + return DoReopen(true, commit); + } + + private IndexReader DoReopenFromWriter(bool openReadOnly, IndexCommit commit) + { + System.Diagnostics.Debug.Assert(readOnly); + + if (!openReadOnly) + { + throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)"); + } + + if (commit != null) + { + throw new System.ArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit"); + } + + // TODO: right now we *always* make a new reader; in + // the future we could have write make some effort to + // detect that no changes have occurred + return writer.GetReader(); + } + + internal virtual IndexReader DoReopen(bool openReadOnly, IndexCommit commit) + { + EnsureOpen(); + + System.Diagnostics.Debug.Assert(commit == null || openReadOnly); + + // If we were obtained by writer.getReader(), re-ask the + // writer to get a new reader. 
+ if (writer != null) + { + return DoReopenFromWriter(openReadOnly, commit); + } + else + { + return DoReopenNoWriter(openReadOnly, commit); + } + } + + private IndexReader DoReopenNoWriter(bool openReadOnly, IndexCommit commit) + { + lock (this) + { + if (commit == null) + { + if (hasChanges) + { + // We have changes, which means we are not readOnly: + System.Diagnostics.Debug.Assert(readOnly == false); + // and we hold the write lock: + System.Diagnostics.Debug.Assert(writeLock != null); + // so no other writer holds the write lock, which + // means no changes could have been done to the index: + System.Diagnostics.Debug.Assert(IsCurrent()); + + if (openReadOnly) + { + return Clone(openReadOnly); + } + else + { + return this; + } + } + else if (IsCurrent()) + { + if (openReadOnly != readOnly) + { + // Just fallback to clone + return Clone(openReadOnly); + } + else + { + return this; + } + } + } + else + { + if (internalDirectory != commit.Directory) + throw new System.IO.IOException("the specified commit does not match the specified Directory"); + if (segmentInfos != null && commit.SegmentsFileName.Equals(segmentInfos.GetCurrentSegmentFileName())) + { + if (readOnly != openReadOnly) + { + // Just fallback to clone + return Clone(openReadOnly); + } + else + { + return this; + } + } + } + + return (IndexReader)new AnonymousFindSegmentsFile(internalDirectory, openReadOnly, this).Run(commit); + } + } + + class AnonymousFindSegmentsFile : SegmentInfos.FindSegmentsFile + { + readonly DirectoryReader enclosingInstance; + readonly bool openReadOnly; + readonly Directory dir; + public AnonymousFindSegmentsFile(Directory directory, bool openReadOnly, DirectoryReader dirReader) : base(directory) + { + this.dir = directory; + this.openReadOnly = openReadOnly; + enclosingInstance = dirReader; + } + + public override object DoBody(string segmentFileName) + { + var infos = new SegmentInfos(); + infos.Read(dir, segmentFileName); + return enclosingInstance.DoReopen(infos, false, openReadOnly); + } + } + + private DirectoryReader DoReopen(SegmentInfos infos, bool doClone, bool openReadOnly) + { + lock (this) + { + DirectoryReader reader; + if (openReadOnly) + { + reader = new ReadOnlyDirectoryReader(internalDirectory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); + } + else + { + reader = new DirectoryReader(internalDirectory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor); + } + return reader; + } + } + + + /// Version number when this IndexReader was opened. 
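// Editor's note: illustrative usage sketch, not part of the vendored Lucene.Net source. Callers
// typically refresh a reader with the Reopen() declared above and switch over only when a new
// instance comes back; the stale instance remains the caller's to close. 'reader' is an assumed
// pre-existing IndexReader variable.
IndexReader refreshed = reader.Reopen();
if (refreshed != reader)        // the index changed since 'reader' was opened
{
    reader.Close();             // Reopen() never closes the old reader for us
    reader = refreshed;
}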
+ public override long Version + { + get + { + EnsureOpen(); + return segmentInfos.Version; + } + } + + public override ITermFreqVector[] GetTermFreqVectors(int n) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment + } + + public override ITermFreqVector GetTermFreqVector(int n, System.String field) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].GetTermFreqVector(n - starts[i], field); + } + + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + int i = ReaderIndex(docNumber); // find segment num + subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper); + } + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + int i = ReaderIndex(docNumber); // find segment num + subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper); + } + + /// Checks is the index is optimized (if it has a single segment and no deletions) + /// &lt;c&gt;true&lt;/c&gt; if the index is optimized; &lt;c&gt;false&lt;/c&gt; otherwise + public override bool IsOptimized() + { + EnsureOpen(); + return segmentInfos.Count == 1 && !HasDeletions; + } + + public override int NumDocs() + { + // Don't call ensureOpen() here (it could affect performance) + // NOTE: multiple threads may wind up init'ing + // numDocs... but that's harmless + if (numDocs == - 1) + { + // check cache + int n = subReaders.Sum(t => t.NumDocs()); // cache miss--recompute + numDocs = n; + } + return numDocs; + } + + public override int MaxDoc + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return maxDoc; + } + } + + // inherit javadoc + public override Document Document(int n, FieldSelector fieldSelector) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader + } + + public override bool IsDeleted(int n) + { + // Don't call ensureOpen() here (it could affect performance) + int i = ReaderIndex(n); // find segment num + return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader + } + + public override bool HasDeletions + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return hasDeletions; + } + } + + protected internal override void DoDelete(int n) + { + numDocs = - 1; // invalidate cache + int i = ReaderIndex(n); // find segment num + subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader + hasDeletions = true; + } + + protected internal override void DoUndeleteAll() + { + foreach (SegmentReader t in subReaders) + t.UndeleteAll(); + + hasDeletions = false; + numDocs = - 1; // invalidate cache + } + + private int ReaderIndex(int n) + { + // find reader for doc n: + return ReaderIndex(n, this.starts, this.subReaders.Length); + } + + internal static int ReaderIndex(int n, int[] starts, int numSubReaders) + { + // find reader for doc n: + int lo = 0; // search starts array + int hi = numSubReaders - 1; // for first element less + + while (hi >= lo) + { + int mid = Number.URShift((lo + hi), 1); + int midValue = starts[mid]; + if (n < midValue) + hi = mid - 1; + else if (n > midValue) + lo = mid + 1; + else + { + // found a match + while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) + { + mid++; // scan to last match + } + return mid; + } + } + return hi; + } + + public override 
bool HasNorms(System.String field) + { + EnsureOpen(); + return subReaders.Any(t => t.HasNorms(field)); + } + + public override byte[] Norms(System.String field) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = normsCache[field]; + if (bytes != null) + return bytes; // cache hit + if (!HasNorms(field)) + return null; + + bytes = new byte[MaxDoc]; + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].Norms(field, bytes, starts[i]); + normsCache[field] = bytes; // update cache + return bytes; + } + } + + public override void Norms(System.String field, byte[] result, int offset) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = normsCache[field]; + if (bytes == null && !HasNorms(field)) + { + byte val = DefaultSimilarity.EncodeNorm(1.0f); + for (int index = offset; index < result.Length; index++) + result.SetValue(val, index); + } + else if (bytes != null) + { + // cache hit + Array.Copy(bytes, 0, result, offset, MaxDoc); + } + else + { + for (int i = 0; i < subReaders.Length; i++) + { + // read from segments + subReaders[i].Norms(field, result, offset + starts[i]); + } + } + } + } + + protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed) + { + lock (normsCache) + { + normsCache.Remove(field); // clear cache + } + int i = ReaderIndex(n); // find segment num + subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch + } + + public override TermEnum Terms() + { + EnsureOpen(); + return new MultiTermEnum(this, subReaders, starts, null); + } + + public override TermEnum Terms(Term term) + { + EnsureOpen(); + return new MultiTermEnum(this, subReaders, starts, term); + } + + public override int DocFreq(Term t) + { + EnsureOpen(); + int total = 0; // sum freqs in segments + for (int i = 0; i < subReaders.Length; i++) + total += subReaders[i].DocFreq(t); + return total; + } + + public override TermDocs TermDocs() + { + EnsureOpen(); + return new MultiTermDocs(this, subReaders, starts); + } + + public override TermPositions TermPositions() + { + EnsureOpen(); + return new MultiTermPositions(this, subReaders, starts); + } + + /// Tries to acquire the WriteLock on this directory. this method is only valid if this IndexReader is directory + /// owner. + /// + /// + /// StaleReaderException if the index has changed since this reader was opened + /// CorruptIndexException if the index is corrupt + /// Lucene.Net.Store.LockObtainFailedException + /// if another writer has this index open (write.lock could not be + /// obtained) + /// + /// IOException if there is a low-level IO error + protected internal override void AcquireWriteLock() + { + + if (readOnly) + { + // NOTE: we should not reach this code w/ the core + // IndexReader classes; however, an external subclass + // of IndexReader could reach this. + ReadOnlySegmentReader.NoWrite(); + } + + if (segmentInfos != null) + { + EnsureOpen(); + if (stale) + throw new StaleReaderException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations"); + + if (this.writeLock == null) + { + Lock writeLock = internalDirectory.MakeLock(IndexWriter.WRITE_LOCK_NAME); + if (!writeLock.Obtain(IndexWriter.WRITE_LOCK_TIMEOUT)) + // obtain write lock + { + throw new LockObtainFailedException("Index locked for write: " + writeLock); + } + this.writeLock = writeLock; + + // we have to check whether index has changed since this reader was opened. 
+ // if so, this reader is no longer valid for + // deletion + if (SegmentInfos.ReadCurrentVersion(internalDirectory) > maxIndexVersion) + { + stale = true; + this.writeLock.Release(); + this.writeLock = null; + throw new StaleReaderException("IndexReader out of date and no longer valid for delete, undelete, or setNorm operations"); + } + } + } + } + + /// Commit changes resulting from delete, undeleteAll, or setNorm operations + ///

+ /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional + /// semantics). + /// + ///

+ /// IOException if there is a low-level IO error + protected internal override void DoCommit(IDictionary commitUserData) + { + if (hasChanges) + { + segmentInfos.UserData = commitUserData; + // Default deleter (for backwards compatibility) is + // KeepOnlyLastCommitDeleter: + var deleter = new IndexFileDeleter(internalDirectory, deletionPolicy ?? new KeepOnlyLastCommitDeletionPolicy(), segmentInfos, null, null, synced); + + segmentInfos.UpdateGeneration(deleter.LastSegmentInfos); + + // Checkpoint the state we are about to change, in + // case we have to roll back: + StartCommit(); + + bool success = false; + try + { + foreach (SegmentReader t in subReaders) + t.Commit(); + + // Sync all files we just wrote + foreach(string fileName in segmentInfos.Files(internalDirectory, false)) + { + if(!synced.Contains(fileName)) + { + System.Diagnostics.Debug.Assert(internalDirectory.FileExists(fileName)); + internalDirectory.Sync(fileName); + synced.Add(fileName); + } + } + + segmentInfos.Commit(internalDirectory); + success = true; + } + finally + { + + if (!success) + { + + // Rollback changes that were made to + // SegmentInfos but failed to get [fully] + // committed. This way this reader instance + // remains consistent (matched to what's + // actually in the index): + RollbackCommit(); + + // Recompute deletable files & remove them (so + // partially written .del files, etc, are + // removed): + deleter.Refresh(); + } + } + + // Have the deleter remove any now unreferenced + // files due to this commit: + deleter.Checkpoint(segmentInfos, true); + deleter.Dispose(); + + maxIndexVersion = segmentInfos.Version; + + if (writeLock != null) + { + writeLock.Release(); // release write lock + writeLock = null; + } + } + hasChanges = false; + } + + internal virtual void StartCommit() + { + rollbackHasChanges = hasChanges; + foreach (SegmentReader t in subReaders) + { + t.StartCommit(); + } + } + + internal virtual void RollbackCommit() + { + hasChanges = rollbackHasChanges; + foreach (SegmentReader t in subReaders) + { + t.RollbackCommit(); + } + } + + public override IDictionary CommitUserData + { + get + { + EnsureOpen(); + return segmentInfos.UserData; + } + } + + public override bool IsCurrent() + { + EnsureOpen(); + if (writer == null || writer.IsClosed()) + { + // we loaded SegmentInfos from the directory + return SegmentInfos.ReadCurrentVersion(internalDirectory) == segmentInfos.Version; + } + else + { + return writer.NrtIsCurrent(segmentInfosStart); + } + } + + protected internal override void DoClose() + { + lock (this) + { + System.IO.IOException ioe = null; + normsCache = null; + foreach (SegmentReader t in subReaders) + { + // try to close each reader, even if an exception is thrown + try + { + t.DecRef(); + } + catch (System.IO.IOException e) + { + if (ioe == null) + ioe = e; + } + } + + // NOTE: only needed in case someone had asked for + // FieldCache for top-level reader (which is generally + // not a good idea): + Search.FieldCache_Fields.DEFAULT.Purge(this); + + // throw the first exception + if (ioe != null) + throw ioe; + } + } + + public override ICollection GetFieldNames(IndexReader.FieldOption fieldNames) + { + EnsureOpen(); + return GetFieldNames(fieldNames, this.subReaders); + } + + internal static ICollection GetFieldNames(IndexReader.FieldOption fieldNames, IndexReader[] subReaders) + { + // maintain a unique set of field names + ISet fieldSet = Support.Compatibility.SetFactory.CreateHashSet(); + foreach (IndexReader reader in subReaders) + { + 
fieldSet.UnionWith(reader.GetFieldNames(fieldNames)); + } + return fieldSet; + } + + public override IndexReader[] GetSequentialSubReaders() + { + return subReaders; + } + + /// Returns the directory this index resides in. + public override Directory Directory() + { + // Don't ensureOpen here -- in certain cases, when a + // cloned/reopened reader needs to commit, it may call + // this method on the closed original reader + return internalDirectory; + } + + public override int TermInfosIndexDivisor + { + get { return termInfosIndexDivisor; } + } + + /// Expert: return the IndexCommit that this reader has opened. + ///

+ /// WARNING: this API is new and experimental and may suddenly change.

+ public override IndexCommit IndexCommit + { + get { return new ReaderCommit(segmentInfos, internalDirectory); } + } + + /// + /// + public static new ICollection ListCommits(Directory dir) + { + String[] files = dir.ListAll(); + + ICollection commits = new List(); + + var latest = new SegmentInfos(); + latest.Read(dir); + long currentGen = latest.Generation; + + commits.Add(new ReaderCommit(latest, dir)); + + foreach (string fileName in files) + { + if (fileName.StartsWith(IndexFileNames.SEGMENTS) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN) && SegmentInfos.GenerationFromSegmentsFileName(fileName) < currentGen) + { + + var sis = new SegmentInfos(); + try + { + // IOException allowed to throw there, in case + // segments_N is corrupt + sis.Read(dir, fileName); + } + catch (System.IO.FileNotFoundException) + { + // LUCENE-948: on NFS (and maybe others), if + // you have writers switching back and forth + // between machines, it's very likely that the + // dir listing will be stale and will claim a + // file segments_X exists when in fact it + // doesn't. So, we catch this and handle it + // as if the file does not exist + sis = null; + } + + if (sis != null) + commits.Add(new ReaderCommit(sis, dir)); + } + } + + return commits; + } + + private sealed class ReaderCommit:IndexCommit + { + private readonly String segmentsFileName; + private readonly ICollection files; + private readonly Directory dir; + private readonly long generation; + private readonly long version; + private readonly bool isOptimized; + private readonly IDictionary userData; + + internal ReaderCommit(SegmentInfos infos, Directory dir) + { + segmentsFileName = infos.GetCurrentSegmentFileName(); + this.dir = dir; + userData = infos.UserData; + files = infos.Files(dir, true); + version = infos.Version; + generation = infos.Generation; + isOptimized = infos.Count == 1 && !infos.Info(0).HasDeletions(); + } + public override string ToString() + { + return "DirectoryReader.ReaderCommit(" + segmentsFileName + ")"; + } + + public override bool IsOptimized + { + get { return isOptimized; } + } + + public override string SegmentsFileName + { + get { return segmentsFileName; } + } + + public override ICollection FileNames + { + get { return files; } + } + + public override Directory Directory + { + get { return dir; } + } + + public override long Version + { + get { return version; } + } + + public override long Generation + { + get { return generation; } + } + + public override bool IsDeleted + { + get { return false; } + } + + public override IDictionary UserData + { + get { return userData; } + } + + public override void Delete() + { + throw new System.NotSupportedException("This IndexCommit does not support deletions"); + } + } + + internal class MultiTermEnum:TermEnum + { + internal IndexReader topReader; // used for matching TermEnum to TermDocs + private readonly SegmentMergeQueue queue; + + private Term term; + private int docFreq; + internal SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments + + public MultiTermEnum(IndexReader topReader, IndexReader[] readers, int[] starts, Term t) + { + this.topReader = topReader; + queue = new SegmentMergeQueue(readers.Length); + matchingSegments = new SegmentMergeInfo[readers.Length + 1]; + for (int i = 0; i < readers.Length; i++) + { + IndexReader reader = readers[i]; + + TermEnum termEnum = t != null ? 
reader.Terms(t) : reader.Terms(); + + var smi = new SegmentMergeInfo(starts[i], termEnum, reader) {ord = i}; + if (t == null?smi.Next():termEnum.Term != null) + queue.Add(smi); + // initialize queue + else + smi.Dispose(); + } + + if (t != null && queue.Size() > 0) + { + Next(); + } + } + + public override bool Next() + { + foreach (SegmentMergeInfo smi in matchingSegments) + { + if (smi == null) + break; + if (smi.Next()) + queue.Add(smi); + else + smi.Dispose(); // done with segment + } + + int numMatchingSegments = 0; + matchingSegments[0] = null; + + SegmentMergeInfo top = queue.Top(); + + if (top == null) + { + term = null; + return false; + } + + term = top.term; + docFreq = 0; + + while (top != null && term.CompareTo(top.term) == 0) + { + matchingSegments[numMatchingSegments++] = top; + queue.Pop(); + docFreq += top.termEnum.DocFreq(); // increment freq + top = queue.Top(); + } + + matchingSegments[numMatchingSegments] = null; + return true; + } + + public override Term Term + { + get { return term; } + } + + public override int DocFreq() + { + return docFreq; + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + queue.Dispose(); + } + } + } + + internal class MultiTermDocs : TermDocs + { + internal IndexReader topReader; // used for matching TermEnum to TermDocs + protected internal IndexReader[] readers; + protected internal int[] starts; + protected internal Term term; + + protected internal int base_Renamed = 0; + protected internal int pointer = 0; + + private readonly TermDocs[] readerTermDocs; + protected internal TermDocs current; // == readerTermDocs[pointer] + + private MultiTermEnum tenum; // the term enum used for seeking... can be null + internal int matchingSegmentPos; // position into the matching segments from tenum + internal SegmentMergeInfo smi; // current segment mere info... can be null + + public MultiTermDocs(IndexReader topReader, IndexReader[] r, int[] s) + { + this.topReader = topReader; + readers = r; + starts = s; + + readerTermDocs = new TermDocs[r.Length]; + } + + public virtual int Doc + { + get { return base_Renamed + current.Doc; } + } + + public virtual int Freq + { + get { return current.Freq; } + } + + public virtual void Seek(Term term) + { + this.term = term; + this.base_Renamed = 0; + this.pointer = 0; + this.current = null; + this.tenum = null; + this.smi = null; + this.matchingSegmentPos = 0; + } + + public virtual void Seek(TermEnum termEnum) + { + Seek(termEnum.Term); + var multiTermEnum = termEnum as MultiTermEnum; + if (multiTermEnum != null) + { + tenum = multiTermEnum; + if (topReader != tenum.topReader) + tenum = null; + } + } + + public virtual bool Next() + { + for (; ; ) + { + if (current != null && current.Next()) + { + return true; + } + else if (pointer < readers.Length) + { + if (tenum != null) + { + smi = tenum.matchingSegments[matchingSegmentPos++]; + if (smi == null) + { + pointer = readers.Length; + return false; + } + pointer = smi.ord; + } + base_Renamed = starts[pointer]; + current = TermDocs(pointer++); + } + else + { + return false; + } + } + } + + /// Optimized implementation. 
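// Editor's note: illustrative sketch, not part of the vendored Lucene.Net source. Read() below
// fills the caller's arrays from one segment at a time and shifts every local doc number by that
// segment's start offset, so the merged view reports global doc numbers: with hypothetical starts
// { 0, 50, 120 }, a segment-1 hit at local doc 7 comes back to the caller as 50 + 7 = 57.
// 'multiTermDocs' is an assumed existing MultiTermDocs instance already positioned on a term.
int[] docs = new int[8];
int[] freqs = new int[8];
int got = multiTermDocs.Read(docs, freqs);
for (int i = 0; i < got; i++)
    System.Console.WriteLine("global doc " + docs[i] + ", freq " + freqs[i]);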
+ public virtual int Read(int[] docs, int[] freqs) + { + while (true) + { + while (current == null) + { + if (pointer < readers.Length) + { + // try next segment + if (tenum != null) + { + smi = tenum.matchingSegments[matchingSegmentPos++]; + if (smi == null) + { + pointer = readers.Length; + return 0; + } + pointer = smi.ord; + } + base_Renamed = starts[pointer]; + current = TermDocs(pointer++); + } + else + { + return 0; + } + } + int end = current.Read(docs, freqs); + if (end == 0) + { + // none left in segment + current = null; + } + else + { + // got some + int b = base_Renamed; // adjust doc numbers + for (int i = 0; i < end; i++) + docs[i] += b; + return end; + } + } + } + + /* A Possible future optimization could skip entire segments */ + public virtual bool SkipTo(int target) + { + for (; ; ) + { + if (current != null && current.SkipTo(target - base_Renamed)) + { + return true; + } + else if (pointer < readers.Length) + { + if (tenum != null) + { + SegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++]; + if (smi == null) + { + pointer = readers.Length; + return false; + } + pointer = smi.ord; + } + base_Renamed = starts[pointer]; + current = TermDocs(pointer++); + } + else + return false; + } + } + + private TermDocs TermDocs(int i) + { + TermDocs result = readerTermDocs[i] ?? (readerTermDocs[i] = TermDocs(readers[i])); + if (smi != null) + { + System.Diagnostics.Debug.Assert((smi.ord == i)); + System.Diagnostics.Debug.Assert((smi.termEnum.Term.Equals(term))); + result.Seek(smi.termEnum); + } + else + { + result.Seek(term); + } + return result; + } + + protected internal virtual TermDocs TermDocs(IndexReader reader) + { + return term == null ? reader.TermDocs(null):reader.TermDocs(); + } + + public virtual void Close() + { + Dispose(); + } + + public virtual void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + foreach (TermDocs t in readerTermDocs) + { + if (t != null) + t.Close(); + } + } + } + } + + internal class MultiTermPositions:MultiTermDocs, TermPositions + { + public MultiTermPositions(IndexReader topReader, IndexReader[] r, int[] s):base(topReader, r, s) + { + } + + protected internal override TermDocs TermDocs(IndexReader reader) + { + return reader.TermPositions(); + } + + public virtual int NextPosition() + { + return ((TermPositions) current).NextPosition(); + } + + public virtual int PayloadLength + { + get { return ((TermPositions) current).PayloadLength; } + } + + public virtual byte[] GetPayload(byte[] data, int offset) + { + return ((TermPositions) current).GetPayload(data, offset); + } + + + // TODO: Remove warning after API has been finalized + + public virtual bool IsPayloadAvailable + { + get { return ((TermPositions) current).IsPayloadAvailable; } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocConsumer.cs b/external/Lucene.Net.Light/src/core/Index/DocConsumer.cs new file mode 100644 index 0000000000..238e38c5fc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocConsumer.cs @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class DocConsumer + { + public abstract DocConsumerPerThread AddThread(DocumentsWriterThreadState perThread); + public abstract void Flush(System.Collections.Generic.ICollection threads, SegmentWriteState state); + public abstract void CloseDocStore(SegmentWriteState state); + public abstract void Abort(); + public abstract bool FreeRAM(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocConsumerPerThread.cs new file mode 100644 index 0000000000..7c7ed025d8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocConsumerPerThread.cs @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class DocConsumerPerThread + { + + /// Process the document. If there is + /// something for this document to be done in docID order, + /// you should encapsulate that as a + /// DocumentsWriter.DocWriter and return it. + /// DocumentsWriter then calls finish() on this object + /// when it's its turn. + /// + public abstract DocumentsWriter.DocWriter ProcessDocument(); + + public abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumer.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumer.cs new file mode 100644 index 0000000000..7fc59da4e5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumer.cs @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + abstract class DocFieldConsumer + { + + internal FieldInfos fieldInfos; + + /// Called when DocumentsWriter decides to create a new + /// segment + /// + public abstract void Flush(IDictionary> threadsAndFields, SegmentWriteState state); + + /// Called when DocumentsWriter decides to close the doc + /// stores + /// + public abstract void CloseDocStore(SegmentWriteState state); + + /// Called when an aborting exception is hit + public abstract void Abort(); + + /// Add a new thread + public abstract DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread); + + /// Called when DocumentsWriter is using too much RAM. + /// The consumer should free RAM, if possible, returning + /// true if any RAM was in fact freed. + /// + public abstract bool FreeRAM(); + + internal virtual void SetFieldInfos(FieldInfos fieldInfos) + { + this.fieldInfos = fieldInfos; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerField.cs new file mode 100644 index 0000000000..27636e2033 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerField.cs @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + + abstract class DocFieldConsumerPerField + { + /// Processes all occurrences of a single field + public abstract void ProcessFields(IFieldable[] fields, int count); + public abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerThread.cs new file mode 100644 index 0000000000..8f533ac657 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumerPerThread.cs @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class DocFieldConsumerPerThread + { + public abstract void StartDocument(); + public abstract DocumentsWriter.DocWriter FinishDocument(); + public abstract DocFieldConsumerPerField AddField(FieldInfo fi); + public abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumers.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumers.cs new file mode 100644 index 0000000000..61b9b1dacb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumers.cs @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + + /// This is just a "splitter" class: it lets you wrap two + /// DocFieldConsumer instances as a single consumer. 
+ /// + + sealed class DocFieldConsumers : DocFieldConsumer + { + private void InitBlock() + { + docFreeList = new PerDoc[1]; + } + internal DocFieldConsumer one; + internal DocFieldConsumer two; + + public DocFieldConsumers(DocFieldConsumer one, DocFieldConsumer two) + { + InitBlock(); + this.one = one; + this.two = two; + } + + internal override void SetFieldInfos(FieldInfos fieldInfos) + { + base.SetFieldInfos(fieldInfos); + one.SetFieldInfos(fieldInfos); + two.SetFieldInfos(fieldInfos); + } + + public override void Flush(IDictionary> threadsAndFields, SegmentWriteState state) + { + + var oneThreadsAndFields = new HashMap>(); + var twoThreadsAndFields = new HashMap>(); + + foreach(var entry in threadsAndFields) + { + DocFieldConsumersPerThread perThread = (DocFieldConsumersPerThread) entry.Key; + ICollection fields = entry.Value; + + IEnumerator fieldsIt = fields.GetEnumerator(); + ICollection oneFields = new HashSet(); + ICollection twoFields = new HashSet(); + while (fieldsIt.MoveNext()) + { + DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldsIt.Current; + oneFields.Add(perField.one); + twoFields.Add(perField.two); + } + + oneThreadsAndFields[perThread.one] = oneFields; + twoThreadsAndFields[perThread.two] = twoFields; + } + + + one.Flush(oneThreadsAndFields, state); + two.Flush(twoThreadsAndFields, state); + } + + public override void CloseDocStore(SegmentWriteState state) + { + try + { + one.CloseDocStore(state); + } + finally + { + two.CloseDocStore(state); + } + } + + public override void Abort() + { + try + { + one.Abort(); + } + finally + { + two.Abort(); + } + } + + public override bool FreeRAM() + { + bool any = one.FreeRAM(); + any |= two.FreeRAM(); + return any; + } + + public override DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread) + { + return new DocFieldConsumersPerThread(docFieldProcessorPerThread, this, one.AddThread(docFieldProcessorPerThread), two.AddThread(docFieldProcessorPerThread)); + } + + internal PerDoc[] docFreeList; + internal int freeCount; + internal int allocCount; + + internal PerDoc GetPerDoc() + { + lock (this) + { + if (freeCount == 0) + { + allocCount++; + if (allocCount > docFreeList.Length) + { + // Grow our free list up front to make sure we have + // enough space to recycle all outstanding PerDoc + // instances + System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length); + docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)]; + } + return new PerDoc(this); + } + else + return docFreeList[--freeCount]; + } + } + + internal void FreePerDoc(PerDoc perDoc) + { + lock (this) + { + System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length); + docFreeList[freeCount++] = perDoc; + } + } + + internal class PerDoc:DocumentsWriter.DocWriter + { + public PerDoc(DocFieldConsumers enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(DocFieldConsumers enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private DocFieldConsumers enclosingInstance; + public DocFieldConsumers Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + internal DocumentsWriter.DocWriter one; + internal DocumentsWriter.DocWriter two; + + public override long SizeInBytes() + { + return one.SizeInBytes() + two.SizeInBytes(); + } + + public override void Finish() + { + try + { + try + { + one.Finish(); + } + finally + { + two.Finish(); + } + } + finally + { + Enclosing_Instance.FreePerDoc(this); + } + } + + public override void 
Abort() + { + try + { + try + { + one.Abort(); + } + finally + { + two.Abort(); + } + } + finally + { + Enclosing_Instance.FreePerDoc(this); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerField.cs new file mode 100644 index 0000000000..71e96e0eaa --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerField.cs @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + + sealed class DocFieldConsumersPerField:DocFieldConsumerPerField + { + + internal DocFieldConsumerPerField one; + internal DocFieldConsumerPerField two; + internal DocFieldConsumersPerThread perThread; + + public DocFieldConsumersPerField(DocFieldConsumersPerThread perThread, DocFieldConsumerPerField one, DocFieldConsumerPerField two) + { + this.perThread = perThread; + this.one = one; + this.two = two; + } + + public override void ProcessFields(IFieldable[] fields, int count) + { + one.ProcessFields(fields, count); + two.ProcessFields(fields, count); + } + + public override void Abort() + { + try + { + one.Abort(); + } + finally + { + two.Abort(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerThread.cs new file mode 100644 index 0000000000..7098966e14 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldConsumersPerThread.cs @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class DocFieldConsumersPerThread:DocFieldConsumerPerThread + { + + internal DocFieldConsumerPerThread one; + internal DocFieldConsumerPerThread two; + internal DocFieldConsumers parent; + internal DocumentsWriter.DocState docState; + + public DocFieldConsumersPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocFieldConsumers parent, DocFieldConsumerPerThread one, DocFieldConsumerPerThread two) + { + this.parent = parent; + this.one = one; + this.two = two; + docState = docFieldProcessorPerThread.docState; + } + + public override void StartDocument() + { + one.StartDocument(); + two.StartDocument(); + } + + public override void Abort() + { + try + { + one.Abort(); + } + finally + { + two.Abort(); + } + } + + public override DocumentsWriter.DocWriter FinishDocument() + { + DocumentsWriter.DocWriter oneDoc = one.FinishDocument(); + DocumentsWriter.DocWriter twoDoc = two.FinishDocument(); + if (oneDoc == null) + return twoDoc; + else if (twoDoc == null) + return oneDoc; + else + { + DocFieldConsumers.PerDoc both = parent.GetPerDoc(); + both.docID = docState.docID; + System.Diagnostics.Debug.Assert(oneDoc.docID == docState.docID); + System.Diagnostics.Debug.Assert(twoDoc.docID == docState.docID); + both.one = oneDoc; + both.two = twoDoc; + return both; + } + } + + public override DocFieldConsumerPerField AddField(FieldInfo fi) + { + return new DocFieldConsumersPerField(this, one.AddField(fi), two.AddField(fi)); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldProcessor.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessor.cs new file mode 100644 index 0000000000..42891185ce --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessor.cs @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// This is a DocConsumer that gathers all fields under the + /// same name, and calls per-field consumers to process field + /// by field. This class doesn't doesn't do any "real" work + /// of its own: it just forwards the fields to a + /// DocFieldConsumer. 
+ /// + + sealed class DocFieldProcessor : DocConsumer + { + + internal DocumentsWriter docWriter; + internal FieldInfos fieldInfos = new FieldInfos(); + internal DocFieldConsumer consumer; + internal StoredFieldsWriter fieldsWriter; + + public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) + { + this.docWriter = docWriter; + this.consumer = consumer; + consumer.SetFieldInfos(fieldInfos); + fieldsWriter = new StoredFieldsWriter(docWriter, fieldInfos); + } + + public override void CloseDocStore(SegmentWriteState state) + { + consumer.CloseDocStore(state); + fieldsWriter.CloseDocStore(state); + } + + public override void Flush(ICollection threads, SegmentWriteState state) + { + var childThreadsAndFields = new HashMap>(); + foreach(DocConsumerPerThread thread in threads) + { + DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread)thread; + childThreadsAndFields[perThread.consumer] = perThread.Fields(); + perThread.TrimFields(state); + } + fieldsWriter.Flush(state); + consumer.Flush(childThreadsAndFields, state); + + // Important to save after asking consumer to flush so + // consumer can alter the FieldInfo* if necessary. EG, + // FreqProxTermsWriter does this with + // FieldInfo.storePayload. + System.String fileName = state.SegmentFileName(IndexFileNames.FIELD_INFOS_EXTENSION); + fieldInfos.Write(state.directory, fileName); + state.flushedFiles.Add(fileName); + } + + public override void Abort() + { + fieldsWriter.Abort(); + consumer.Abort(); + } + + public override bool FreeRAM() + { + return consumer.FreeRAM(); + } + + public override DocConsumerPerThread AddThread(DocumentsWriterThreadState threadState) + { + return new DocFieldProcessorPerThread(threadState, this); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerField.cs new file mode 100644 index 0000000000..1078988705 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerField.cs @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + + /// Holds all per thread, per field state. 
+ + sealed class DocFieldProcessorPerField + { + + internal DocFieldConsumerPerField consumer; + internal FieldInfo fieldInfo; + + internal DocFieldProcessorPerField next; + internal int lastGen = - 1; + + internal int fieldCount; + internal IFieldable[] fields = new IFieldable[1]; + + public DocFieldProcessorPerField(DocFieldProcessorPerThread perThread, FieldInfo fieldInfo) + { + this.consumer = perThread.consumer.AddField(fieldInfo); + this.fieldInfo = fieldInfo; + } + + public void Abort() + { + consumer.Abort(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerThread.cs new file mode 100644 index 0000000000..d108116b58 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocFieldProcessorPerThread.cs @@ -0,0 +1,478 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using Lucene.Net.Support; +using Document = Lucene.Net.Documents.Document; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + + /// Gathers all Fieldables for a document under the same + /// name, updates FieldInfos, and calls per-field consumers + /// to process field by field. + /// + /// Currently, only a single thread visits the fields, + /// sequentially, for processing. 
+ /// + + sealed class DocFieldProcessorPerThread:DocConsumerPerThread + { + private void InitBlock() + { + docFreeList = new PerDoc[1]; + } + + internal float docBoost; + internal int fieldGen; + internal DocFieldProcessor docFieldProcessor; + internal FieldInfos fieldInfos; + internal DocFieldConsumerPerThread consumer; + + // Holds all fields seen in current doc + internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1]; + internal int fieldCount; + + // Hash table for all fields ever seen + internal DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2]; + internal int hashMask = 1; + internal int totalFieldCount; + + internal StoredFieldsWriterPerThread fieldsWriter; + + internal DocumentsWriter.DocState docState; + + public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) + { + InitBlock(); + this.docState = threadState.docState; + this.docFieldProcessor = docFieldProcessor; + this.fieldInfos = docFieldProcessor.fieldInfos; + this.consumer = docFieldProcessor.consumer.AddThread(this); + fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState); + } + + public override void Abort() + { + for (int i = 0; i < fieldHash.Length; i++) + { + DocFieldProcessorPerField field = fieldHash[i]; + while (field != null) + { + DocFieldProcessorPerField next = field.next; + field.Abort(); + field = next; + } + } + fieldsWriter.Abort(); + consumer.Abort(); + } + + public System.Collections.Generic.ICollection Fields() + { + System.Collections.Generic.ICollection fields = + new System.Collections.Generic.HashSet(); + for (int i = 0; i < fieldHash.Length; i++) + { + DocFieldProcessorPerField field = fieldHash[i]; + while (field != null) + { + fields.Add(field.consumer); + field = field.next; + } + } + System.Diagnostics.Debug.Assert(fields.Count == totalFieldCount); + return fields; + } + + /// If there are fields we've seen but did not see again + /// in the last run, then free them up. 
+ /// + + internal void TrimFields(SegmentWriteState state) + { + + for (int i = 0; i < fieldHash.Length; i++) + { + DocFieldProcessorPerField perField = fieldHash[i]; + DocFieldProcessorPerField lastPerField = null; + + while (perField != null) + { + + if (perField.lastGen == - 1) + { + + // This field was not seen since the previous + // flush, so, free up its resources now + + // Unhash + if (lastPerField == null) + fieldHash[i] = perField.next; + else + lastPerField.next = perField.next; + + if (state.docWriter.infoStream != null) + state.docWriter.infoStream.WriteLine(" purge field=" + perField.fieldInfo.name); + + totalFieldCount--; + } + else + { + // Reset + perField.lastGen = - 1; + lastPerField = perField; + } + + perField = perField.next; + } + } + } + + private void Rehash() + { + int newHashSize = (fieldHash.Length * 2); + System.Diagnostics.Debug.Assert(newHashSize > fieldHash.Length); + + DocFieldProcessorPerField[] newHashArray = new DocFieldProcessorPerField[newHashSize]; + + // Rehash + int newHashMask = newHashSize - 1; + for (int j = 0; j < fieldHash.Length; j++) + { + DocFieldProcessorPerField fp0 = fieldHash[j]; + while (fp0 != null) + { + int hashPos2 = fp0.fieldInfo.name.GetHashCode() & newHashMask; + DocFieldProcessorPerField nextFP0 = fp0.next; + fp0.next = newHashArray[hashPos2]; + newHashArray[hashPos2] = fp0; + fp0 = nextFP0; + } + } + + fieldHash = newHashArray; + hashMask = newHashMask; + } + + public override DocumentsWriter.DocWriter ProcessDocument() + { + + consumer.StartDocument(); + fieldsWriter.StartDocument(); + + Document doc = docState.doc; + + System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start")); + + fieldCount = 0; + + int thisFieldGen = fieldGen++; + + System.Collections.Generic.IList docFields = doc.GetFields(); + int numDocFields = docFields.Count; + + // Absorb any new fields first seen in this document. 
+ // Also absorb any changes to fields we had already + // seen before (eg suddenly turning on norms or + // vectors, etc.): + + for (int i = 0; i < numDocFields; i++) + { + IFieldable field = docFields[i]; + System.String fieldName = field.Name; + + // Make sure we have a PerField allocated + int hashPos = fieldName.GetHashCode() & hashMask; + DocFieldProcessorPerField fp = fieldHash[hashPos]; + while (fp != null && !fp.fieldInfo.name.Equals(fieldName)) + fp = fp.next; + + if (fp == null) + { + + // TODO FI: we need to genericize the "flags" that a + // field holds, and, how these flags are merged; it + // needs to be more "pluggable" such that if I want + // to have a new "thing" my Fields can do, I can + // easily add it + FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored, + field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, + field.OmitNorms, false, field.OmitTermFreqAndPositions); + + fp = new DocFieldProcessorPerField(this, fi); + fp.next = fieldHash[hashPos]; + fieldHash[hashPos] = fp; + totalFieldCount++; + + if (totalFieldCount >= fieldHash.Length / 2) + Rehash(); + } + else + { + fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored, + field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, + field.OmitNorms, false, field.OmitTermFreqAndPositions); + } + + if (thisFieldGen != fp.lastGen) + { + + // First time we're seeing this field for this doc + fp.fieldCount = 0; + + if (fieldCount == fields.Length) + { + int newSize = fields.Length * 2; + DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize]; + Array.Copy(fields, 0, newArray, 0, fieldCount); + fields = newArray; + } + + fields[fieldCount++] = fp; + fp.lastGen = thisFieldGen; + } + + if (fp.fieldCount == fp.fields.Length) + { + IFieldable[] newArray = new IFieldable[fp.fields.Length * 2]; + Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount); + fp.fields = newArray; + } + + fp.fields[fp.fieldCount++] = field; + if (field.IsStored) + { + fieldsWriter.AddField(field, fp.fieldInfo); + } + } + + // If we are writing vectors then we must visit + // fields in sorted order so they are written in + // sorted order. TODO: we actually only need to + // sort the subset of fields that have vectors + // enabled; we could save [small amount of] CPU + // here. + QuickSort(fields, 0, fieldCount - 1); + + for (int i = 0; i < fieldCount; i++) + fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount); + + if (docState.maxTermPrefix != null && docState.infoStream != null) + { + docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. 
The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'"); + docState.maxTermPrefix = null; + } + + DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument(); + DocumentsWriter.DocWriter two = consumer.FinishDocument(); + if (one == null) + { + return two; + } + else if (two == null) + { + return one; + } + else + { + PerDoc both = GetPerDoc(); + both.docID = docState.docID; + System.Diagnostics.Debug.Assert(one.docID == docState.docID); + System.Diagnostics.Debug.Assert(two.docID == docState.docID); + both.one = one; + both.two = two; + return both; + } + } + + internal void QuickSort(DocFieldProcessorPerField[] array, int lo, int hi) + { + if (lo >= hi) + return ; + else if (hi == 1 + lo) + { + if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0) + { + DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[hi]; + array[hi] = tmp; + } + return ; + } + + int mid = Number.URShift((lo + hi), 1); + + if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0) + { + DocFieldProcessorPerField tmp = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp; + } + + if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0) + { + DocFieldProcessorPerField tmp = array[mid]; + array[mid] = array[hi]; + array[hi] = tmp; + + if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0) + { + DocFieldProcessorPerField tmp2 = array[lo]; + array[lo] = array[mid]; + array[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return ; + + DocFieldProcessorPerField partition = array[mid]; + + for (; ; ) + { + while (String.CompareOrdinal(array[right].fieldInfo.name, partition.fieldInfo.name) > 0) + --right; + + while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, partition.fieldInfo.name) <= 0) + ++left; + + if (left < right) + { + DocFieldProcessorPerField tmp = array[left]; + array[left] = array[right]; + array[right] = tmp; + --right; + } + else + { + break; + } + } + + QuickSort(array, lo, left); + QuickSort(array, left + 1, hi); + } + + internal PerDoc[] docFreeList; + internal int freeCount; + internal int allocCount; + + internal PerDoc GetPerDoc() + { + lock (this) + { + if (freeCount == 0) + { + allocCount++; + if (allocCount > docFreeList.Length) + { + // Grow our free list up front to make sure we have + // enough space to recycle all outstanding PerDoc + // instances + System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length); + docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)]; + } + return new PerDoc(this); + } + else + return docFreeList[--freeCount]; + } + } + + internal void FreePerDoc(PerDoc perDoc) + { + lock (this) + { + System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length); + docFreeList[freeCount++] = perDoc; + } + } + + internal class PerDoc:DocumentsWriter.DocWriter + { + public PerDoc(DocFieldProcessorPerThread enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(DocFieldProcessorPerThread enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private DocFieldProcessorPerThread enclosingInstance; + public DocFieldProcessorPerThread Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + internal DocumentsWriter.DocWriter one; + internal DocumentsWriter.DocWriter two; + + public override long SizeInBytes() + { + return one.SizeInBytes() + two.SizeInBytes(); + } + + public override void Finish() + 
{ + try + { + try + { + one.Finish(); + } + finally + { + two.Finish(); + } + } + finally + { + Enclosing_Instance.FreePerDoc(this); + } + } + + public override void Abort() + { + try + { + try + { + one.Abort(); + } + finally + { + two.Abort(); + } + } + finally + { + Enclosing_Instance.FreePerDoc(this); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocInverter.cs b/external/Lucene.Net.Light/src/core/Index/DocInverter.cs new file mode 100644 index 0000000000..4153465405 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocInverter.cs @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// This is a DocFieldConsumer that inverts each field, + /// separately, from a Document, and accepts a + /// InvertedTermsConsumer to process those terms. + /// + + sealed class DocInverter : DocFieldConsumer + { + + internal InvertedDocConsumer consumer; + internal InvertedDocEndConsumer endConsumer; + + public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) + { + this.consumer = consumer; + this.endConsumer = endConsumer; + } + + internal override void SetFieldInfos(FieldInfos fieldInfos) + { + base.SetFieldInfos(fieldInfos); + consumer.SetFieldInfos(fieldInfos); + endConsumer.SetFieldInfos(fieldInfos); + } + + public override void Flush(IDictionary> threadsAndFields, SegmentWriteState state) + { + + var childThreadsAndFields = new HashMap>(); + var endChildThreadsAndFields = new HashMap>(); + + foreach (var entry in threadsAndFields) + { + var perThread = (DocInverterPerThread) entry.Key; + + ICollection childFields = new HashSet(); + ICollection endChildFields = new HashSet(); + foreach(DocFieldConsumerPerField field in entry.Value) + { + var perField = (DocInverterPerField)field; + childFields.Add(perField.consumer); + endChildFields.Add(perField.endConsumer); + } + + childThreadsAndFields[perThread.consumer] = childFields; + endChildThreadsAndFields[perThread.endConsumer] = endChildFields; + } + + consumer.Flush(childThreadsAndFields, state); + endConsumer.Flush(endChildThreadsAndFields, state); + } + + public override void CloseDocStore(SegmentWriteState state) + { + consumer.CloseDocStore(state); + endConsumer.CloseDocStore(state); + } + + public override void Abort() + { + consumer.Abort(); + endConsumer.Abort(); + } + + public override bool FreeRAM() + { + return consumer.FreeRAM(); + } + + public override DocFieldConsumerPerThread AddThread(DocFieldProcessorPerThread docFieldProcessorPerThread) + { + return new DocInverterPerThread(docFieldProcessorPerThread, this); + } + } +} \ No newline at end of file diff --git 
a/external/Lucene.Net.Light/src/core/Index/DocInverterPerField.cs b/external/Lucene.Net.Light/src/core/Index/DocInverterPerField.cs new file mode 100644 index 0000000000..8cd7c0aea6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocInverterPerField.cs @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Documents; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Index +{ + + /// Holds state for inverting all occurrences of a single + /// field in the document. This class doesn't do anything + /// itself; instead, it forwards the tokens produced by + /// analysis to its own consumer + /// (InvertedDocConsumerPerField). It also interacts with an + /// endConsumer (InvertedDocEndConsumerPerField). + /// + + sealed class DocInverterPerField:DocFieldConsumerPerField + { + + private DocInverterPerThread perThread; + private FieldInfo fieldInfo; + internal InvertedDocConsumerPerField consumer; + internal InvertedDocEndConsumerPerField endConsumer; + internal DocumentsWriter.DocState docState; + internal FieldInvertState fieldState; + + public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) + { + this.perThread = perThread; + this.fieldInfo = fieldInfo; + docState = perThread.docState; + fieldState = perThread.fieldState; + this.consumer = perThread.consumer.AddField(this, fieldInfo); + this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo); + } + + public override void Abort() + { + consumer.Abort(); + endConsumer.Abort(); + } + + public override void ProcessFields(IFieldable[] fields, int count) + { + + fieldState.Reset(docState.doc.Boost); + + int maxFieldLength = docState.maxFieldLength; + + bool doInvert = consumer.Start(fields, count); + + for (int i = 0; i < count; i++) + { + + IFieldable field = fields[i]; + + // TODO FI: this should be "genericized" to querying + // consumer if it wants to see this particular field + // tokenized. 
+ if (field.IsIndexed && doInvert) + { + + bool anyToken; + + if (fieldState.length > 0) + fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name); + + if (!field.IsTokenized) + { + // un-tokenized field + System.String stringValue = field.StringValue; + int valueLength = stringValue.Length; + perThread.singleToken.Reinit(stringValue, 0, valueLength); + fieldState.attributeSource = perThread.singleToken; + consumer.Start(field); + + bool success = false; + try + { + consumer.Add(); + success = true; + } + finally + { + if (!success) + docState.docWriter.SetAborting(); + } + fieldState.offset += valueLength; + fieldState.length++; + fieldState.position++; + anyToken = valueLength > 0; + } + else + { + // tokenized field + TokenStream stream; + TokenStream streamValue = field.TokenStreamValue; + + if (streamValue != null) + stream = streamValue; + else + { + // the field does not have a TokenStream, + // so we have to obtain one from the analyzer + System.IO.TextReader reader; // find or make Reader + System.IO.TextReader readerValue = field.ReaderValue; + + if (readerValue != null) + reader = readerValue; + else + { + System.String stringValue = field.StringValue; + if (stringValue == null) + throw new System.ArgumentException("field must have either TokenStream, String or Reader value"); + perThread.stringReader.Init(stringValue); + reader = perThread.stringReader; + } + + // Tokenize field and add to postingTable + stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader); + } + + // reset the TokenStream to the first token + stream.Reset(); + + int startLength = fieldState.length; + + try + { + int offsetEnd = fieldState.offset - 1; + + bool hasMoreTokens = stream.IncrementToken(); + + fieldState.attributeSource = stream; + + IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute(); + IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute(); + + consumer.Start(field); + + for (; ; ) + { + + // If we hit an exception in stream.next below + // (which is fairly common, eg if analyzer + // chokes on a given document), then it's + // non-aborting and (above) this one document + // will be marked as deleted, but still + // consume a docID + + if (!hasMoreTokens) + break; + + int posIncr = posIncrAttribute.PositionIncrement; + fieldState.position += posIncr; + if (fieldState.position > 0) + { + fieldState.position--; + } + + if (posIncr == 0) + fieldState.numOverlap++; + + bool success = false; + try + { + // If we hit an exception in here, we abort + // all buffered documents since the last + // flush, on the likelihood that the + // internal state of the consumer is now + // corrupt and should not be flushed to a + // new segment: + consumer.Add(); + success = true; + } + finally + { + if (!success) + docState.docWriter.SetAborting(); + } + fieldState.position++; + offsetEnd = fieldState.offset + offsetAttribute.EndOffset; + if (++fieldState.length >= maxFieldLength) + { + if (docState.infoStream != null) + docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens"); + break; + } + + hasMoreTokens = stream.IncrementToken(); + } + // trigger streams to perform end-of-stream operations + stream.End(); + + fieldState.offset += offsetAttribute.EndOffset; + anyToken = fieldState.length > startLength; + } + finally + { + stream.Close(); + } + } + + if (anyToken) + fieldState.offset += docState.analyzer.GetOffsetGap(field); + 
fieldState.boost *= field.Boost; + } + + // LUCENE-2387: don't hang onto the field, so GC can + // reclaim + fields[i] = null; + } + + consumer.Finish(); + endConsumer.Finish(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocInverterPerThread.cs b/external/Lucene.Net.Light/src/core/Index/DocInverterPerThread.cs new file mode 100644 index 0000000000..c38ed35b5f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocInverterPerThread.cs @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Util; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Index +{ + + /// This is a DocFieldConsumer that inverts each field, + /// separately, from a Document, and accepts a + /// InvertedTermsConsumer to process those terms. + /// + + sealed class DocInverterPerThread : DocFieldConsumerPerThread + { + private void InitBlock() + { + singleToken = new SingleTokenAttributeSource(); + } + internal DocInverter docInverter; + internal InvertedDocConsumerPerThread consumer; + internal InvertedDocEndConsumerPerThread endConsumer; + internal SingleTokenAttributeSource singleToken; + + internal class SingleTokenAttributeSource : AttributeSource + { + internal ITermAttribute termAttribute; + internal IOffsetAttribute offsetAttribute; + + internal SingleTokenAttributeSource() + { + termAttribute = AddAttribute(); + offsetAttribute = AddAttribute(); + } + + public void Reinit(System.String stringValue, int startOffset, int endOffset) + { + termAttribute.SetTermBuffer(stringValue); + offsetAttribute.SetOffset(startOffset, endOffset); + } + } + + internal DocumentsWriter.DocState docState; + + internal FieldInvertState fieldState = new FieldInvertState(); + + // Used to read a string value for a field + internal ReusableStringReader stringReader = new ReusableStringReader(); + + public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter) + { + InitBlock(); + this.docInverter = docInverter; + docState = docFieldProcessorPerThread.docState; + consumer = docInverter.consumer.AddThread(this); + endConsumer = docInverter.endConsumer.AddThread(this); + } + + public override void StartDocument() + { + consumer.StartDocument(); + endConsumer.StartDocument(); + } + + public override DocumentsWriter.DocWriter FinishDocument() + { + // TODO: allow endConsumer.finishDocument to also return + // a DocWriter + endConsumer.FinishDocument(); + return consumer.FinishDocument(); + } + + public override void Abort() + { + try + { + consumer.Abort(); + } + finally + { + endConsumer.Abort(); + } + } + + public override DocFieldConsumerPerField AddField(FieldInfo fi) + { 
+ return new DocInverterPerField(this, fi); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocumentsWriter.cs b/external/Lucene.Net.Light/src/core/Index/DocumentsWriter.cs new file mode 100644 index 0000000000..6545d11129 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocumentsWriter.cs @@ -0,0 +1,2075 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using Lucene.Net.Support; +using Analyzer = Lucene.Net.Analysis.Analyzer; +using Document = Lucene.Net.Documents.Document; +using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException; +using Directory = Lucene.Net.Store.Directory; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; +using Constants = Lucene.Net.Util.Constants; +using IndexSearcher = Lucene.Net.Search.IndexSearcher; +using Query = Lucene.Net.Search.Query; +using Scorer = Lucene.Net.Search.Scorer; +using Similarity = Lucene.Net.Search.Similarity; +using Weight = Lucene.Net.Search.Weight; + +namespace Lucene.Net.Index +{ + + /// This class accepts multiple added documents and directly + /// writes a single segment file. It does this more + /// efficiently than creating a single segment per document + /// (with DocumentWriter) and doing standard merges on those + /// segments. + /// + /// Each added document is passed to the , + /// which in turn processes the document and interacts with + /// other consumers in the indexing chain. Certain + /// consumers, like and + ///, digest a document and + /// immediately write bytes to the "doc store" files (ie, + /// they do not consume RAM per document, except while they + /// are processing the document). + /// + /// Other consumers, eg and + /// , buffer bytes in RAM and flush only + /// when a new segment is produced. + /// Once we have used our allowed RAM buffer, or the number + /// of added docs is large enough (in the case we are + /// flushing by doc count instead of RAM usage), we create a + /// real segment and flush it to the Directory. + /// + /// Threads: + /// + /// Multiple threads are allowed into addDocument at once. + /// There is an initial synchronized call to getThreadState + /// which allocates a ThreadState for this thread. The same + /// thread will get the same ThreadState over time (thread + /// affinity) so that if there are consistent patterns (for + /// example each thread is indexing a different content + /// source) then we make better use of RAM. Then + /// processDocument is called on that ThreadState without + /// synchronization (most of the "heavy lifting" is in this + /// call). Finally the synchronized "finishDocument" is + /// called to flush changes to the directory. 
+ /// + /// When flush is called by IndexWriter we forcefully idle + /// all threads and flush only once they are all idle. This + /// means you can call flush with a given thread even while + /// other threads are actively adding/deleting documents. + /// + /// + /// Exceptions: + /// + /// Because this class directly updates in-memory posting + /// lists, and flushes stored fields and term vectors + /// directly to files in the directory, there are certain + /// limited times when an exception can corrupt this state. + /// For example, a disk full while flushing stored fields + /// leaves this file in a corrupt state. Or, an OOM + /// exception while appending to the in-memory posting lists + /// can corrupt that posting list. We call such exceptions + /// "aborting exceptions". In these cases we must call + /// abort() to discard all docs added since the last flush. + /// + /// All other exceptions ("non-aborting exceptions") can + /// still partially update the index structures. These + /// updates are consistent, but, they represent only a part + /// of the document seen up until the exception was hit. + /// When this happens, we immediately mark the document as + /// deleted so that the document is always atomically ("all + /// or none") added to the index. + /// + + public sealed class DocumentsWriter : IDisposable + { + internal class AnonymousClassIndexingChain:IndexingChain + { + + internal override DocConsumer GetChain(DocumentsWriter documentsWriter) + { + /* + This is the current indexing chain: + + DocConsumer / DocConsumerPerThread + --> code: DocFieldProcessor / DocFieldProcessorPerThread + --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField + --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField + --> code: DocInverter / DocInverterPerThread / DocInverterPerField + --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: TermsHash / TermsHashPerThread / TermsHashPerField + --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField + --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField + --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField + --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField + --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField + --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField + */ + + // Build up indexing chain: + + TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter); + TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); + + InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, new TermsHash(documentsWriter, false, termVectorsWriter, null)); + NormsWriter normsWriter = new NormsWriter(); + DocInverter docInverter = new DocInverter(termsHash, normsWriter); + return new DocFieldProcessor(documentsWriter, docInverter); + } + } + private void InitBlock() + { + maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH; + maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS; + ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024); + waitQueuePauseBytes = (long) (ramBufferSize * 0.1); + waitQueueResumeBytes = (long) (ramBufferSize * 0.05); + freeTrigger = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 1.05); + freeLevel = (long) 
(IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024 * 0.95); + maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; + skipDocWriter = new SkipDocWriter(); + byteBlockAllocator = new ByteBlockAllocator(this, DocumentsWriter.BYTE_BLOCK_SIZE); + perDocAllocator = new ByteBlockAllocator(this,DocumentsWriter.PER_DOC_BLOCK_SIZE); + waitQueue = new WaitQueue(this); + } + + internal IndexWriter writer; + internal Directory directory; + + internal System.String segment; // Current segment we are working on + private System.String docStoreSegment; // Current doc-store segment we are writing + private int docStoreOffset; // Current starting doc-store offset of current segment + + private int nextDocID; // Next docID to be added + private int numDocsInRAM; // # docs buffered in RAM + internal int numDocsInStore; // # docs written to doc stores + + // Max # ThreadState instances; if there are more threads + // than this they share ThreadStates + private const int MAX_THREAD_STATE = 5; + private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0]; + private HashMap threadBindings = new HashMap(); + + private int pauseThreads; // Non-zero when we need all threads to + // pause (eg to flush) + internal bool flushPending; // True when a thread has decided to flush + internal bool bufferIsFull; // True when it's time to write segment + private bool aborting; // True if an abort is pending + + private DocFieldProcessor docFieldProcessor; + + internal System.IO.StreamWriter infoStream; + internal int maxFieldLength; + internal Similarity similarity; + + internal IList newFiles; + + internal class DocState + { + internal DocumentsWriter docWriter; + internal Analyzer analyzer; + internal int maxFieldLength; + internal System.IO.StreamWriter infoStream; + internal Similarity similarity; + internal int docID; + internal Document doc; + internal System.String maxTermPrefix; + + // Only called by asserts + public bool TestPoint(System.String name) + { + return docWriter.writer.TestPoint(name); + } + + public void Clear() + { + // don't hold onto doc nor analyzer, in case it is + // largish: + doc = null; + analyzer = null; + } + } + + /// Consumer returns this on each doc. This holds any + /// state that must be flushed synchronized "in docID + /// order". We gather these and flush them in order. + /// + internal abstract class DocWriter + { + internal DocWriter next; + internal int docID; + public abstract void Finish(); + public abstract void Abort(); + public abstract long SizeInBytes(); + + internal void SetNext(DocWriter next) + { + this.next = next; + } + } + + /* + * Create and return a new DocWriterBuffer. + */ + internal PerDocBuffer NewPerDocBuffer() + { + return new PerDocBuffer(this); + } + + /* + * RAMFile buffer for DocWriters. + */ + internal class PerDocBuffer : Lucene.Net.Store.RAMFile + { + DocumentsWriter enclosingInstance; + public PerDocBuffer(DocumentsWriter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + /* + * Allocate bytes used from shared pool. + */ + public override byte[] NewBuffer(int size) + { + System.Diagnostics.Debug.Assert(size == PER_DOC_BLOCK_SIZE); + return enclosingInstance.perDocAllocator.GetByteBlock(false); + } + + /* + * Recycle the bytes used. 
+ */ + internal void Recycle() + { + lock (this) + { + if (buffers.Count > 0) + { + Length = 0; + + // Recycle the blocks + enclosingInstance.perDocAllocator.RecycleByteBlocks(buffers); + buffers.Clear(); + sizeInBytes = 0; + + System.Diagnostics.Debug.Assert(NumBuffers() == 0); + } + } + } + } + + /// The IndexingChain must define the method + /// which returns the DocConsumer that the DocumentsWriter calls to process the + /// documents. + /// + internal abstract class IndexingChain + { + internal abstract DocConsumer GetChain(DocumentsWriter documentsWriter); + } + + internal static readonly IndexingChain DefaultIndexingChain; + + internal DocConsumer consumer; + + // Deletes done after the last flush; these are discarded + // on abort + private BufferedDeletes deletesInRAM = new BufferedDeletes(false); + + // Deletes done before the last flush; these are still + // kept on abort + private BufferedDeletes deletesFlushed = new BufferedDeletes(true); + + // The max number of delete terms that can be buffered before + // they must be flushed to disk. + private int maxBufferedDeleteTerms; + + // How much RAM we can use before flushing. This is 0 if + // we are flushing by doc count instead. + private long ramBufferSize; + private long waitQueuePauseBytes; + private long waitQueueResumeBytes; + + // If we've allocated 5% over our RAM budget, we then + // free down to 95% + private long freeTrigger; + private long freeLevel; + + // Flush @ this number of docs. If ramBufferSize is + // non-zero we will flush by RAM usage instead. + private int maxBufferedDocs; + + private int flushedDocCount; // How many docs already flushed to index + + internal void UpdateFlushedDocCount(int n) + { + lock (this) + { + flushedDocCount += n; + } + } + internal int GetFlushedDocCount() + { + lock (this) + { + return flushedDocCount; + } + } + internal void SetFlushedDocCount(int n) + { + lock (this) + { + flushedDocCount = n; + } + } + + private bool closed; + + internal DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain) + { + InitBlock(); + this.directory = directory; + this.writer = writer; + this.similarity = writer.Similarity; + flushedDocCount = writer.MaxDoc(); + + consumer = indexingChain.GetChain(this); + if (consumer is DocFieldProcessor) + { + docFieldProcessor = (DocFieldProcessor) consumer; + } + } + + /// Returns true if any of the fields in the current + /// buffered docs have omitTermFreqAndPositions==false + /// + internal bool HasProx() + { + return (docFieldProcessor != null)?docFieldProcessor.fieldInfos.HasProx():true; + } + + /// If non-null, various details of indexing are printed + /// here. + /// + internal void SetInfoStream(System.IO.StreamWriter infoStream) + { + lock (this) + { + this.infoStream = infoStream; + for (int i = 0; i < threadStates.Length; i++) + threadStates[i].docState.infoStream = infoStream; + } + } + + internal void SetMaxFieldLength(int maxFieldLength) + { + lock (this) + { + this.maxFieldLength = maxFieldLength; + for (int i = 0; i < threadStates.Length; i++) + threadStates[i].docState.maxFieldLength = maxFieldLength; + } + } + + internal void SetSimilarity(Similarity similarity) + { + lock (this) + { + this.similarity = similarity; + for (int i = 0; i < threadStates.Length; i++) + threadStates[i].docState.similarity = similarity; + } + } + + /// Set how much RAM we can use before flushing. 
+ internal void SetRAMBufferSizeMB(double mb) + { + lock (this) + { + if (mb == IndexWriter.DISABLE_AUTO_FLUSH) + { + ramBufferSize = IndexWriter.DISABLE_AUTO_FLUSH; + waitQueuePauseBytes = 4 * 1024 * 1024; + waitQueueResumeBytes = 2 * 1024 * 1024; + } + else + { + ramBufferSize = (long) (mb * 1024 * 1024); + waitQueuePauseBytes = (long) (ramBufferSize * 0.1); + waitQueueResumeBytes = (long) (ramBufferSize * 0.05); + freeTrigger = (long) (1.05 * ramBufferSize); + freeLevel = (long) (0.95 * ramBufferSize); + } + } + } + + internal double GetRAMBufferSizeMB() + { + lock (this) + { + if (ramBufferSize == IndexWriter.DISABLE_AUTO_FLUSH) + { + return ramBufferSize; + } + else + { + return ramBufferSize / 1024.0 / 1024.0; + } + } + } + + /// Gets or sets max buffered docs, which means we will flush by + /// doc count instead of by RAM usage. + /// + internal int MaxBufferedDocs + { + get { return maxBufferedDocs; } + set { maxBufferedDocs = value; } + } + + /// Get current segment name we are writing. + internal string Segment + { + get { return segment; } + } + + /// Returns how many docs are currently buffered in RAM. + internal int NumDocsInRAM + { + get { return numDocsInRAM; } + } + + /// Returns the current doc store segment we are writing + /// to. + /// + internal string DocStoreSegment + { + get + { + lock (this) + { + return docStoreSegment; + } + } + } + + /// Returns the doc offset into the shared doc store for + /// the current buffered docs. + /// + internal int DocStoreOffset + { + get { return docStoreOffset; } + } + + /// Closes the current open doc stores an returns the doc + /// store segment name. This returns null if there are * + /// no buffered documents. + /// + internal System.String CloseDocStore() + { + lock (this) + { + + System.Diagnostics.Debug.Assert(AllThreadsIdle()); + + if (infoStream != null) + Message("closeDocStore: " + openFiles.Count + " files to flush to segment " + docStoreSegment + " numDocs=" + numDocsInStore); + + bool success = false; + + try + { + InitFlushState(true); + closedFiles.Clear(); + + consumer.CloseDocStore(flushState); + System.Diagnostics.Debug.Assert(0 == openFiles.Count); + + System.String s = docStoreSegment; + docStoreSegment = null; + docStoreOffset = 0; + numDocsInStore = 0; + success = true; + return s; + } + finally + { + if (!success) + { + Abort(); + } + } + } + } + + private ICollection abortedFiles; // List of files that were written before last abort() + + private SegmentWriteState flushState; + + internal ICollection AbortedFiles() + { + return abortedFiles; + } + + internal void Message(System.String message) + { + if (infoStream != null) + writer.Message("DW: " + message); + } + + internal IList openFiles = new List(); + internal IList closedFiles = new List(); + + /* Returns Collection of files in use by this instance, + * including any flushed segments. 
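As a back-of-envelope check of the thresholds computed in SetRAMBufferSizeMB above (illustrative arithmetic, not part of the patch): with a 32 MB buffer the wait queue pauses producers once unwritten per-doc bytes reach 10% of the buffer and resumes them at 5%, while block freeing kicks in at 105% of the buffer and stops at 95%. When auto-flush is disabled the pause/resume levels fall back to the fixed 4 MB / 2 MB values shown above.

// Mirrors the arithmetic in SetRAMBufferSizeMB for mb = 32.0 (illustration only):
double mb = 32.0;
long ramBufferSize        = (long)(mb * 1024 * 1024);      // 33,554,432 bytes
long waitQueuePauseBytes  = (long)(ramBufferSize * 0.10);  // ~3.2 MB: stall incoming docs
long waitQueueResumeBytes = (long)(ramBufferSize * 0.05);  // ~1.6 MB: let them continue
long freeTrigger          = (long)(1.05 * ramBufferSize);  // ~33.6 MB: start freeing pooled blocks
long freeLevel            = (long)(0.95 * ramBufferSize);  // ~30.4 MB: stop freeing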
*/ + internal IList OpenFiles() + { + lock (this) + { + // ToArray returns a copy + return openFiles.ToArray(); + } + } + + internal IList ClosedFiles() + { + lock (this) + { + // ToArray returns a copy + return closedFiles.ToArray(); + } + } + + internal void AddOpenFile(System.String name) + { + lock (this) + { + System.Diagnostics.Debug.Assert(!openFiles.Contains(name)); + openFiles.Add(name); + } + } + + internal void RemoveOpenFile(System.String name) + { + lock (this) + { + System.Diagnostics.Debug.Assert(openFiles.Contains(name)); + openFiles.Remove(name); + closedFiles.Add(name); + } + } + + internal void SetAborting() + { + lock (this) + { + aborting = true; + } + } + + /// Called if we hit an exception at a bad time (when + /// updating the index files) and must discard all + /// currently buffered docs. This resets our state, + /// discarding any docs added since last flush. + /// + internal void Abort() + { + lock (this) + { + try + { + if (infoStream != null) + { + Message("docWriter: now abort"); + } + + // Forcefully remove waiting ThreadStates from line + waitQueue.Abort(); + + // Wait for all other threads to finish with + // DocumentsWriter: + PauseAllThreads(); + + try + { + + System.Diagnostics.Debug.Assert(0 == waitQueue.numWaiting); + + waitQueue.waitingBytes = 0; + + try + { + abortedFiles = OpenFiles(); + } + catch (System.Exception) + { + abortedFiles = null; + } + + deletesInRAM.Clear(); + deletesFlushed.Clear(); + openFiles.Clear(); + + for (int i = 0; i < threadStates.Length; i++) + try + { + threadStates[i].consumer.Abort(); + } + catch (System.Exception) + { + } + + try + { + consumer.Abort(); + } + catch (System.Exception) + { + } + + docStoreSegment = null; + numDocsInStore = 0; + docStoreOffset = 0; + + // Reset all postings data + DoAfterFlush(); + } + finally + { + ResumeAllThreads(); + } + } + finally + { + aborting = false; + System.Threading.Monitor.PulseAll(this); + if (infoStream != null) + { + Message("docWriter: done abort; abortedFiles=" + abortedFiles); + } + } + } + } + + /// Reset after a flush + private void DoAfterFlush() + { + // All ThreadStates should be idle when we are called + System.Diagnostics.Debug.Assert(AllThreadsIdle()); + threadBindings.Clear(); + waitQueue.Reset(); + segment = null; + numDocsInRAM = 0; + nextDocID = 0; + bufferIsFull = false; + flushPending = false; + for (int i = 0; i < threadStates.Length; i++) + threadStates[i].DoAfterFlush(); + numBytesUsed = 0; + } + + // Returns true if an abort is in progress + internal bool PauseAllThreads() + { + lock (this) + { + pauseThreads++; + while (!AllThreadsIdle()) + { + System.Threading.Monitor.Wait(this); + } + + return aborting; + } + } + + internal void ResumeAllThreads() + { + lock (this) + { + pauseThreads--; + System.Diagnostics.Debug.Assert(pauseThreads >= 0); + if (0 == pauseThreads) + System.Threading.Monitor.PulseAll(this); + } + } + + private bool AllThreadsIdle() + { + lock (this) + { + for (int i = 0; i < threadStates.Length; i++) + if (!threadStates[i].isIdle) + return false; + return true; + } + } + + internal bool AnyChanges + { + get + { + lock (this) + { + return numDocsInRAM != 0 || deletesInRAM.numTerms != 0 || deletesInRAM.docIDs.Count != 0 || + deletesInRAM.queries.Count != 0; + } + } + } + + private void InitFlushState(bool onlyDocStore) + { + lock (this) + { + InitSegmentName(onlyDocStore); + flushState = new SegmentWriteState(this, directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, writer.TermIndexInterval); + } + } + + /// Flush all 
pending docs to a new segment + internal int Flush(bool closeDocStore) + { + lock (this) + { + + System.Diagnostics.Debug.Assert(AllThreadsIdle()); + + System.Diagnostics.Debug.Assert(numDocsInRAM > 0); + + System.Diagnostics.Debug.Assert(nextDocID == numDocsInRAM); + System.Diagnostics.Debug.Assert(waitQueue.numWaiting == 0); + System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0); + + InitFlushState(false); + + docStoreOffset = numDocsInStore; + + if (infoStream != null) + Message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM); + + bool success = false; + + try + { + + if (closeDocStore) + { + System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName != null); + System.Diagnostics.Debug.Assert(flushState.docStoreSegmentName.Equals(flushState.segmentName)); + CloseDocStore(); + flushState.numDocsInStore = 0; + } + + ICollection threads = new HashSet(); + for (int i = 0; i < threadStates.Length; i++) + threads.Add(threadStates[i].consumer); + consumer.Flush(threads, flushState); + + if (infoStream != null) + { + SegmentInfo si = new SegmentInfo(flushState.segmentName, flushState.numDocs, directory); + long newSegmentSize = si.SizeInBytes(); + System.String message = System.String.Format(nf, " oldRAMSize={0:d} newFlushedSize={1:d} docs/MB={2:f} new/old={3:%}", + new System.Object[] { numBytesUsed, newSegmentSize, (numDocsInRAM / (newSegmentSize / 1024.0 / 1024.0)), (100.0 * newSegmentSize / numBytesUsed) }); + Message(message); + } + + flushedDocCount += flushState.numDocs; + + DoAfterFlush(); + + success = true; + } + finally + { + if (!success) + { + Abort(); + } + } + + System.Diagnostics.Debug.Assert(waitQueue.waitingBytes == 0); + + return flushState.numDocs; + } + } + + internal ICollection GetFlushedFiles() + { + return flushState.flushedFiles; + } + + /// Build compound file for the segment we just flushed + internal void CreateCompoundFile(System.String segment) + { + + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); + foreach(string flushedFile in flushState.flushedFiles) + { + cfsWriter.AddFile(flushedFile); + } + + // Perform the merge + cfsWriter.Close(); + } + + /// Set flushPending if it is not already set and returns + /// whether it was set. This is used by IndexWriter to + /// trigger a single flush even when multiple threads are + /// trying to do so. + /// + internal bool SetFlushPending() + { + lock (this) + { + if (flushPending) + return false; + else + { + flushPending = true; + return true; + } + } + } + + internal void ClearFlushPending() + { + lock (this) + { + flushPending = false; + } + } + + internal void PushDeletes() + { + lock (this) + { + deletesFlushed.Update(deletesInRAM); + } + } + + public void Dispose() + { + // Move to protected method if class becomes unsealed + lock (this) + { + closed = true; + System.Threading.Monitor.PulseAll(this); + } + } + + internal void InitSegmentName(bool onlyDocStore) + { + lock (this) + { + if (segment == null && (!onlyDocStore || docStoreSegment == null)) + { + segment = writer.NewSegmentName(); + System.Diagnostics.Debug.Assert(numDocsInRAM == 0); + } + if (docStoreSegment == null) + { + docStoreSegment = segment; + System.Diagnostics.Debug.Assert(numDocsInStore == 0); + } + } + } + + /// Returns a free (idle) ThreadState that may be used for + /// indexing this one document. This call also pauses if a + /// flush is pending. 
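Abort() and Flush() above both rely on the PauseAllThreads/ResumeAllThreads handshake: a counter of pause requests, a wait until every ThreadState reports idle, and a PulseAll once the last pause request is released. A stripped-down model of that monitor pattern follows (a sketch under simplified assumptions, not the actual classes).

// Minimal model of the pause/resume handshake used around Flush() and Abort().
sealed class PauseGate
{
    int pauseRequests;   // plays the role of DocumentsWriter.pauseThreads
    int busyWorkers;     // stands in for "ThreadStates that are not idle"

    public void PauseAll()                       // the flushing/aborting thread
    {
        lock (this)
        {
            pauseRequests++;
            while (busyWorkers > 0)
                System.Threading.Monitor.Wait(this);      // wait for workers to go idle
        }
    }

    public void ResumeAll()
    {
        lock (this)
        {
            pauseRequests--;
            if (pauseRequests == 0)
                System.Threading.Monitor.PulseAll(this);  // wake workers parked in Enter()
        }
    }

    public void Enter()                          // an indexing worker, before its document
    {
        lock (this)
        {
            while (pauseRequests > 0)
                System.Threading.Monitor.Wait(this);
            busyWorkers++;
        }
    }

    public void Exit()                           // the worker, after finishing its document
    {
        lock (this)
        {
            busyWorkers--;
            if (busyWorkers == 0)
                System.Threading.Monitor.PulseAll(this);  // unblock a pending PauseAll()
        }
    }
}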
If delTerm is non-null then we + /// buffer this deleted term after the thread state has + /// been acquired. + /// + internal DocumentsWriterThreadState GetThreadState(Document doc, Term delTerm) + { + lock (this) + { + + // First, find a thread state. If this thread already + // has affinity to a specific ThreadState, use that one + // again. + DocumentsWriterThreadState state = threadBindings[ThreadClass.Current()]; + if (state == null) + { + + // First time this thread has called us since last + // flush. Find the least loaded thread state: + DocumentsWriterThreadState minThreadState = null; + for (int i = 0; i < threadStates.Length; i++) + { + DocumentsWriterThreadState ts = threadStates[i]; + if (minThreadState == null || ts.numThreads < minThreadState.numThreads) + minThreadState = ts; + } + if (minThreadState != null && (minThreadState.numThreads == 0 || threadStates.Length >= MAX_THREAD_STATE)) + { + state = minThreadState; + state.numThreads++; + } + else + { + // Just create a new "private" thread state + DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1 + threadStates.Length]; + if (threadStates.Length > 0) + Array.Copy(threadStates, 0, newArray, 0, threadStates.Length); + state = newArray[threadStates.Length] = new DocumentsWriterThreadState(this); + threadStates = newArray; + } + threadBindings[ThreadClass.Current()] = state; + } + + // Next, wait until my thread state is idle (in case + // it's shared with other threads) and for threads to + // not be paused nor a flush pending: + WaitReady(state); + + // Allocate segment name if this is the first doc since + // last flush: + InitSegmentName(false); + + state.isIdle = false; + + bool success = false; + try + { + state.docState.docID = nextDocID; + + System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init start")); + + if (delTerm != null) + { + AddDeleteTerm(delTerm, state.docState.docID); + state.doFlushAfter = TimeToFlushDeletes(); + } + + System.Diagnostics.Debug.Assert(writer.TestPoint("DocumentsWriter.ThreadState.init after delTerm")); + + nextDocID++; + numDocsInRAM++; + + // We must at this point commit to flushing to ensure we + // always get N docs when we flush by doc count, even if + // > 1 thread is adding documents: + if (!flushPending && maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) + { + flushPending = true; + state.doFlushAfter = true; + } + + success = true; + } + finally + { + if (!success) + { + // Forcefully idle this ThreadState: + state.isIdle = true; + System.Threading.Monitor.PulseAll(this); + if (state.doFlushAfter) + { + state.doFlushAfter = false; + flushPending = false; + } + } + } + + return state; + } + } + + /// Returns true if the caller (IndexWriter) should now + /// flush. 
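The selection policy in GetThreadState above can be summarized as: reuse the state this thread already owns, otherwise share the least-loaded existing state once it is idle or the pool has reached MAX_THREAD_STATE, otherwise grow the pool. A simplified standalone model of that policy (hypothetical types, for illustration only):

using System.Collections.Generic;
using System.Threading;

sealed class StatePoolSketch
{
    const int MaxStates = 5;   // mirrors DocumentsWriter.MAX_THREAD_STATE

    sealed class State { public int NumThreads = 1; }

    readonly Dictionary<Thread, State> bindings = new Dictionary<Thread, State>();
    readonly List<State> states = new List<State>();

    public State Acquire()
    {
        lock (this)
        {
            State s;
            if (bindings.TryGetValue(Thread.CurrentThread, out s))
                return s;                          // thread affinity: same state as last time

            State minLoaded = null;
            foreach (State candidate in states)
                if (minLoaded == null || candidate.NumThreads < minLoaded.NumThreads)
                    minLoaded = candidate;

            if (minLoaded != null &&
                (minLoaded.NumThreads == 0 || states.Count >= MaxStates))
            {
                minLoaded.NumThreads++;            // share the least-loaded state
                s = minLoaded;
            }
            else
            {
                s = new State();                   // still room: give the thread its own state
                states.Add(s);
            }
            bindings[Thread.CurrentThread] = s;
            return s;
        }
    }
}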
+ /// + internal bool AddDocument(Document doc, Analyzer analyzer) + { + return UpdateDocument(doc, analyzer, null); + } + + internal bool UpdateDocument(Term t, Document doc, Analyzer analyzer) + { + return UpdateDocument(doc, analyzer, t); + } + + internal bool UpdateDocument(Document doc, Analyzer analyzer, Term delTerm) + { + + // This call is synchronized but fast + DocumentsWriterThreadState state = GetThreadState(doc, delTerm); + + DocState docState = state.docState; + docState.doc = doc; + docState.analyzer = analyzer; + + bool doReturnFalse = false; // {{Aroush-2.9}} to handle return from finally clause + + bool success = false; + try + { + // This call is not synchronized and does all the + // work + DocWriter perDoc; + try + { + perDoc = state.consumer.ProcessDocument(); + } + finally + { + docState.Clear(); + } + // This call is synchronized but fast + FinishDocument(state, perDoc); + success = true; + } + finally + { + if (!success) + { + lock (this) + { + + if (aborting) + { + state.isIdle = true; + System.Threading.Monitor.PulseAll(this); + Abort(); + } + else + { + skipDocWriter.docID = docState.docID; + bool success2 = false; + try + { + waitQueue.Add(skipDocWriter); + success2 = true; + } + finally + { + if (!success2) + { + state.isIdle = true; + System.Threading.Monitor.PulseAll(this); + Abort(); + // return false; // {{Aroush-2.9}} this 'return false' is move to outside finally + doReturnFalse = true; + } + } + + if (!doReturnFalse) // {{Aroush-2.9}} added because of the above 'return false' removal + { + state.isIdle = true; + System.Threading.Monitor.PulseAll(this); + + // If this thread state had decided to flush, we + // must clear it so another thread can flush + if (state.doFlushAfter) + { + state.doFlushAfter = false; + flushPending = false; + System.Threading.Monitor.PulseAll(this); + } + + // Immediately mark this document as deleted + // since likely it was partially added. 
This + // keeps indexing as "all or none" (atomic) when + // adding a document: + AddDeleteDocID(state.docState.docID); + } + } + } + } + } + + if (doReturnFalse) // {{Aroush-2.9}} see comment abouve + { + return false; + } + + return state.doFlushAfter || TimeToFlushDeletes(); + } + + // for testing + internal int GetNumBufferedDeleteTerms() + { + lock (this) + { + return deletesInRAM.numTerms; + } + } + + // for testing + internal IDictionary GetBufferedDeleteTerms() + { + lock (this) + { + return deletesInRAM.terms; + } + } + + /// Called whenever a merge has completed and the merged segments had deletions + internal void RemapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) + { + lock (this) + { + if (docMaps == null) + // The merged segments had no deletes so docIDs did not change and we have nothing to do + return ; + MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount); + deletesInRAM.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); + deletesFlushed.Remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); + flushedDocCount -= mapper.docShift; + } + } + + private void WaitReady(DocumentsWriterThreadState state) + { + lock (this) + { + + while (!closed && ((state != null && !state.isIdle) || pauseThreads != 0 || flushPending || aborting)) + { + System.Threading.Monitor.Wait(this); + } + + if (closed) + throw new AlreadyClosedException("this IndexWriter is closed"); + } + } + + internal bool BufferDeleteTerms(Term[] terms) + { + lock (this) + { + WaitReady(null); + for (int i = 0; i < terms.Length; i++) + AddDeleteTerm(terms[i], numDocsInRAM); + return TimeToFlushDeletes(); + } + } + + internal bool BufferDeleteTerm(Term term) + { + lock (this) + { + WaitReady(null); + AddDeleteTerm(term, numDocsInRAM); + return TimeToFlushDeletes(); + } + } + + internal bool BufferDeleteQueries(Query[] queries) + { + lock (this) + { + WaitReady(null); + for (int i = 0; i < queries.Length; i++) + AddDeleteQuery(queries[i], numDocsInRAM); + return TimeToFlushDeletes(); + } + } + + internal bool BufferDeleteQuery(Query query) + { + lock (this) + { + WaitReady(null); + AddDeleteQuery(query, numDocsInRAM); + return TimeToFlushDeletes(); + } + } + + internal bool DeletesFull() + { + lock (this) + { + return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms)); + } + } + + internal bool DoApplyDeletes() + { + lock (this) + { + // Very similar to deletesFull(), except we don't count + // numBytesAlloc, because we are checking whether + // deletes (alone) are consuming too many resources now + // and thus should be applied. We apply deletes if RAM + // usage is > 1/2 of our allowed RAM buffer, to prevent + // too-frequent flushing of a long tail of tiny segments + // when merges (which always apply deletes) are + // infrequent. 
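From the application side, the buffered deletes above are fed through IndexWriter; each call below ends up in deletesInRAM via AddDeleteTerm/AddDeleteDocID and is only applied to on-disk segments when the deletes are flushed. The snippet assumes the writer from the earlier sketch and the usual 2.9/3.0-era IndexWriter surface; it is an illustration, not code from this patch.

// Buffered delete-by-term: recorded against the current doc count, applied later.
writer.DeleteDocuments(new Term("id", "42"));

// UpdateDocument is the delTerm path through DocumentsWriter.UpdateDocument:
// an atomic "delete old, add new" for documents keyed by an id field.
var replacement = new Document();
replacement.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
replacement.Add(new Field("body", "replacement text", Field.Store.NO, Field.Index.ANALYZED));
writer.UpdateDocument(new Term("id", "42"), replacement);

// Commit flushes the buffered documents and deletes to the directory.
writer.Commit();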
+ return (ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && (deletesInRAM.bytesUsed + deletesFlushed.bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH && ((deletesInRAM.Size() + deletesFlushed.Size()) >= maxBufferedDeleteTerms)); + } + } + + private bool TimeToFlushDeletes() + { + lock (this) + { + return (bufferIsFull || DeletesFull()) && SetFlushPending(); + } + } + + internal int MaxBufferedDeleteTerms + { + set { this.maxBufferedDeleteTerms = value; } + get { return maxBufferedDeleteTerms; } + } + + internal bool HasDeletes() + { + lock (this) + { + return deletesFlushed.Any(); + } + } + + internal bool ApplyDeletes(SegmentInfos infos) + { + lock (this) + { + if (!HasDeletes()) + return false; + + if (infoStream != null) + Message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + deletesFlushed.docIDs.Count + " deleted docIDs and " + deletesFlushed.queries.Count + " deleted queries on " + (+ infos.Count) + " segments."); + + int infosEnd = infos.Count; + + int docStart = 0; + bool any = false; + for (int i = 0; i < infosEnd; i++) + { + + // Make sure we never attempt to apply deletes to + // segment in external dir + System.Diagnostics.Debug.Assert(infos.Info(i).dir == directory); + + SegmentReader reader = writer.readerPool.Get(infos.Info(i), false); + try + { + any |= ApplyDeletes(reader, docStart); + docStart += reader.MaxDoc; + } + finally + { + writer.readerPool.Release(reader); + } + } + + deletesFlushed.Clear(); + + return any; + } + } + + // used only by assert + private Term lastDeleteTerm; + + // used only by assert + private bool CheckDeleteTerm(Term term) + { + if (term != null) { + System.Diagnostics.Debug.Assert(lastDeleteTerm == null || term.CompareTo(lastDeleteTerm) > 0, "lastTerm=" + lastDeleteTerm + " vs term=" + term); + } + lastDeleteTerm = term; + return true; + } + + // Apply buffered delete terms, queries and docIDs to the + // provided reader + private bool ApplyDeletes(IndexReader reader, int docIDStart) + { + lock (this) + { + int docEnd = docIDStart + reader.MaxDoc; + bool any = false; + + System.Diagnostics.Debug.Assert(CheckDeleteTerm(null)); + + // Delete by term + TermDocs docs = reader.TermDocs(); + try + { + foreach(KeyValuePair entry in deletesFlushed.terms) + { + Term term = entry.Key; + // LUCENE-2086: we should be iterating a TreeMap, + // here, so terms better be in order: + System.Diagnostics.Debug.Assert(CheckDeleteTerm(term)); + docs.Seek(term); + int limit = entry.Value.GetNum(); + while (docs.Next()) + { + int docID = docs.Doc; + if (docIDStart + docID >= limit) + break; + reader.DeleteDocument(docID); + any = true; + } + } + } + finally + { + docs.Close(); + } + + // Delete by docID + foreach(int docIdInt in deletesFlushed.docIDs) + { + int docID = docIdInt; + if (docID >= docIDStart && docID < docEnd) + { + reader.DeleteDocument(docID - docIDStart); + any = true; + } + } + + // Delete by query + IndexSearcher searcher = new IndexSearcher(reader); + foreach(KeyValuePair entry in deletesFlushed.queries) + { + Query query = (Query) entry.Key; + int limit = (int)entry.Value; + Weight weight = query.Weight(searcher); + Scorer scorer = weight.Scorer(reader, true, false); + if (scorer != null) + { + while (true) + { + int doc = scorer.NextDoc(); + if (((long) docIDStart) + doc >= limit) + break; + reader.DeleteDocument(doc); + any = true; + } + } + } + searcher.Close(); + return any; + } + } + + // Buffer a term in bufferedDeleteTerms, which records the + // current number of 
documents buffered in ram so that the + // delete term will be applied to those documents as well + // as the disk segments. + private void AddDeleteTerm(Term term, int docCount) + { + lock (this) + { + BufferedDeletes.Num num = deletesInRAM.terms[term]; + int docIDUpto = flushedDocCount + docCount; + if (num == null) + deletesInRAM.terms[term] = new BufferedDeletes.Num(docIDUpto); + else + num.SetNum(docIDUpto); + deletesInRAM.numTerms++; + + deletesInRAM.AddBytesUsed(BYTES_PER_DEL_TERM + term.Text.Length * CHAR_NUM_BYTE); + } + } + + // Buffer a specific docID for deletion. Currently only + // used when we hit a exception when adding a document + private void AddDeleteDocID(int docID) + { + lock (this) + { + deletesInRAM.docIDs.Add(flushedDocCount + docID); + deletesInRAM.AddBytesUsed(BYTES_PER_DEL_DOCID); + } + } + + private void AddDeleteQuery(Query query, int docID) + { + lock (this) + { + deletesInRAM.queries[query] = flushedDocCount + docID; + deletesInRAM.AddBytesUsed(BYTES_PER_DEL_QUERY); + } + } + + internal bool DoBalanceRAM() + { + lock (this) + { + return ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger); + } + } + + /// Does the synchronized work to finish/flush the + /// inverted document. + /// + private void FinishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) + { + + if (DoBalanceRAM()) + // Must call this w/o holding synchronized(this) else + // we'll hit deadlock: + BalanceRAM(); + + lock (this) + { + + System.Diagnostics.Debug.Assert(docWriter == null || docWriter.docID == perThread.docState.docID); + + if (aborting) + { + + // We are currently aborting, and another thread is + // waiting for me to become idle. We just forcefully + // idle this threadState; it will be fully reset by + // abort() + if (docWriter != null) + try + { + docWriter.Abort(); + } + catch (System.Exception) + { + } + + perThread.isIdle = true; + System.Threading.Monitor.PulseAll(this); + return ; + } + + bool doPause; + + if (docWriter != null) + doPause = waitQueue.Add(docWriter); + else + { + skipDocWriter.docID = perThread.docState.docID; + doPause = waitQueue.Add(skipDocWriter); + } + + if (doPause) + WaitForWaitQueue(); + + if (bufferIsFull && !flushPending) + { + flushPending = true; + perThread.doFlushAfter = true; + } + + perThread.isIdle = true; + System.Threading.Monitor.PulseAll(this); + } + } + + internal void WaitForWaitQueue() + { + lock (this) + { + do + { + System.Threading.Monitor.Wait(this); + } + while (!waitQueue.DoResume()); + } + } + + internal class SkipDocWriter:DocWriter + { + public override void Finish() + { + } + public override void Abort() + { + } + public override long SizeInBytes() + { + return 0; + } + } + internal SkipDocWriter skipDocWriter; + + internal long GetRAMUsed() + { + return numBytesUsed + deletesInRAM.bytesUsed + deletesFlushed.bytesUsed; + } + + internal long numBytesAlloc; + internal long numBytesUsed; + + internal System.Globalization.NumberFormatInfo nf = System.Globalization.CultureInfo.CurrentCulture.NumberFormat; + + // Coarse estimates used to measure RAM usage of buffered deletes + internal const int OBJECT_HEADER_BYTES = 8; + internal static readonly int POINTER_NUM_BYTE; + internal const int INT_NUM_BYTE = 4; + internal const int CHAR_NUM_BYTE = 2; + + /* Rough logic: HashMap has an array[Entry] w/ varying + load factor (say 2 * POINTER). 
Entry is object w/ Term + key, BufferedDeletes.Num val, int hash, Entry next + (OBJ_HEADER + 3*POINTER + INT). Term is object w/ + String field and String text (OBJ_HEADER + 2*POINTER). + We don't count Term's field since it's interned. + Term's text is String (OBJ_HEADER + 4*INT + POINTER + + OBJ_HEADER + string.length*CHAR). BufferedDeletes.num is + OBJ_HEADER + INT. */ + + internal static readonly int BYTES_PER_DEL_TERM = 8 * POINTER_NUM_BYTE + 5 * OBJECT_HEADER_BYTES + 6 * INT_NUM_BYTE; + + /* Rough logic: del docIDs are List. Say list + allocates ~2X size (2*POINTER). Integer is OBJ_HEADER + + int */ + internal static readonly int BYTES_PER_DEL_DOCID = 2 * POINTER_NUM_BYTE + OBJECT_HEADER_BYTES + INT_NUM_BYTE; + + /* Rough logic: HashMap has an array[Entry] w/ varying + load factor (say 2 * POINTER). Entry is object w/ + Query key, Integer val, int hash, Entry next + (OBJ_HEADER + 3*POINTER + INT). Query we often + undercount (say 24 bytes). Integer is OBJ_HEADER + INT. */ + internal static readonly int BYTES_PER_DEL_QUERY = 5 * POINTER_NUM_BYTE + 2 * OBJECT_HEADER_BYTES + 2 * INT_NUM_BYTE + 24; + + /* Initial chunks size of the shared byte[] blocks used to + store postings data */ + internal const int BYTE_BLOCK_SHIFT = 15; + internal static readonly int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT; + internal static readonly int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1; + internal static readonly int BYTE_BLOCK_NOT_MASK = ~ BYTE_BLOCK_MASK; + + internal class ByteBlockAllocator : ByteBlockPool.Allocator + { + public ByteBlockAllocator(DocumentsWriter enclosingInstance, int blockSize) + { + this.blockSize = blockSize; + InitBlock(enclosingInstance); + } + private void InitBlock(DocumentsWriter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private DocumentsWriter enclosingInstance; + public DocumentsWriter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + int blockSize; + internal List freeByteBlocks = new List(); + + /* Allocate another byte[] from the shared pool */ + public /*internal*/ override byte[] GetByteBlock(bool trackAllocations) + { + lock (Enclosing_Instance) + { + int size = freeByteBlocks.Count; + byte[] b; + if (0 == size) + { + // Always record a block allocated, even if + // trackAllocations is false. This is necessary + // because this block will be shared between + // things that don't track allocations (term + // vectors) and things that do (freq/prox + // postings). 
+ Enclosing_Instance.numBytesAlloc += blockSize; + b = new byte[blockSize]; + } + else + { + b = freeByteBlocks[size - 1]; + freeByteBlocks.RemoveAt(size - 1); + } + if (trackAllocations) + Enclosing_Instance.numBytesUsed += blockSize; + System.Diagnostics.Debug.Assert(Enclosing_Instance.numBytesUsed <= Enclosing_Instance.numBytesAlloc); + return b; + } + } + + /* Return byte[]'s to the pool */ + public /*internal*/ override void RecycleByteBlocks(byte[][] blocks, int start, int end) + { + lock (Enclosing_Instance) + { + for (int i = start; i < end; i++) + { + freeByteBlocks.Add(blocks[i]); + blocks[i] = null; + } + } + } + + public /*internal*/ override void RecycleByteBlocks(IList blocks) + { + lock (Enclosing_Instance) + { + int size = blocks.Count; + for(int i=0;i freeIntBlocks = new List(); + + /* Allocate another int[] from the shared pool */ + internal int[] GetIntBlock(bool trackAllocations) + { + lock (this) + { + int size = freeIntBlocks.Count; + int[] b; + if (0 == size) + { + // Always record a block allocated, even if + // trackAllocations is false. This is necessary + // because this block will be shared between + // things that don't track allocations (term + // vectors) and things that do (freq/prox + // postings). + numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE; + b = new int[INT_BLOCK_SIZE]; + } + else + { + b = freeIntBlocks[size - 1]; + freeIntBlocks.RemoveAt(size - 1); + } + if (trackAllocations) + numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE; + System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc); + return b; + } + } + + internal void BytesAllocated(long numBytes) + { + lock (this) + { + numBytesAlloc += numBytes; + } + } + + internal void BytesUsed(long numBytes) + { + lock (this) + { + numBytesUsed += numBytes; + System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc); + } + } + + /* Return int[]s to the pool */ + internal void RecycleIntBlocks(int[][] blocks, int start, int end) + { + lock (this) + { + for (int i = start; i < end; i++) + { + freeIntBlocks.Add(blocks[i]); + blocks[i] = null; + } + } + } + + internal ByteBlockAllocator byteBlockAllocator; + + internal static int PER_DOC_BLOCK_SIZE = 1024; + + ByteBlockAllocator perDocAllocator; + + /* Initial chunk size of the shared char[] blocks used to + store term text */ + internal const int CHAR_BLOCK_SHIFT = 14; + internal static readonly int CHAR_BLOCK_SIZE = 1 << CHAR_BLOCK_SHIFT; + internal static readonly int CHAR_BLOCK_MASK = CHAR_BLOCK_SIZE - 1; + + internal static readonly int MAX_TERM_LENGTH = CHAR_BLOCK_SIZE - 1; + + private List freeCharBlocks = new List(); + + /* Allocate another char[] from the shared pool */ + internal char[] GetCharBlock() + { + lock (this) + { + int size = freeCharBlocks.Count; + char[] c; + if (0 == size) + { + numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; + c = new char[CHAR_BLOCK_SIZE]; + } + else + { + c = freeCharBlocks[size - 1]; + freeCharBlocks.RemoveAt(size - 1); + } + // We always track allocations of char blocks, for now, + // because nothing that skips allocation tracking + // (currently only term vectors) uses its own char + // blocks. 
+ numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; + System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc); + return c; + } + } + + /* Return char[]s to the pool */ + internal void RecycleCharBlocks(char[][] blocks, int numBlocks) + { + lock (this) + { + for (int i = 0; i < numBlocks; i++) + { + freeCharBlocks.Add(blocks[i]); + blocks[i] = null; + } + } + } + + internal System.String ToMB(long v) + { + return System.String.Format(nf, "{0:f}", new System.Object[] { (v / 1024F / 1024F) }); + } + + + /* We have four pools of RAM: Postings, byte blocks + * (holds freq/prox posting data), char blocks (holds + * characters in the term) and per-doc buffers (stored fields/term vectors). + * Different docs require varying amount of storage from + * these four classes. + * + * For example, docs with many unique single-occurrence + * short terms will use up the Postings RAM and hardly any + * of the other two. Whereas docs with very large terms + * will use alot of char blocks RAM and relatively less of + * the other two. This method just frees allocations from + * the pools once we are over-budget, which balances the + * pools to match the current docs. */ + internal void BalanceRAM() + { + + // We flush when we've used our target usage + long flushTrigger = ramBufferSize; + + long deletesRAMUsed = deletesInRAM.bytesUsed + deletesFlushed.bytesUsed; + + if (numBytesAlloc + deletesRAMUsed > freeTrigger) + { + + if (infoStream != null) + Message( + " RAM: now balance allocations: usedMB=" + ToMB(numBytesUsed) + + " vs trigger=" + ToMB(flushTrigger) + + " allocMB=" + ToMB(numBytesAlloc) + + " deletesMB=" + ToMB(deletesRAMUsed) + + " vs trigger=" + ToMB(freeTrigger) + + " byteBlockFree=" + ToMB(byteBlockAllocator.freeByteBlocks.Count * BYTE_BLOCK_SIZE) + + " perDocFree=" + ToMB(perDocAllocator.freeByteBlocks.Count * PER_DOC_BLOCK_SIZE) + + " charBlockFree=" + ToMB(freeCharBlocks.Count * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE)); + + long startBytesAlloc = numBytesAlloc + deletesRAMUsed; + + int iter = 0; + + // We free equally from each pool in 32 KB + // chunks until we are below our threshold + // (freeLevel) + + bool any = true; + + while (numBytesAlloc + deletesRAMUsed > freeLevel) + { + + lock (this) + { + if (0 == perDocAllocator.freeByteBlocks.Count + && 0 == byteBlockAllocator.freeByteBlocks.Count + && 0 == freeCharBlocks.Count + && 0 == freeIntBlocks.Count + && !any) + { + // Nothing else to free -- must flush now. 
+ bufferIsFull = numBytesUsed + deletesRAMUsed > flushTrigger; + if (infoStream != null) + { + if (bufferIsFull) + Message(" nothing to free; now set bufferIsFull"); + else + Message(" nothing to free"); + } + System.Diagnostics.Debug.Assert(numBytesUsed <= numBytesAlloc); + break; + } + + if ((0 == iter % 5) && byteBlockAllocator.freeByteBlocks.Count > 0) + { + byteBlockAllocator.freeByteBlocks.RemoveAt(byteBlockAllocator.freeByteBlocks.Count - 1); + numBytesAlloc -= BYTE_BLOCK_SIZE; + } + + if ((1 == iter % 5) && freeCharBlocks.Count > 0) + { + freeCharBlocks.RemoveAt(freeCharBlocks.Count - 1); + numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; + } + + if ((2 == iter % 5) && freeIntBlocks.Count > 0) + { + freeIntBlocks.RemoveAt(freeIntBlocks.Count - 1); + numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE; + } + + if ((3 == iter % 5) && perDocAllocator.freeByteBlocks.Count > 0) + { + // Remove upwards of 32 blocks (each block is 1K) + for (int i = 0; i < 32; ++i) + { + perDocAllocator.freeByteBlocks.RemoveAt(perDocAllocator.freeByteBlocks.Count - 1); + numBytesAlloc -= PER_DOC_BLOCK_SIZE; + if (perDocAllocator.freeByteBlocks.Count == 0) + { + break; + } + } + } + } + + if ((4 == iter % 5) && any) + // Ask consumer to free any recycled state + any = consumer.FreeRAM(); + + iter++; + } + + if (infoStream != null) + Message(System.String.Format(nf, " after free: freedMB={0:f} usedMB={1:f} allocMB={2:f}", + new System.Object[] { ((startBytesAlloc - numBytesAlloc) / 1024.0 / 1024.0), (numBytesUsed / 1024.0 / 1024.0), (numBytesAlloc / 1024.0 / 1024.0) })); + } + else + { + // If we have not crossed the 100% mark, but have + // crossed the 95% mark of RAM we are actually + // using, go ahead and flush. This prevents + // over-allocating and then freeing, with every + // flush. 
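To put numbers on the balancing loop above (an illustration assuming the usual 16 MB default buffer, not figures from the source): freeTrigger is about 16.8 MB and freeLevel about 15.2 MB, and each iteration releases roughly one 32 KB block, rotating over the byte, char, int and per-doc pools (the per-doc pool gives back up to 32 of its 1 KB blocks at a time) and asking the consumer to free recycled state every fifth pass.

// Decision sketch of BalanceRAM (simplified, comments only):
//   if (numBytesAlloc + deletesRAM > freeTrigger)         // over 105% allocated
//       free pooled blocks until numBytesAlloc + deletesRAM <= freeLevel,
//       or set bufferIsFull when the pools are empty and RAM in use is still over budget;
//   else if (numBytesUsed + deletesRAM > ramBufferSize)   // actually using more than the budget
//       bufferIsFull = true;                              // next finishDocument triggers a flush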
+ lock (this) + { + + if (numBytesUsed + deletesRAMUsed > flushTrigger) + { + if (infoStream != null) + Message(System.String.Format(nf, " RAM: now flush @ usedMB={0:f} allocMB={1:f} triggerMB={2:f}", + new object[] { (numBytesUsed / 1024.0 / 1024.0), (numBytesAlloc / 1024.0 / 1024.0), (flushTrigger / 1024.0 / 1024.0) })); + + bufferIsFull = true; + } + } + } + } + + internal WaitQueue waitQueue; + + internal class WaitQueue + { + private void InitBlock(DocumentsWriter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private DocumentsWriter enclosingInstance; + public DocumentsWriter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal DocWriter[] waiting; + internal int nextWriteDocID; + internal int nextWriteLoc; + internal int numWaiting; + internal long waitingBytes; + + public WaitQueue(DocumentsWriter enclosingInstance) + { + InitBlock(enclosingInstance); + waiting = new DocWriter[10]; + } + + internal void Reset() + { + lock (this) + { + // NOTE: nextWriteLoc doesn't need to be reset + System.Diagnostics.Debug.Assert(numWaiting == 0); + System.Diagnostics.Debug.Assert(waitingBytes == 0); + nextWriteDocID = 0; + } + } + + internal bool DoResume() + { + lock (this) + { + return waitingBytes <= Enclosing_Instance.waitQueueResumeBytes; + } + } + + internal bool DoPause() + { + lock (this) + { + return waitingBytes > Enclosing_Instance.waitQueuePauseBytes; + } + } + + internal void Abort() + { + lock (this) + { + int count = 0; + for (int i = 0; i < waiting.Length; i++) + { + DocWriter doc = waiting[i]; + if (doc != null) + { + doc.Abort(); + waiting[i] = null; + count++; + } + } + waitingBytes = 0; + System.Diagnostics.Debug.Assert(count == numWaiting); + numWaiting = 0; + } + } + + private void WriteDocument(DocWriter doc) + { + System.Diagnostics.Debug.Assert(doc == Enclosing_Instance.skipDocWriter || nextWriteDocID == doc.docID); + bool success = false; + try + { + doc.Finish(); + nextWriteDocID++; + Enclosing_Instance.numDocsInStore++; + nextWriteLoc++; + System.Diagnostics.Debug.Assert(nextWriteLoc <= waiting.Length); + if (nextWriteLoc == waiting.Length) + nextWriteLoc = 0; + success = true; + } + finally + { + if (!success) + Enclosing_Instance.SetAborting(); + } + } + + public bool Add(DocWriter doc) + { + lock (this) + { + + System.Diagnostics.Debug.Assert(doc.docID >= nextWriteDocID); + + if (doc.docID == nextWriteDocID) + { + WriteDocument(doc); + while (true) + { + doc = waiting[nextWriteLoc]; + if (doc != null) + { + numWaiting--; + waiting[nextWriteLoc] = null; + waitingBytes -= doc.SizeInBytes(); + WriteDocument(doc); + } + else + break; + } + } + else + { + + // I finished before documents that were added + // before me. This can easily happen when I am a + // small doc and the docs before me were large, or, + // just due to luck in the thread scheduling. 
Just + // add myself to the queue and when that large doc + // finishes, it will flush me: + int gap = doc.docID - nextWriteDocID; + if (gap >= waiting.Length) + { + // Grow queue + DocWriter[] newArray = new DocWriter[ArrayUtil.GetNextSize(gap)]; + System.Diagnostics.Debug.Assert(nextWriteLoc >= 0); + Array.Copy(waiting, nextWriteLoc, newArray, 0, waiting.Length - nextWriteLoc); + Array.Copy(waiting, 0, newArray, waiting.Length - nextWriteLoc, nextWriteLoc); + nextWriteLoc = 0; + waiting = newArray; + gap = doc.docID - nextWriteDocID; + } + + int loc = nextWriteLoc + gap; + if (loc >= waiting.Length) + loc -= waiting.Length; + + // We should only wrap one time + System.Diagnostics.Debug.Assert(loc < waiting.Length); + + // Nobody should be in my spot! + System.Diagnostics.Debug.Assert(waiting [loc] == null); + waiting[loc] = doc; + numWaiting++; + waitingBytes += doc.SizeInBytes(); + } + + return DoPause(); + } + } + } + static DocumentsWriter() + { + DefaultIndexingChain = new AnonymousClassIndexingChain(); + POINTER_NUM_BYTE = Constants.JRE_IS_64BIT?8:4; + } + + public static int BYTE_BLOCK_SIZE_ForNUnit + { + get { return BYTE_BLOCK_SIZE; } + } + + public static int CHAR_BLOCK_SIZE_ForNUnit + { + get { return CHAR_BLOCK_SIZE; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/DocumentsWriterThreadState.cs b/external/Lucene.Net.Light/src/core/Index/DocumentsWriterThreadState.cs new file mode 100644 index 0000000000..e20fbee8a2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/DocumentsWriterThreadState.cs @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// Used by DocumentsWriter to maintain per-thread state. + /// We keep a separate Posting hash and other state for each + /// thread and then merge postings hashes from all threads + /// when writing the segment. 
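The WaitQueue above exists because doc-store bytes must be written in docID order even though documents finish out of order: a finished doc either writes immediately when it carries the next expected docID, draining any queued successors behind it, or parks itself until the slower, earlier doc completes. A stripped-down model of that drain logic (a dictionary instead of the ring array, and without the waitingBytes pause/resume backpressure; hypothetical types for illustration):

using System.Collections.Generic;

// Completions may arrive out of order, but are emitted strictly by docID.
sealed class InOrderEmitterSketch
{
    readonly Dictionary<int, string> parked = new Dictionary<int, string>();
    int nextDocID;

    public void Finished(int docID, string payload)
    {
        if (docID == nextDocID)
        {
            Emit(payload);
            // Drain everything that was waiting behind this document.
            string queued;
            while (parked.TryGetValue(nextDocID, out queued))
            {
                parked.Remove(nextDocID);
                Emit(queued);
            }
        }
        else
        {
            parked[docID] = payload;   // an earlier doc is still in flight; wait for it
        }
    }

    void Emit(string payload)
    {
        System.Console.WriteLine(nextDocID + ": " + payload);
        nextDocID++;
    }
}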
+ /// + sealed class DocumentsWriterThreadState + { + + internal bool isIdle = true; // false if this is currently in use by a thread + internal int numThreads = 1; // Number of threads that share this instance + internal bool doFlushAfter; // true if we should flush after processing current doc + internal DocConsumerPerThread consumer; + internal DocumentsWriter.DocState docState; + + internal DocumentsWriter docWriter; + + public DocumentsWriterThreadState(DocumentsWriter docWriter) + { + this.docWriter = docWriter; + docState = new DocumentsWriter.DocState(); + docState.maxFieldLength = docWriter.maxFieldLength; + docState.infoStream = docWriter.infoStream; + docState.similarity = docWriter.similarity; + docState.docWriter = docWriter; + consumer = docWriter.consumer.AddThread(this); + } + + internal void DoAfterFlush() + { + numThreads = 0; + doFlushAfter = false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldInfo.cs b/external/Lucene.Net.Light/src/core/Index/FieldInfo.cs new file mode 100644 index 0000000000..bfca8afb4c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldInfo.cs @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Index +{ + + public sealed class FieldInfo : System.ICloneable + { + internal System.String name; + internal bool isIndexed; + internal int number; + + // true if term vector for this field should be stored + internal bool storeTermVector; + internal bool storeOffsetWithTermVector; + internal bool storePositionWithTermVector; + + internal bool omitNorms; // omit norms associated with indexed fields + internal bool omitTermFreqAndPositions; + + internal bool storePayloads; // whether this field stores payloads together with term positions + + internal FieldInfo(System.String na, bool tk, int nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) + { + name = na; + isIndexed = tk; + number = nu; + if (isIndexed) + { + this.storeTermVector = storeTermVector; + this.storeOffsetWithTermVector = storeOffsetWithTermVector; + this.storePositionWithTermVector = storePositionWithTermVector; + this.storePayloads = storePayloads; + this.omitNorms = omitNorms; + this.omitTermFreqAndPositions = omitTermFreqAndPositions; + } + else + { + // for non-indexed fields, leave defaults + this.storeTermVector = false; + this.storeOffsetWithTermVector = false; + this.storePositionWithTermVector = false; + this.storePayloads = false; + this.omitNorms = true; + this.omitTermFreqAndPositions = false; + } + } + + public System.Object Clone() + { + return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); + } + + internal void Update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) + { + if (this.isIndexed != isIndexed) + { + this.isIndexed = true; // once indexed, always index + } + if (isIndexed) + { + // if updated field data is not for indexing, leave the updates out + if (this.storeTermVector != storeTermVector) + { + this.storeTermVector = true; // once vector, always vector + } + if (this.storePositionWithTermVector != storePositionWithTermVector) + { + this.storePositionWithTermVector = true; // once vector, always vector + } + if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) + { + this.storeOffsetWithTermVector = true; // once vector, always vector + } + if (this.storePayloads != storePayloads) + { + this.storePayloads = true; + } + if (this.omitNorms != omitNorms) + { + this.omitNorms = false; // once norms are stored, always store + } + if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) + { + this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life + } + } + } + + public bool storePayloads_ForNUnit + { + get { return storePayloads; } + } + + public System.String name_ForNUnit + { + get { return name; } + } + + public bool isIndexed_ForNUnit + { + get { return isIndexed; } + } + + public bool omitNorms_ForNUnit + { + get { return omitNorms; } + } + + public bool omitTermFreqAndPositions_ForNUnit + { + get { return omitTermFreqAndPositions; } + } + + public bool storeTermVector_ForNUnit + { + get { return storeTermVector; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldInfos.cs b/external/Lucene.Net.Light/src/core/Index/FieldInfos.cs new file mode 100644 index 0000000000..8c9cae6950 --- 
/dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldInfos.cs @@ -0,0 +1,491 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using Lucene.Net.Support; +using Document = Lucene.Net.Documents.Document; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using StringHelper = Lucene.Net.Util.StringHelper; + +namespace Lucene.Net.Index +{ + + /// Access to the Fieldable Info file that describes document fields and whether or + /// not they are indexed. Each segment has a separate Fieldable Info file. Objects + /// of this class are thread-safe for multiple readers, but only one thread can + /// be adding documents at a time, with no other reader or writer threads + /// accessing this object. + /// + public sealed class FieldInfos : ICloneable + { + + // Used internally (ie not written to *.fnm files) for pre-2.9 files + public const int FORMAT_PRE = - 1; + + // First used in 2.9; prior to 2.9 there was no format header + public const int FORMAT_START = - 2; + + internal static readonly int CURRENT_FORMAT = FORMAT_START; + + internal const byte IS_INDEXED = (0x1); + internal const byte STORE_TERMVECTOR = (0x2); + internal const byte STORE_POSITIONS_WITH_TERMVECTOR =(0x4); + internal const byte STORE_OFFSET_WITH_TERMVECTOR = (0x8); + internal const byte OMIT_NORMS = (0x10); + internal const byte STORE_PAYLOADS = (0x20); + internal const byte OMIT_TERM_FREQ_AND_POSITIONS = (0x40); + + private readonly System.Collections.Generic.List byNumber = new System.Collections.Generic.List(); + private readonly HashMap byName = new HashMap(); + private int format; + + public /*internal*/ FieldInfos() + { + } + + /// Construct a FieldInfos object using the directory and the name of the file + /// IndexInput + /// + /// The directory to open the IndexInput from + /// + /// The name of the file to open the IndexInput from in the Directory + /// + /// IOException + public /*internal*/ FieldInfos(Directory d, String name) + { + IndexInput input = d.OpenInput(name); + try + { + try + { + Read(input, name); + } + catch (System.IO.IOException) + { + if (format == FORMAT_PRE) + { + // LUCENE-1623: FORMAT_PRE (before there was a + // format) may be 2.3.2 (pre-utf8) or 2.4.x (utf8) + // encoding; retry with input set to pre-utf8 + input.Seek(0); + input.SetModifiedUTF8StringsMode(); + byNumber.Clear(); + byName.Clear(); + + bool rethrow = false; + try + { + Read(input, name); + } + catch (Exception) + { + // Ignore any new exception & set to throw original IOE + rethrow = true; + } + if(rethrow) + { + // Preserve stack trace + throw; + } + } + else + { + // The IOException cannot be caused by + // LUCENE-1623, so 
re-throw it + throw; + } + } + } + finally + { + input.Close(); + } + } + + /// Returns a deep clone of this FieldInfos instance. + public Object Clone() + { + lock (this) + { + var fis = new FieldInfos(); + int numField = byNumber.Count; + for (int i = 0; i < numField; i++) + { + var fi = (FieldInfo)byNumber[i].Clone(); + fis.byNumber.Add(fi); + fis.byName[fi.name] = fi; + } + return fis; + } + } + + /// Adds field info for a Document. + public void Add(Document doc) + { + lock (this) + { + System.Collections.Generic.IList fields = doc.GetFields(); + foreach(IFieldable field in fields) + { + Add(field.Name, field.IsIndexed, field.IsTermVectorStored, + field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector, field.OmitNorms, + false, field.OmitTermFreqAndPositions); + } + } + } + + /// Returns true if any fields do not omitTermFreqAndPositions + internal bool HasProx() + { + int numFields = byNumber.Count; + for (int i = 0; i < numFields; i++) + { + FieldInfo fi = FieldInfo(i); + if (fi.isIndexed && !fi.omitTermFreqAndPositions) + { + return true; + } + } + return false; + } + + /// Add fields that are indexed. Whether they have termvectors has to be specified. + /// + /// + /// The names of the fields + /// + /// Whether the fields store term vectors or not + /// + /// true if positions should be stored. + /// + /// true if offsets should be stored + /// + public void AddIndexed(System.Collections.Generic.ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector) + { + lock (this) + { + foreach(string name in names) + { + Add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector); + } + } + } + + /// Assumes the fields are not storing term vectors. + /// + /// + /// The names of the fields + /// + /// Whether the fields are indexed or not + /// + /// + /// + /// + public void Add(System.Collections.Generic.ICollection names, bool isIndexed) + { + lock (this) + { + foreach(string name in names) + { + Add(name, isIndexed); + } + } + } + + /// Calls 5 parameter add with false for all TermVector parameters. + /// + /// + /// The name of the Fieldable + /// + /// true if the field is indexed + /// + /// + /// + public void Add(String name, bool isIndexed) + { + lock (this) + { + Add(name, isIndexed, false, false, false, false); + } + } + + /// Calls 5 parameter add with false for term vector positions and offsets. + /// + /// + /// The name of the field + /// + /// true if the field is indexed + /// + /// true if the term vector should be stored + /// + public void Add(System.String name, bool isIndexed, bool storeTermVector) + { + lock (this) + { + Add(name, isIndexed, storeTermVector, false, false, false); + } + } + + /// If the field is not yet known, adds it. If it is known, checks to make + /// sure that the isIndexed flag is the same as was given previously for this + /// field. If not - marks it as being indexed. Same goes for the TermVector + /// parameters. 
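All of the Add overloads above funnel into the eight-argument Add further down, which either creates a new FieldInfo or merges flags through FieldInfo.Update with the sticky semantics described earlier (once indexed always indexed, once a vector always a vector, norms stay stored once any occurrence stores them). A small illustration with a hypothetical field, not taken from the patch:

var infos = new FieldInfos();

// First occurrence of "body": indexed, no term vectors, norms kept (omitNorms == false).
infos.Add("body", true, false, false, false, false);

// Second occurrence: indexed, with term vectors and omitNorms == true.
infos.Add("body", true, true, false, false, true);

FieldInfo fi = infos.FieldInfo("body");
// fi is now: isIndexed == true         (once indexed, always indexed)
//            storeTermVector == true   (once a vector, always a vector)
//            omitNorms == false        (the first occurrence stored norms, so they stay)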
+ /// + /// + /// The name of the field + /// + /// true if the field is indexed + /// + /// true if the term vector should be stored + /// + /// true if the term vector with positions should be stored + /// + /// true if the term vector with offsets should be stored + /// + public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector) + { + lock (this) + { + + Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false); + } + } + + /// If the field is not yet known, adds it. If it is known, checks to make + /// sure that the isIndexed flag is the same as was given previously for this + /// field. If not - marks it as being indexed. Same goes for the TermVector + /// parameters. + /// + /// + /// The name of the field + /// + /// true if the field is indexed + /// + /// true if the term vector should be stored + /// + /// true if the term vector with positions should be stored + /// + /// true if the term vector with offsets should be stored + /// + /// true if the norms for the indexed field should be omitted + /// + public void Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms) + { + lock (this) + { + Add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false); + } + } + + /// If the field is not yet known, adds it. If it is known, checks to make + /// sure that the isIndexed flag is the same as was given previously for this + /// field. If not - marks it as being indexed. Same goes for the TermVector + /// parameters. + /// + /// + /// The name of the field + /// + /// true if the field is indexed + /// + /// true if the term vector should be stored + /// + /// true if the term vector with positions should be stored + /// + /// true if the term vector with offsets should be stored + /// + /// true if the norms for the indexed field should be omitted + /// + /// true if payloads should be stored for this field + /// + /// true if term freqs should be omitted for this field + /// + public FieldInfo Add(System.String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) + { + lock (this) + { + FieldInfo fi = FieldInfo(name); + if (fi == null) + { + return AddInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); + } + else + { + fi.Update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); + } + return fi; + } + } + + private FieldInfo AddInternal(String name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) + { + name = StringHelper.Intern(name); + var fi = new FieldInfo(name, isIndexed, byNumber.Count, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); + byNumber.Add(fi); + byName[name] = fi; + return fi; + } + + public int FieldNumber(System.String fieldName) + { + FieldInfo fi = FieldInfo(fieldName); + return (fi != null)?fi.number:- 1; + } + + public FieldInfo FieldInfo(System.String fieldName) + { + return byName[fieldName]; 
+ } + + /// Return the fieldName identified by its number. + /// + /// + /// + /// + /// the fieldName or an empty string when the field + /// with the given number doesn't exist. + /// + public System.String FieldName(int fieldNumber) + { + FieldInfo fi = FieldInfo(fieldNumber); + return (fi != null) ? fi.name : ""; + } + + /// Return the fieldinfo object referenced by the fieldNumber. + /// + /// + /// the FieldInfo object or null when the given fieldNumber + /// doesn't exist. + /// + public FieldInfo FieldInfo(int fieldNumber) + { + return (fieldNumber >= 0) ? byNumber[fieldNumber] : null; + } + + public int Size() + { + return byNumber.Count; + } + + public bool HasVectors() + { + bool hasVectors = false; + for (int i = 0; i < Size(); i++) + { + if (FieldInfo(i).storeTermVector) + { + hasVectors = true; + break; + } + } + return hasVectors; + } + + public void Write(Directory d, System.String name) + { + IndexOutput output = d.CreateOutput(name); + try + { + Write(output); + } + finally + { + output.Close(); + } + } + + public void Write(IndexOutput output) + { + output.WriteVInt(CURRENT_FORMAT); + output.WriteVInt(Size()); + for (int i = 0; i < Size(); i++) + { + FieldInfo fi = FieldInfo(i); + var bits = (byte) (0x0); + if (fi.isIndexed) + bits |= IS_INDEXED; + if (fi.storeTermVector) + bits |= STORE_TERMVECTOR; + if (fi.storePositionWithTermVector) + bits |= STORE_POSITIONS_WITH_TERMVECTOR; + if (fi.storeOffsetWithTermVector) + bits |= STORE_OFFSET_WITH_TERMVECTOR; + if (fi.omitNorms) + bits |= OMIT_NORMS; + if (fi.storePayloads) + bits |= STORE_PAYLOADS; + if (fi.omitTermFreqAndPositions) + bits |= OMIT_TERM_FREQ_AND_POSITIONS; + + output.WriteString(fi.name); + output.WriteByte(bits); + } + } + + private void Read(IndexInput input, String fileName) + { + int firstInt = input.ReadVInt(); + + if (firstInt < 0) + { + // This is a real format + format = firstInt; + } + else + { + format = FORMAT_PRE; + } + + if (format != FORMAT_PRE & format != FORMAT_START) + { + throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\""); + } + + int size; + if (format == FORMAT_PRE) + { + size = firstInt; + } + else + { + size = input.ReadVInt(); //read in the size + } + + for (int i = 0; i < size; i++) + { + String name = StringHelper.Intern(input.ReadString()); + byte bits = input.ReadByte(); + bool isIndexed = (bits & IS_INDEXED) != 0; + bool storeTermVector = (bits & STORE_TERMVECTOR) != 0; + bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; + bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; + bool omitNorms = (bits & OMIT_NORMS) != 0; + bool storePayloads = (bits & STORE_PAYLOADS) != 0; + bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0; + + AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); + } + + if (input.FilePointer != input.Length()) + { + throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length()); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldInvertState.cs b/external/Lucene.Net.Light/src/core/Index/FieldInvertState.cs new file mode 100644 index 0000000000..96d6c83fef --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldInvertState.cs @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using AttributeSource = Lucene.Net.Util.AttributeSource; + +namespace Lucene.Net.Index +{ + + /// This class tracks the number and position / offset parameters of terms + /// being added to the index. The information collected in this class is + /// also used to calculate the normalization factor for a field. + /// + ///

WARNING: This API is new and experimental, and may suddenly + /// change. + ///
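For illustration only, not part of this patch: the class defined just below tracks position, length, numOverlap, offset and boost, and its summary says this information feeds the field's normalization factor. A minimal sketch of how those counters might be combined, assuming a DefaultSimilarity-style length norm (boost * 1/sqrt(number of terms)); the helper name and the discount-overlaps switch are assumptions made for this sketch.

using System;
using Lucene.Net.Index;

namespace LuceneLightExamples
{
    // Hedged illustration only: combines the counters tracked by FieldInvertState
    // into a norm the way a DefaultSimilarity-style length norm would.
    // ComputeNormSketch and discountOverlaps are assumptions, not code from this diff.
    public static class FieldInvertStateSketch
    {
        public static float ComputeNormSketch(FieldInvertState state, bool discountOverlaps)
        {
            // Optionally ignore terms that share a position (positionIncrement == 0).
            int numTerms = discountOverlaps ? state.Length - state.NumOverlap : state.Length;
            if (numTerms <= 0)
                return state.Boost;
            return state.Boost * (float) (1.0 / Math.Sqrt(numTerms));
        }
    }
}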

+ public sealed class FieldInvertState + { + internal int position; + internal int length; + internal int numOverlap; + internal int offset; + internal float boost; + internal AttributeSource attributeSource; + + public FieldInvertState() + { + } + + public FieldInvertState(int position, int length, int numOverlap, int offset, float boost) + { + this.position = position; + this.length = length; + this.numOverlap = numOverlap; + this.offset = offset; + this.boost = boost; + } + + /// Re-initialize the state, using this boost value. + /// boost value to use. + /// + internal void Reset(float docBoost) + { + position = 0; + length = 0; + numOverlap = 0; + offset = 0; + boost = docBoost; + attributeSource = null; + } + + /// Get the last processed term position. + /// the position + public int Position + { + get { return position; } + } + + /// Get total number of terms in this field. + /// the length + public int Length + { + get { return length; } + } + + /// Get the number of terms with positionIncrement == 0. + /// the numOverlap + public int NumOverlap + { + get { return numOverlap; } + } + + /// Get end offset of the last processed term. + /// the offset + public int Offset + { + get { return offset; } + } + + /// Get boost value. This is the cumulative product of + /// document boost and field boost for all field instances + /// sharing the same field name. + /// + /// the boost + public float Boost + { + get { return boost; } + } + + public AttributeSource AttributeSource + { + get { return attributeSource; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldReaderException.cs b/external/Lucene.Net.Light/src/core/Index/FieldReaderException.cs new file mode 100644 index 0000000000..7654fb4525 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldReaderException.cs @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// + /// + /// + /// + [Serializable] + public class FieldReaderException:System.SystemException + { + /// Constructs a new runtime exception with null as its + /// detail message. The cause is not initialized, and may subsequently be + /// initialized by a call to . + /// + public FieldReaderException() + { + } + + /// Constructs a new runtime exception with the specified cause and a + /// detail message of (cause==null ? null : cause.toString()) + /// (which typically contains the class and detail message of + /// cause). + ///

+ /// This constructor is useful for runtime exceptions + /// that are little more than wrappers for other throwables. + /// + ///

+ /// the cause (which is saved for later retrieval by the + /// ). (A null value is + /// permitted, and indicates that the cause is nonexistent or + /// unknown.) + /// + /// 1.4 + /// + public FieldReaderException(System.Exception cause):base((cause == null)?null:cause.Message, cause) + { + } + + /// Constructs a new runtime exception with the specified detail message. + /// The cause is not initialized, and may subsequently be initialized by a + /// call to . + /// + /// + /// the detail message. The detail message is saved for + /// later retrieval by the method. + /// + public FieldReaderException(System.String message):base(message) + { + } + + /// Constructs a new runtime exception with the specified detail message and + /// cause.

Note that the detail message associated with + /// cause is not automatically incorporated in + /// this runtime exception's detail message. + /// + ///

+ /// the detail message (which is saved for later retrieval + /// by the method). + /// + /// the cause (which is saved for later retrieval by the + /// method). (A null value is + /// permitted, and indicates that the cause is nonexistent or + /// unknown.) + /// + /// 1.4 + /// + public FieldReaderException(System.String message, System.Exception cause):base(message, cause) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldSortedTermVectorMapper.cs b/external/Lucene.Net.Light/src/core/Index/FieldSortedTermVectorMapper.cs new file mode 100644 index 0000000000..6c1915e777 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldSortedTermVectorMapper.cs @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// For each Field, store a sorted collection of s + ///

+ /// This is not thread-safe. + ///
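For illustration only, not part of this patch: the mapper defined just below groups one document's term vectors per field into sorted sets. A hedged usage sketch follows; the reader overload GetTermFreqVector(docNumber, mapper) appears later in this diff as the call that drives SetExpectations/Map, while the Term property used by the comparer and the exact generic signature of FieldToTerms are assumptions for this sketch.

using System.Collections.Generic;
using Lucene.Net.Index;

namespace LuceneLightExamples
{
    // Hedged illustration only: orders entries by raw term text. The Term
    // property on TermVectorEntry is assumed for this sketch.
    public sealed class TermTextComparer : IComparer<TermVectorEntry>
    {
        public int Compare(TermVectorEntry x, TermVectorEntry y)
        {
            return string.CompareOrdinal(x.Term, y.Term);
        }
    }

    public static class FieldSortedTermVectorMapperSketch
    {
        // Returns the document's term vectors grouped per field, each set sorted
        // by the comparer handed to the mapper.
        public static IDictionary<string, SortedSet<TermVectorEntry>> CollectSorted(IndexReader reader, int docId)
        {
            var mapper = new FieldSortedTermVectorMapper(new TermTextComparer());
            reader.GetTermFreqVector(docId, mapper); // the reader pushes SetExpectations/Map calls into the mapper
            return mapper.FieldToTerms;
        }
    }
}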

+ public class FieldSortedTermVectorMapper:TermVectorMapper + { + private readonly IDictionary> fieldToTerms = new HashMap>(); + private SortedSet currentSet; + private System.String currentField; + private readonly IComparer comparator; + + /// + /// A Comparator for sorting s + /// + public FieldSortedTermVectorMapper(IComparer comparator) + : this(false, false, comparator) + { + } + + + public FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer comparator) + : base(ignoringPositions, ignoringOffsets) + { + this.comparator = comparator; + } + + public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) + { + var entry = new TermVectorEntry(currentField, term, frequency, offsets, positions); + currentSet.Add(entry); + } + + public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) + { + currentSet = new SortedSet(comparator); + currentField = field; + fieldToTerms[field] = currentSet; + } + + /// Get the mapping between fields and terms, sorted by the comparator + /// + /// + /// A map between field names and <see cref="System.Collections.Generic.SortedDictionary{Object,Object}" />s per field. SortedSet entries are <see cref="TermVectorEntry" /> + public virtual IDictionary> FieldToTerms + { + get { return fieldToTerms; } + } + + + public virtual IComparer Comparator + { + get { return comparator; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldsReader.cs b/external/Lucene.Net.Light/src/core/Index/FieldsReader.cs new file mode 100644 index 0000000000..8fa351da6c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldsReader.cs @@ -0,0 +1,641 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; +using Lucene.Net.Support; +using Lucene.Net.Util; +using TokenStream = Lucene.Net.Analysis.TokenStream; +using Lucene.Net.Documents; +using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException; +using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + + /// Class responsible for access to stored document fields. + ///

+ /// It uses <segment>.fdt and <segment>.fdx files. + /// + ///
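For illustration only, not part of this patch: for the reader defined just below, the .fdx stream is, after an optional 4-byte format header, one 8-byte pointer per document giving the byte position of that document's stored-fields record in .fdt; SeekIndex later in this file seeks to formatSize + (docID + docStoreOffset) * 8L. A minimal sketch of that arithmetic, with a made-up helper name.

namespace LuceneLightExamples
{
    // Hedged illustration only: mirrors the offset arithmetic in FieldsReader.SeekIndex.
    public static class FieldsIndexMath
    {
        // formatSize is 4 when the .fdx file carries a format header, 0 for the
        // pre-header format; docStoreOffset is non-zero only when several segments
        // share one stored-fields file.
        public static long FdxPositionFor(int docId, int docStoreOffset, int formatSize)
        {
            return formatSize + (docId + docStoreOffset) * 8L;
        }
    }
}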

+ public sealed class FieldsReader : ICloneable, IDisposable + { + private readonly FieldInfos fieldInfos; + + // The main fieldStream, used only for cloning. + private readonly IndexInput cloneableFieldsStream; + + // This is a clone of cloneableFieldsStream used for reading documents. + // It should not be cloned outside of a synchronized context. + private readonly IndexInput fieldsStream; + + private readonly IndexInput cloneableIndexStream; + private readonly IndexInput indexStream; + private readonly int numTotalDocs; + private readonly int size; + private bool closed; + private readonly int format; + private readonly int formatSize; + + // The docID offset where our docs begin in the index + // file. This will be 0 if we have our own private file. + private readonly int docStoreOffset; + + private readonly CloseableThreadLocal fieldsStreamTL = new CloseableThreadLocal(); + private readonly bool isOriginal = false; + + /// Returns a cloned FieldsReader that shares open + /// IndexInputs with the original one. It is the caller's + /// job not to close the original FieldsReader until all + /// clones are called (eg, currently SegmentReader manages + /// this logic). + /// + public System.Object Clone() + { + EnsureOpen(); + return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); + } + + // Used only by clone + private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) + { + this.fieldInfos = fieldInfos; + this.numTotalDocs = numTotalDocs; + this.size = size; + this.format = format; + this.formatSize = formatSize; + this.docStoreOffset = docStoreOffset; + this.cloneableFieldsStream = cloneableFieldsStream; + this.cloneableIndexStream = cloneableIndexStream; + fieldsStream = (IndexInput) cloneableFieldsStream.Clone(); + indexStream = (IndexInput) cloneableIndexStream.Clone(); + } + + public /*internal*/ FieldsReader(Directory d, String segment, FieldInfos fn):this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, - 1, 0) + { + } + + internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize):this(d, segment, fn, readBufferSize, - 1, 0) + { + } + + internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) + { + bool success = false; + isOriginal = true; + try + { + fieldInfos = fn; + + cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize); + cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize); + + // First version of fdx did not include a format + // header, but, the first int will always be 0 in that + // case + int firstInt = cloneableIndexStream.ReadInt(); + format = firstInt == 0 ? 0 : firstInt; + + if (format > FieldsWriter.FORMAT_CURRENT) + throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower"); + + formatSize = format > FieldsWriter.FORMAT ? 
4 : 0; + + if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) + cloneableFieldsStream.SetModifiedUTF8StringsMode(); + + fieldsStream = (IndexInput) cloneableFieldsStream.Clone(); + + long indexSize = cloneableIndexStream.Length() - formatSize; + + if (docStoreOffset != - 1) + { + // We read only a slice out of this shared fields file + this.docStoreOffset = docStoreOffset; + this.size = size; + + // Verify the file is long enough to hold all of our + // docs + System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset); + } + else + { + this.docStoreOffset = 0; + this.size = (int) (indexSize >> 3); + } + + indexStream = (IndexInput) cloneableIndexStream.Clone(); + numTotalDocs = (int) (indexSize >> 3); + success = true; + } + finally + { + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. + if (!success) + { + Dispose(); + } + } + } + + /// AlreadyClosedException if this FieldsReader is closed + internal void EnsureOpen() + { + if (closed) + { + throw new AlreadyClosedException("this FieldsReader is closed"); + } + } + + /// Closes the underlying streams, including any ones associated with a + /// lazy implementation of a Field. This means that the Fields values will not be accessible. + /// + /// + /// IOException + public void Dispose() + { + // Move to protected method if class becomes unsealed + if (!closed) + { + if (fieldsStream != null) + { + fieldsStream.Close(); + } + if (isOriginal) + { + if (cloneableFieldsStream != null) + { + cloneableFieldsStream.Close(); + } + if (cloneableIndexStream != null) + { + cloneableIndexStream.Close(); + } + } + if (indexStream != null) + { + indexStream.Close(); + } + fieldsStreamTL.Close(); + closed = true; + } + } + + public /*internal*/ int Size() + { + return size; + } + + private void SeekIndex(int docID) + { + indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L); + } + + internal bool CanReadRawDocs() + { + // Disable reading raw docs in 2.x format, because of the removal of compressed + // fields in 3.0. We don't want rawDocs() to decode field bits to figure out + // if a field was compressed, hence we enforce ordinary (non-raw) stored field merges + // for <3.0 indexes. 
+ return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; + } + + public /*internal*/ Document Doc(int n, FieldSelector fieldSelector) + { + SeekIndex(n); + long position = indexStream.ReadLong(); + fieldsStream.Seek(position); + + var doc = new Document(); + int numFields = fieldsStream.ReadVInt(); + for (int i = 0; i < numFields; i++) + { + int fieldNumber = fieldsStream.ReadVInt(); + FieldInfo fi = fieldInfos.FieldInfo(fieldNumber); + FieldSelectorResult acceptField = fieldSelector == null?FieldSelectorResult.LOAD:fieldSelector.Accept(fi.name); + + byte bits = fieldsStream.ReadByte(); + System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY); + + bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; + System.Diagnostics.Debug.Assert( + (!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)), + "compressed fields are only allowed in indexes of version <= 2.9"); + bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; + bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; + //TODO: Find an alternative approach here if this list continues to grow beyond the + //list of 5 or 6 currently here. See Lucene 762 for discussion + if (acceptField.Equals(FieldSelectorResult.LOAD)) + { + AddField(doc, fi, binary, compressed, tokenize); + } + else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK)) + { + AddField(doc, fi, binary, compressed, tokenize); + break; //Get out of this loop + } + else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD)) + { + AddFieldLazy(doc, fi, binary, compressed, tokenize); + } + else if (acceptField.Equals(FieldSelectorResult.SIZE)) + { + SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed)); + } + else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK)) + { + AddFieldSize(doc, fi, binary, compressed); + break; + } + else + { + SkipField(binary, compressed); + } + } + + return doc; + } + + /// Returns the length in bytes of each raw document in a + /// contiguous range of length numDocs starting with + /// startDocID. Returns the IndexInput (the fieldStream), + /// already seeked to the starting point for startDocID. + /// + internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs) + { + SeekIndex(startDocID); + long startOffset = indexStream.ReadLong(); + long lastOffset = startOffset; + int count = 0; + while (count < numDocs) + { + long offset; + int docID = docStoreOffset + startDocID + count + 1; + System.Diagnostics.Debug.Assert(docID <= numTotalDocs); + if (docID < numTotalDocs) + offset = indexStream.ReadLong(); + else + offset = fieldsStream.Length(); + lengths[count++] = (int) (offset - lastOffset); + lastOffset = offset; + } + + fieldsStream.Seek(startOffset); + + return fieldsStream; + } + + /// Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + /// This will have the most payoff on large fields. + /// + private void SkipField(bool binary, bool compressed) + { + SkipField(binary, compressed, fieldsStream.ReadVInt()); + } + + private void SkipField(bool binary, bool compressed, int toRead) + { + if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) + { + fieldsStream.Seek(fieldsStream.FilePointer + toRead); + } + else + { + // We need to skip chars. 
This will slow us down, but still better + fieldsStream.SkipChars(toRead); + } + } + + private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize) + { + if (binary) + { + int toRead = fieldsStream.ReadVInt(); + long pointer = fieldsStream.FilePointer; + //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); + doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer, binary, compressed)); + + //Need to move the pointer ahead by toRead positions + fieldsStream.Seek(pointer + toRead); + } + else + { + const Field.Store store = Field.Store.YES; + Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize); + Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector); + + AbstractField f; + if (compressed) + { + int toRead = fieldsStream.ReadVInt(); + long pointer = fieldsStream.FilePointer; + f = new LazyField(this, fi.name, store, toRead, pointer, binary, compressed); + //skip over the part that we aren't loading + fieldsStream.Seek(pointer + toRead); + f.OmitNorms = fi.omitNorms; + f.OmitTermFreqAndPositions = fi.omitTermFreqAndPositions; + } + else + { + int length = fieldsStream.ReadVInt(); + long pointer = fieldsStream.FilePointer; + //Skip ahead of where we are by the length of what is stored + if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) + { + fieldsStream.Seek(pointer + length); + } + else + { + fieldsStream.SkipChars(length); + } + f = new LazyField(this, fi.name, store, index, termVector, length, pointer, binary, compressed) + {OmitNorms = fi.omitNorms, OmitTermFreqAndPositions = fi.omitTermFreqAndPositions}; + } + + doc.Add(f); + } + } + + private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize) + { + //we have a binary stored field, and it may be compressed + if (binary) + { + int toRead = fieldsStream.ReadVInt(); + var b = new byte[toRead]; + fieldsStream.ReadBytes(b, 0, b.Length); + doc.Add(compressed ? 
new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES)); + } + else + { + const Field.Store store = Field.Store.YES; + Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize); + Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector); + + AbstractField f; + if (compressed) + { + int toRead = fieldsStream.ReadVInt(); + + var b = new byte[toRead]; + fieldsStream.ReadBytes(b, 0, b.Length); + f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index, + termVector) {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms}; + } + else + { + f = new Field(fi.name, false, fieldsStream.ReadString(), store, index, termVector) + {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms}; + } + + doc.Add(f); + } + } + + // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes) + // Read just the size -- caller must skip the field content to continue reading fields + // Return the size in bytes or chars, depending on field type + private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed) + { + int size = fieldsStream.ReadVInt(), bytesize = binary || compressed?size:2 * size; + var sizebytes = new byte[4]; + sizebytes[0] = (byte) (Number.URShift(bytesize, 24)); + sizebytes[1] = (byte) (Number.URShift(bytesize, 16)); + sizebytes[2] = (byte) (Number.URShift(bytesize, 8)); + sizebytes[3] = (byte) bytesize; + doc.Add(new Field(fi.name, sizebytes, Field.Store.YES)); + return size; + } + + /// A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is + /// loaded. + /// + [Serializable] + private sealed class LazyField : AbstractField + { + private void InitBlock(FieldsReader enclosingInstance) + { + this.Enclosing_Instance = enclosingInstance; + } + + private FieldsReader Enclosing_Instance { get; set; } + + private int toRead; + private long pointer; + [Obsolete("Only kept for backward-compatbility with <3.0 indexes. 
Will be removed in 4.0.")] + private readonly Boolean isCompressed; + + public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary, bool isCompressed):base(name, store, Field.Index.NO, Field.TermVector.NO) + { + InitBlock(enclosingInstance); + this.toRead = toRead; + this.pointer = pointer; + this.internalIsBinary = isBinary; + if (isBinary) + internalBinaryLength = toRead; + lazy = true; + this.isCompressed = isCompressed; + } + + public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary, bool isCompressed):base(name, store, index, termVector) + { + InitBlock(enclosingInstance); + this.toRead = toRead; + this.pointer = pointer; + this.internalIsBinary = isBinary; + if (isBinary) + internalBinaryLength = toRead; + lazy = true; + this.isCompressed = isCompressed; + } + + private IndexInput GetFieldStream() + { + IndexInput localFieldsStream = Enclosing_Instance.fieldsStreamTL.Get(); + if (localFieldsStream == null) + { + localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone(); + Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream); + } + return localFieldsStream; + } + + /// The value of the field as a Reader, or null. If null, the String value, + /// binary value, or TokenStream value is used. Exactly one of StringValue(), + /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + /// + public override TextReader ReaderValue + { + get + { + Enclosing_Instance.EnsureOpen(); + return null; + } + } + + /// The value of the field as a TokenStream, or null. If null, the Reader value, + /// String value, or binary value is used. Exactly one of StringValue(), + /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. + /// + public override TokenStream TokenStreamValue + { + get + { + Enclosing_Instance.EnsureOpen(); + return null; + } + } + + /// The value of the field as a String, or null. If null, the Reader value, + /// binary value, or TokenStream value is used. Exactly one of StringValue(), + /// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set. 
+ /// + public override string StringValue + { + get + { + Enclosing_Instance.EnsureOpen(); + if (internalIsBinary) + return null; + + if (fieldsData == null) + { + IndexInput localFieldsStream = GetFieldStream(); + try + { + localFieldsStream.Seek(pointer); + if (isCompressed) + { + var b = new byte[toRead]; + localFieldsStream.ReadBytes(b, 0, b.Length); + fieldsData = + System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b)); + } + else + { + if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) + { + var bytes = new byte[toRead]; + localFieldsStream.ReadBytes(bytes, 0, toRead); + fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes); + } + else + { + //read in chars b/c we already know the length we need to read + var chars = new char[toRead]; + localFieldsStream.ReadChars(chars, 0, toRead); + fieldsData = new System.String(chars); + } + } + } + catch (System.IO.IOException e) + { + throw new FieldReaderException(e); + } + } + return (System.String) fieldsData; + } + } + + public long Pointer + { + get + { + Enclosing_Instance.EnsureOpen(); + return pointer; + } + set + { + Enclosing_Instance.EnsureOpen(); + this.pointer = value; + } + } + + public int ToRead + { + get + { + Enclosing_Instance.EnsureOpen(); + return toRead; + } + set + { + Enclosing_Instance.EnsureOpen(); + this.toRead = value; + } + } + + public override byte[] GetBinaryValue(byte[] result) + { + Enclosing_Instance.EnsureOpen(); + + if (internalIsBinary) + { + if (fieldsData == null) + { + // Allocate new buffer if result is null or too small + byte[] b; + if (result == null || result.Length < toRead) + b = new byte[toRead]; + else + b = result; + + IndexInput localFieldsStream = GetFieldStream(); + + // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people + // since they are already handling this exception when getting the document + try + { + localFieldsStream.Seek(pointer); + localFieldsStream.ReadBytes(b, 0, toRead); + fieldsData = isCompressed ? Enclosing_Instance.Uncompress(b) : b; + } + catch (IOException e) + { + throw new FieldReaderException(e); + } + + internalbinaryOffset = 0; + internalBinaryLength = toRead; + } + + return (byte[]) fieldsData; + } + return null; + } + } + + private byte[] Uncompress(byte[] b) + { + try + { + return CompressionTools.Decompress(b); + } + catch (Exception e) + { + // this will happen if the field is not compressed + throw new CorruptIndexException("field data are in wrong format: " + e, e); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FieldsWriter.cs b/external/Lucene.Net.Light/src/core/Index/FieldsWriter.cs new file mode 100644 index 0000000000..9244195cdb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FieldsWriter.cs @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Documents; +using Document = Lucene.Net.Documents.Document; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using RAMOutputStream = Lucene.Net.Store.RAMOutputStream; + +namespace Lucene.Net.Index +{ + + sealed class FieldsWriter : IDisposable + { + internal const byte FIELD_IS_TOKENIZED = (0x1); + internal const byte FIELD_IS_BINARY = (0x2); + [Obsolete("Kept for backwards-compatibility with <3.0 indexes; will be removed in 4.0")] + internal const byte FIELD_IS_COMPRESSED = (0x4); + + // Original format + internal const int FORMAT = 0; + + // Changed strings to UTF8 + internal const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1; + + // Lucene 3.0: Removal of compressed fields + internal static int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; + + // NOTE: if you introduce a new format, make it 1 higher + // than the current one, and always change this if you + // switch to a new format! + internal static readonly int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; + + private readonly FieldInfos fieldInfos; + + private IndexOutput fieldsStream; + + private IndexOutput indexStream; + + private readonly bool doClose; + + internal FieldsWriter(Directory d, System.String segment, FieldInfos fn) + { + fieldInfos = fn; + + bool success = false; + String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION; + try + { + fieldsStream = d.CreateOutput(fieldsName); + fieldsStream.WriteInt(FORMAT_CURRENT); + success = true; + } + finally + { + if (!success) + { + try + { + Dispose(); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + try + { + d.DeleteFile(fieldsName); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + } + } + + success = false; + String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; + try + { + indexStream = d.CreateOutput(indexName); + indexStream.WriteInt(FORMAT_CURRENT); + success = true; + } + finally + { + if (!success) + { + try + { + Dispose(); + } + catch (System.IO.IOException) + { + } + try + { + d.DeleteFile(fieldsName); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + try + { + d.DeleteFile(indexName); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + } + } + + doClose = true; + } + + internal FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) + { + fieldInfos = fn; + fieldsStream = fdt; + indexStream = fdx; + doClose = false; + } + + internal void SetFieldsStream(IndexOutput stream) + { + this.fieldsStream = stream; + } + + // Writes the contents of buffer into the fields stream + // and adds a new entry for this document into the index + // stream. This assumes the buffer was already written + // in the correct fields format. 
+ internal void FlushDocument(int numStoredFields, RAMOutputStream buffer) + { + indexStream.WriteLong(fieldsStream.FilePointer); + fieldsStream.WriteVInt(numStoredFields); + buffer.WriteTo(fieldsStream); + } + + internal void SkipDocument() + { + indexStream.WriteLong(fieldsStream.FilePointer); + fieldsStream.WriteVInt(0); + } + + internal void Flush() + { + indexStream.Flush(); + fieldsStream.Flush(); + } + + public void Dispose() + { + // Move to protected method if class becomes unsealed + if (doClose) + { + try + { + if (fieldsStream != null) + { + try + { + fieldsStream.Close(); + } + finally + { + fieldsStream = null; + } + } + } + catch (System.IO.IOException) + { + try + { + if (indexStream != null) + { + try + { + indexStream.Close(); + } + finally + { + indexStream = null; + } + } + } + catch (System.IO.IOException) + { + // Ignore so we throw only first IOException hit + } + throw; + } + finally + { + if (indexStream != null) + { + try + { + indexStream.Close(); + } + finally + { + indexStream = null; + } + } + } + } + } + + internal void WriteField(FieldInfo fi, IFieldable field) + { + fieldsStream.WriteVInt(fi.number); + byte bits = 0; + if (field.IsTokenized) + bits |= FieldsWriter.FIELD_IS_TOKENIZED; + if (field.IsBinary) + bits |= FieldsWriter.FIELD_IS_BINARY; + + fieldsStream.WriteByte(bits); + + // compression is disabled for the current field + if (field.IsBinary) + { + byte[] data = field.GetBinaryValue(); + int len = field.BinaryLength; + int offset = field.BinaryOffset; + + fieldsStream.WriteVInt(len); + fieldsStream.WriteBytes(data, offset, len); + } + else + { + fieldsStream.WriteString(field.StringValue); + } + } + + /// Bulk write a contiguous series of documents. The + /// lengths array is the length (in bytes) of each raw + /// document. The stream IndexInput is the + /// fieldsStream from which we should bulk-copy all + /// bytes. + /// + internal void AddRawDocuments(IndexInput stream, int[] lengths, int numDocs) + { + long position = fieldsStream.FilePointer; + long start = position; + for (int i = 0; i < numDocs; i++) + { + indexStream.WriteLong(position); + position += lengths[i]; + } + fieldsStream.CopyBytes(stream, position - start); + System.Diagnostics.Debug.Assert(fieldsStream.FilePointer == position); + } + + internal void AddDocument(Document doc) + { + indexStream.WriteLong(fieldsStream.FilePointer); + + System.Collections.Generic.IList fields = doc.GetFields(); + int storedCount = fields.Count(field => field.IsStored); + fieldsStream.WriteVInt(storedCount); + + foreach(IFieldable field in fields) + { + if (field.IsStored) + WriteField(fieldInfos.FieldInfo(field.Name), field); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FilterIndexReader.cs b/external/Lucene.Net.Light/src/core/Index/FilterIndexReader.cs new file mode 100644 index 0000000000..dc61613bcc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FilterIndexReader.cs @@ -0,0 +1,388 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + /// A FilterIndexReader contains another IndexReader, which it + /// uses as its basic source of data, possibly transforming the data along the + /// way or providing additional functionality. The class + /// FilterIndexReader itself simply implements all abstract methods + /// of IndexReader with versions that pass all requests to the + /// contained index reader. Subclasses of FilterIndexReader may + /// further override some of these methods and may also provide additional + /// methods and fields. + /// + public class FilterIndexReader:IndexReader + { + + /// Base class for filtering implementations. + public class FilterTermDocs : TermDocs + { + protected internal TermDocs in_Renamed; + + public FilterTermDocs(TermDocs in_Renamed) + { + this.in_Renamed = in_Renamed; + } + + public virtual void Seek(Term term) + { + in_Renamed.Seek(term); + } + public virtual void Seek(TermEnum termEnum) + { + in_Renamed.Seek(termEnum); + } + + public virtual int Doc + { + get { return in_Renamed.Doc; } + } + + public virtual int Freq + { + get { return in_Renamed.Freq; } + } + + public virtual bool Next() + { + return in_Renamed.Next(); + } + public virtual int Read(int[] docs, int[] freqs) + { + return in_Renamed.Read(docs, freqs); + } + public virtual bool SkipTo(int i) + { + return in_Renamed.SkipTo(i); + } + + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + in_Renamed.Close(); + } + } + } + + /// Base class for filtering implementations. + public class FilterTermPositions:FilterTermDocs, TermPositions + { + + public FilterTermPositions(TermPositions in_Renamed):base(in_Renamed) + { + } + + public virtual int NextPosition() + { + return ((TermPositions) this.in_Renamed).NextPosition(); + } + + public virtual int PayloadLength + { + get { return ((TermPositions) this.in_Renamed).PayloadLength; } + } + + public virtual byte[] GetPayload(byte[] data, int offset) + { + return ((TermPositions) this.in_Renamed).GetPayload(data, offset); + } + + + // TODO: Remove warning after API has been finalized + + public virtual bool IsPayloadAvailable + { + get { return ((TermPositions) this.in_Renamed).IsPayloadAvailable; } + } + } + + /// Base class for filtering implementations. + public class FilterTermEnum:TermEnum + { + protected internal TermEnum in_Renamed; + + public FilterTermEnum(TermEnum in_Renamed) + { + this.in_Renamed = in_Renamed; + } + + public override bool Next() + { + return in_Renamed.Next(); + } + + public override Term Term + { + get { return in_Renamed.Term; } + } + + public override int DocFreq() + { + return in_Renamed.DocFreq(); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + in_Renamed.Close(); + } + } + } + + protected internal IndexReader in_Renamed; + + ///

Construct a FilterIndexReader based on the specified base reader. + /// Directory locking for delete, undeleteAll, and setNorm operations is + /// left to the base reader. + /// Note that base reader is closed if this FilterIndexReader is closed. + ///

+ /// specified base reader. + /// + public FilterIndexReader(IndexReader in_Renamed):base() + { + this.in_Renamed = in_Renamed; + } + + public override Directory Directory() + { + return in_Renamed.Directory(); + } + + public override ITermFreqVector[] GetTermFreqVectors(int docNumber) + { + EnsureOpen(); + return in_Renamed.GetTermFreqVectors(docNumber); + } + + public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field) + { + EnsureOpen(); + return in_Renamed.GetTermFreqVector(docNumber, field); + } + + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + in_Renamed.GetTermFreqVector(docNumber, field, mapper); + } + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + in_Renamed.GetTermFreqVector(docNumber, mapper); + } + + public override int NumDocs() + { + // Don't call ensureOpen() here (it could affect performance) + return in_Renamed.NumDocs(); + } + + public override int MaxDoc + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return in_Renamed.MaxDoc; + } + } + + public override Document Document(int n, FieldSelector fieldSelector) + { + EnsureOpen(); + return in_Renamed.Document(n, fieldSelector); + } + + public override bool IsDeleted(int n) + { + // Don't call ensureOpen() here (it could affect performance) + return in_Renamed.IsDeleted(n); + } + + public override bool HasDeletions + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return in_Renamed.HasDeletions; + } + } + + protected internal override void DoUndeleteAll() + { + in_Renamed.UndeleteAll(); + } + + public override bool HasNorms(System.String field) + { + EnsureOpen(); + return in_Renamed.HasNorms(field); + } + + public override byte[] Norms(System.String f) + { + EnsureOpen(); + return in_Renamed.Norms(f); + } + + public override void Norms(System.String f, byte[] bytes, int offset) + { + EnsureOpen(); + in_Renamed.Norms(f, bytes, offset); + } + + protected internal override void DoSetNorm(int d, System.String f, byte b) + { + in_Renamed.SetNorm(d, f, b); + } + + public override TermEnum Terms() + { + EnsureOpen(); + return in_Renamed.Terms(); + } + + public override TermEnum Terms(Term t) + { + EnsureOpen(); + return in_Renamed.Terms(t); + } + + public override int DocFreq(Term t) + { + EnsureOpen(); + return in_Renamed.DocFreq(t); + } + + public override TermDocs TermDocs() + { + EnsureOpen(); + return in_Renamed.TermDocs(); + } + + public override TermDocs TermDocs(Term term) + { + EnsureOpen(); + return in_Renamed.TermDocs(term); + } + + public override TermPositions TermPositions() + { + EnsureOpen(); + return in_Renamed.TermPositions(); + } + + protected internal override void DoDelete(int n) + { + in_Renamed.DeleteDocument(n); + } + + protected internal override void DoCommit(System.Collections.Generic.IDictionary commitUserData) + { + in_Renamed.Commit(commitUserData); + } + + protected internal override void DoClose() + { + in_Renamed.Close(); + // NOTE: only needed in case someone had asked for + // FieldCache for top-level reader (which is generally + // not a good idea): + Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this); + } + + + public override System.Collections.Generic.ICollection GetFieldNames(IndexReader.FieldOption fieldNames) + { + EnsureOpen(); + return in_Renamed.GetFieldNames(fieldNames); + } + + public override long Version + { + get + { + EnsureOpen(); + return 
in_Renamed.Version; + } + } + + public override bool IsCurrent() + { + EnsureOpen(); + return in_Renamed.IsCurrent(); + } + + public override bool IsOptimized() + { + EnsureOpen(); + return in_Renamed.IsOptimized(); + } + + public override IndexReader[] GetSequentialSubReaders() + { + return in_Renamed.GetSequentialSubReaders(); + } + + override public System.Object Clone() + { + System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this FilterIndexReader.Clone()"); // {{Aroush-2.9}} + return null; + } + + /// + /// If the subclass of FilteredIndexReader modifies the + /// contents of the FieldCache, you must override this + /// method to provide a different key */ + /// + public override object FieldCacheKey + { + get { return in_Renamed.FieldCacheKey; } + } + + /// + /// If the subclass of FilteredIndexReader modifies the + /// deleted docs, you must override this method to provide + /// a different key */ + /// + public override object DeletesCacheKey + { + get { return in_Renamed.DeletesCacheKey; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsDocsConsumer.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsDocsConsumer.cs new file mode 100644 index 0000000000..29c0558848 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsDocsConsumer.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// NOTE: this API is experimental and will likely change + + abstract class FormatPostingsDocsConsumer + { + + /// Adds a new doc in this term. If this returns null + /// then we just skip consuming positions/payloads. + /// + internal abstract FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq); + + /// Called when we are done adding docs to this term + internal abstract void Finish(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsDocsWriter.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsDocsWriter.cs new file mode 100644 index 0000000000..82a7398027 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsDocsWriter.cs @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + /// Consumes doc and freq, writing them using the current + /// index file format + /// + sealed class FormatPostingsDocsWriter : FormatPostingsDocsConsumer, IDisposable + { + + internal IndexOutput out_Renamed; + internal FormatPostingsTermsWriter parent; + internal FormatPostingsPositionsWriter posWriter; + internal DefaultSkipListWriter skipListWriter; + internal int skipInterval; + internal int totalNumDocs; + + internal bool omitTermFreqAndPositions; + internal bool storePayloads; + internal long freqStart; + internal FieldInfo fieldInfo; + + internal FormatPostingsDocsWriter(SegmentWriteState state, FormatPostingsTermsWriter parent):base() + { + this.parent = parent; + System.String fileName = IndexFileNames.SegmentFileName(parent.parent.segment, IndexFileNames.FREQ_EXTENSION); + state.flushedFiles.Add(fileName); + out_Renamed = parent.parent.dir.CreateOutput(fileName); + totalNumDocs = parent.parent.totalNumDocs; + + // TODO: abstraction violation + skipInterval = parent.parent.termsOut.skipInterval; + skipListWriter = parent.parent.skipListWriter; + skipListWriter.SetFreqOutput(out_Renamed); + + posWriter = new FormatPostingsPositionsWriter(state, this); + } + + internal void SetField(FieldInfo fieldInfo) + { + this.fieldInfo = fieldInfo; + omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + storePayloads = fieldInfo.storePayloads; + posWriter.SetField(fieldInfo); + } + + internal int lastDocID; + internal int df; + + /// Adds a new doc in this term. If this returns null + /// then we just skip consuming positions/payloads. 
+ /// + internal override FormatPostingsPositionsConsumer AddDoc(int docID, int termDocFreq) + { + + int delta = docID - lastDocID; + + if (docID < 0 || (df > 0 && delta <= 0)) + throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )"); + + if ((++df % skipInterval) == 0) + { + // TODO: abstraction violation + skipListWriter.SetSkipData(lastDocID, storePayloads, posWriter.lastPayloadLength); + skipListWriter.BufferSkip(df); + } + + System.Diagnostics.Debug.Assert(docID < totalNumDocs, "docID=" + docID + " totalNumDocs=" + totalNumDocs); + + lastDocID = docID; + if (omitTermFreqAndPositions) + out_Renamed.WriteVInt(delta); + else if (1 == termDocFreq) + out_Renamed.WriteVInt((delta << 1) | 1); + else + { + out_Renamed.WriteVInt(delta << 1); + out_Renamed.WriteVInt(termDocFreq); + } + + return posWriter; + } + + private TermInfo termInfo = new TermInfo(); // minimize consing + internal UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); + + /// Called when we are done adding docs to this term + internal override void Finish() + { + long skipPointer = skipListWriter.WriteSkip(out_Renamed); + + // TODO: this is abstraction violation -- we should not + // peek up into parents terms encoding format + termInfo.Set(df, parent.freqStart, parent.proxStart, (int) (skipPointer - parent.freqStart)); + + // TODO: we could do this incrementally + UnicodeUtil.UTF16toUTF8(parent.currentTerm, parent.currentTermStart, utf8); + + if (df > 0) + { + parent.termsOut.Add(fieldInfo.number, utf8.result, utf8.length, termInfo); + } + + lastDocID = 0; + df = 0; + } + + public void Dispose() + { + // Move to protected method if class becomes unsealed + out_Renamed.Dispose(); + posWriter.Dispose(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsFieldsConsumer.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsFieldsConsumer.cs new file mode 100644 index 0000000000..a3f86ec238 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsFieldsConsumer.cs @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// Abstract API that consumes terms, doc, freq, prox and + /// payloads postings. Concrete implementations of this + /// actually do "something" with the postings (write it into + /// the index in a specific format). + /// + /// NOTE: this API is experimental and will likely change + /// + abstract class FormatPostingsFieldsConsumer + { + + /// Add a new field + internal abstract FormatPostingsTermsConsumer AddField(FieldInfo field); + + /// Called when we are done adding everything. 
+ internal abstract void Finish(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsFieldsWriter.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsFieldsWriter.cs new file mode 100644 index 0000000000..40ef619305 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsFieldsWriter.cs @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + sealed class FormatPostingsFieldsWriter:FormatPostingsFieldsConsumer + { + + internal Directory dir; + internal System.String segment; + internal TermInfosWriter termsOut; + internal FieldInfos fieldInfos; + internal FormatPostingsTermsWriter termsWriter; + internal DefaultSkipListWriter skipListWriter; + internal int totalNumDocs; + + public FormatPostingsFieldsWriter(SegmentWriteState state, FieldInfos fieldInfos):base() + { + + dir = state.directory; + segment = state.segmentName; + totalNumDocs = state.numDocs; + this.fieldInfos = fieldInfos; + termsOut = new TermInfosWriter(dir, segment, fieldInfos, state.termIndexInterval); + + // TODO: this is a nasty abstraction violation (that we + // peek down to find freqOut/proxOut) -- we need a + // better abstraction here whereby these child consumers + // can provide skip data or not + skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, null, null); + + state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_EXTENSION)); + state.flushedFiles.Add(state.SegmentFileName(IndexFileNames.TERMS_INDEX_EXTENSION)); + + termsWriter = new FormatPostingsTermsWriter(state, this); + } + + /// Add a new field + internal override FormatPostingsTermsConsumer AddField(FieldInfo field) + { + termsWriter.SetField(field); + return termsWriter; + } + + /// Called when we are done adding everything. + internal override void Finish() + { + termsOut.Dispose(); + termsWriter.Dispose(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsPositionsConsumer.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsPositionsConsumer.cs new file mode 100644 index 0000000000..f5bc440cf9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsPositionsConsumer.cs @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Index +{ + + abstract class FormatPostingsPositionsConsumer + { + + /// Add a new position & payload. If payloadLength > 0 + /// you must read those bytes from the IndexInput. + /// + internal abstract void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength); + + /// Called when we are done adding positions & payloads + internal abstract void Finish(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsPositionsWriter.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsPositionsWriter.cs new file mode 100644 index 0000000000..8b70fcc3e3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsPositionsWriter.cs @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Index +{ + + sealed class FormatPostingsPositionsWriter:FormatPostingsPositionsConsumer + { + internal FormatPostingsDocsWriter parent; + internal IndexOutput out_Renamed; + + internal bool omitTermFreqAndPositions; + internal bool storePayloads; + internal int lastPayloadLength = - 1; + + internal FormatPostingsPositionsWriter(SegmentWriteState state, FormatPostingsDocsWriter parent) + { + this.parent = parent; + omitTermFreqAndPositions = parent.omitTermFreqAndPositions; + if (parent.parent.parent.fieldInfos.HasProx()) + { + // At least one field does not omit TF, so create the + // prox file + System.String fileName = IndexFileNames.SegmentFileName(parent.parent.parent.segment, IndexFileNames.PROX_EXTENSION); + state.flushedFiles.Add(fileName); + out_Renamed = parent.parent.parent.dir.CreateOutput(fileName); + parent.skipListWriter.SetProxOutput(out_Renamed); + } + // Every field omits TF so we will write no prox file + else + out_Renamed = null; + } + + internal int lastPosition; + + /// Add a new position & payload + internal override void AddPosition(int position, byte[] payload, int payloadOffset, int payloadLength) + { + System.Diagnostics.Debug.Assert(!omitTermFreqAndPositions, "omitTermFreqAndPositions is true"); + System.Diagnostics.Debug.Assert(out_Renamed != null); + + int delta = position - lastPosition; + lastPosition = position; + + if (storePayloads) + { + if (payloadLength != lastPayloadLength) + { + lastPayloadLength = payloadLength; + out_Renamed.WriteVInt((delta << 1) | 1); + out_Renamed.WriteVInt(payloadLength); + } + else + out_Renamed.WriteVInt(delta << 1); + if (payloadLength > 0) + out_Renamed.WriteBytes(payload, payloadLength); + } + else + out_Renamed.WriteVInt(delta); + } + + internal void SetField(FieldInfo fieldInfo) + { + omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + storePayloads = omitTermFreqAndPositions?false:fieldInfo.storePayloads; + } + + /// Called when we are done adding positions & payloads + internal override void Finish() + { + lastPosition = 0; + lastPayloadLength = - 1; + } + + public void Dispose() + { + // Move to protected method if class becomes unsealed + if (out_Renamed != null) + out_Renamed.Close(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsTermsConsumer.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsTermsConsumer.cs new file mode 100644 index 0000000000..637ecffe6c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsTermsConsumer.cs @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
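The AddPosition method above delta-codes positions into the .prx stream; when payloads are stored, the low bit of the coded delta flags a change of payload length, so an unchanged length is never re-written. A condensed sketch of that logic; the helper name and the convention of returning the updated lastPayloadLength are assumptions for illustration only:

// --- illustrative sketch, not part of the patch ---
static int EncodePosition(Lucene.Net.Store.IndexOutput proxOut,
                          int position, int lastPosition,
                          bool storePayloads, byte[] payload,
                          int payloadLength, int lastPayloadLength)
{
    int delta = position - lastPosition;
    if (!storePayloads)
    {
        proxOut.WriteVInt(delta);                 // positions only
        return lastPayloadLength;
    }
    if (payloadLength != lastPayloadLength)
    {
        proxOut.WriteVInt((delta << 1) | 1);      // low bit set: new payload length follows
        proxOut.WriteVInt(payloadLength);
        lastPayloadLength = payloadLength;
    }
    else
    {
        proxOut.WriteVInt(delta << 1);            // same payload length as before
    }
    if (payloadLength > 0)
        proxOut.WriteBytes(payload, payloadLength);
    return lastPayloadLength;                      // caller carries this across positions
}
// --- end sketch ---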
+ */ + +using System; + +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + + /// NOTE: this API is experimental and will likely change + + abstract class FormatPostingsTermsConsumer + { + + /// Adds a new term in this field; term ends with U+FFFF + /// char + /// + internal abstract FormatPostingsDocsConsumer AddTerm(char[] text, int start); + + internal char[] termBuffer; + internal virtual FormatPostingsDocsConsumer AddTerm(System.String text) + { + int len = text.Length; + if (termBuffer == null || termBuffer.Length < 1 + len) + termBuffer = new char[ArrayUtil.GetNextSize(1 + len)]; + for (int i = 0; i < len; i++) + { + termBuffer[i] = (char) text[i]; + } + termBuffer[len] = (char) (0xffff); + return AddTerm(termBuffer, 0); + } + + /// Called when we are done adding terms to this field + internal abstract void Finish(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FormatPostingsTermsWriter.cs b/external/Lucene.Net.Light/src/core/Index/FormatPostingsTermsWriter.cs new file mode 100644 index 0000000000..87d20269de --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FormatPostingsTermsWriter.cs @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class FormatPostingsTermsWriter : FormatPostingsTermsConsumer, IDisposable + { + internal FormatPostingsFieldsWriter parent; + internal FormatPostingsDocsWriter docsWriter; + internal TermInfosWriter termsOut; + internal FieldInfo fieldInfo; + + internal FormatPostingsTermsWriter(SegmentWriteState state, FormatPostingsFieldsWriter parent):base() + { + this.parent = parent; + termsOut = parent.termsOut; + docsWriter = new FormatPostingsDocsWriter(state, this); + } + + internal void SetField(FieldInfo fieldInfo) + { + this.fieldInfo = fieldInfo; + docsWriter.SetField(fieldInfo); + } + + internal char[] currentTerm; + internal int currentTermStart; + + internal long freqStart; + internal long proxStart; + + /// Adds a new term in this field + internal override FormatPostingsDocsConsumer AddTerm(char[] text, int start) + { + currentTerm = text; + currentTermStart = start; + + // TODO: this is abstraction violation -- ideally this + // terms writer is not so "invasive", looking for file + // pointers in its child consumers. 
+ freqStart = docsWriter.out_Renamed.FilePointer; + if (docsWriter.posWriter.out_Renamed != null) + proxStart = docsWriter.posWriter.out_Renamed.FilePointer; + + parent.skipListWriter.ResetSkip(); + + return docsWriter; + } + + /// Called when we are done adding terms to this field + internal override void Finish() + { + } + + public void Dispose() + { + docsWriter.Dispose(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FreqProxFieldMergeState.cs b/external/Lucene.Net.Light/src/core/Index/FreqProxFieldMergeState.cs new file mode 100644 index 0000000000..5306918518 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FreqProxFieldMergeState.cs @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + // TODO FI: some of this is "generic" to TermsHash* so we + // should factor it out so other consumers don't have to + // duplicate this code + + /// Used by DocumentsWriter to merge the postings from + /// multiple ThreadStates when creating a segment + /// + sealed class FreqProxFieldMergeState + { + + internal FreqProxTermsWriterPerField field; + internal int numPostings; + internal CharBlockPool charPool; + internal RawPostingList[] postings; + + private FreqProxTermsWriter.PostingList p; + internal char[] text; + internal int textOffset; + + private int postingUpto = - 1; + + internal ByteSliceReader freq = new ByteSliceReader(); + internal ByteSliceReader prox = new ByteSliceReader(); + + internal int docID; + internal int termFreq; + + public FreqProxFieldMergeState(FreqProxTermsWriterPerField field) + { + this.field = field; + this.charPool = field.perThread.termsHashPerThread.charPool; + this.numPostings = field.termsHashPerField.numPostings; + this.postings = field.termsHashPerField.SortPostings(); + } + + internal bool NextTerm() + { + postingUpto++; + if (postingUpto == numPostings) + return false; + + p = (FreqProxTermsWriter.PostingList) postings[postingUpto]; + docID = 0; + + text = charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + textOffset = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK; + + field.termsHashPerField.InitReader(freq, p, 0); + if (!field.fieldInfo.omitTermFreqAndPositions) + field.termsHashPerField.InitReader(prox, p, 1); + + // Should always be true + bool result = NextDoc(); + System.Diagnostics.Debug.Assert(result); + + return true; + } + + public bool NextDoc() + { + if (freq.Eof()) + { + if (p.lastDocCode != - 1) + { + // Return last doc + docID = p.lastDocID; + if (!field.omitTermFreqAndPositions) + termFreq = p.docFreq; + p.lastDocCode = - 1; + return true; + } + // EOF + else + return false; + } + + int code = freq.ReadVInt(); + if 
(field.omitTermFreqAndPositions) + docID += code; + else + { + docID += Number.URShift(code, 1); + if ((code & 1) != 0) + termFreq = 1; + else + termFreq = freq.ReadVInt(); + } + + System.Diagnostics.Debug.Assert(docID != p.lastDocID); + + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriter.cs b/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriter.cs new file mode 100644 index 0000000000..f98d646e03 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriter.cs @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + sealed class FreqProxTermsWriter : TermsHashConsumer + { + public override TermsHashConsumerPerThread AddThread(TermsHashPerThread perThread) + { + return new FreqProxTermsWriterPerThread(perThread); + } + + internal override void CreatePostings(RawPostingList[] postings, int start, int count) + { + int end = start + count; + for (int i = start; i < end; i++) + postings[i] = new PostingList(); + } + + private static int compareText(char[] text1, int pos1, char[] text2, int pos2) + { + while (true) + { + char c1 = text1[pos1++]; + char c2 = text2[pos2++]; + if (c1 != c2) + { + if (0xffff == c2) + return 1; + else if (0xffff == c1) + return - 1; + else + return c1 - c2; + } + else if (0xffff == c1) + return 0; + } + } + + internal override void CloseDocStore(SegmentWriteState state) + { + } + public override void Abort() + { + } + + + // TODO: would be nice to factor out more of this, eg the + // FreqProxFieldMergeState, and code to visit all Fields + // under the same FieldInfo together, up into TermsHash*. + // Other writers would presumably share alot of this... 
+ public override void Flush(IDictionary> threadsAndFields, SegmentWriteState state) + { + + // Gather all FieldData's that have postings, across all + // ThreadStates + var allFields = new List(); + + foreach(var entry in threadsAndFields) + { + var fields = entry.Value; + + foreach(var i in fields) + { + FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField)i; + if (perField.termsHashPerField.numPostings > 0) + allFields.Add(perField); + } + } + + // Sort by field name + allFields.Sort(); + int numAllFields = allFields.Count; + + // TODO: allow Lucene user to customize this consumer: + FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos); + /* + Current writer chain: + FormatPostingsFieldsConsumer + -> IMPL: FormatPostingsFieldsWriter + -> FormatPostingsTermsConsumer + -> IMPL: FormatPostingsTermsWriter + -> FormatPostingsDocConsumer + -> IMPL: FormatPostingsDocWriter + -> FormatPostingsPositionsConsumer + -> IMPL: FormatPostingsPositionsWriter + */ + + int start = 0; + while (start < numAllFields) + { + FieldInfo fieldInfo = allFields[start].fieldInfo; + System.String fieldName = fieldInfo.name; + + int end = start + 1; + while (end < numAllFields && allFields[end].fieldInfo.name.Equals(fieldName)) + end++; + + FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end - start]; + for (int i = start; i < end; i++) + { + fields[i - start] = allFields[i]; + + // Aggregate the storePayload as seen by the same + // field across multiple threads + fieldInfo.storePayloads |= fields[i - start].hasPayloads; + } + + // If this field has postings then add them to the + // segment + AppendPostings(fields, consumer); + + for (int i = 0; i < fields.Length; i++) + { + TermsHashPerField perField = fields[i].termsHashPerField; + int numPostings = perField.numPostings; + perField.Reset(); + perField.ShrinkHash(numPostings); + fields[i].Reset(); + } + + start = end; + } + + foreach(var entry in threadsAndFields) + { + FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.Key; + perThread.termsHashPerThread.Reset(true); + } + + consumer.Finish(); + } + + private byte[] payloadBuffer; + + /* Walk through all unique text tokens (Posting + * instances) found in this field and serialize them + * into a single RAM segment. 
*/ + internal void AppendPostings(FreqProxTermsWriterPerField[] fields, FormatPostingsFieldsConsumer consumer) + { + + int numFields = fields.Length; + + FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; + + for (int i = 0; i < numFields; i++) + { + FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i]); + + System.Diagnostics.Debug.Assert(fms.field.fieldInfo == fields [0].fieldInfo); + + // Should always be true + bool result = fms.NextTerm(); + System.Diagnostics.Debug.Assert(result); + } + + FormatPostingsTermsConsumer termsConsumer = consumer.AddField(fields[0].fieldInfo); + + FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields]; + + bool currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions; + + while (numFields > 0) + { + + // Get the next term to merge + termStates[0] = mergeStates[0]; + int numToMerge = 1; + + for (int i = 1; i < numFields; i++) + { + char[] text = mergeStates[i].text; + int textOffset = mergeStates[i].textOffset; + int cmp = compareText(text, textOffset, termStates[0].text, termStates[0].textOffset); + + if (cmp < 0) + { + termStates[0] = mergeStates[i]; + numToMerge = 1; + } + else if (cmp == 0) + termStates[numToMerge++] = mergeStates[i]; + } + + FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(termStates[0].text, termStates[0].textOffset); + + // Now termStates has numToMerge FieldMergeStates + // which all share the same term. Now we must + // interleave the docID streams. + while (numToMerge > 0) + { + + FreqProxFieldMergeState minState = termStates[0]; + for (int i = 1; i < numToMerge; i++) + if (termStates[i].docID < minState.docID) + minState = termStates[i]; + + int termDocFreq = minState.termFreq; + + FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(minState.docID, termDocFreq); + + ByteSliceReader prox = minState.prox; + + // Carefully copy over the prox + payload info, + // changing the format to match Lucene's segment + // format. 
+ if (!currentFieldOmitTermFreqAndPositions) + { + // omitTermFreqAndPositions == false so we do write positions & + // payload + int position = 0; + for (int j = 0; j < termDocFreq; j++) + { + int code = prox.ReadVInt(); + position += (code >> 1); + + int payloadLength; + if ((code & 1) != 0) + { + // This position has a payload + payloadLength = prox.ReadVInt(); + + if (payloadBuffer == null || payloadBuffer.Length < payloadLength) + payloadBuffer = new byte[payloadLength]; + + prox.ReadBytes(payloadBuffer, 0, payloadLength); + } + else + payloadLength = 0; + + posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); + } //End for + + posConsumer.Finish(); + } + + if (!minState.NextDoc()) + { + + // Remove from termStates + int upto = 0; + for (int i = 0; i < numToMerge; i++) + if (termStates[i] != minState) + termStates[upto++] = termStates[i]; + numToMerge--; + System.Diagnostics.Debug.Assert(upto == numToMerge); + + // Advance this state to the next term + + if (!minState.NextTerm()) + { + // OK, no more terms, so remove from mergeStates + // as well + upto = 0; + for (int i = 0; i < numFields; i++) + if (mergeStates[i] != minState) + mergeStates[upto++] = mergeStates[i]; + numFields--; + System.Diagnostics.Debug.Assert(upto == numFields); + } + } + } + + docConsumer.Finish(); + } + + termsConsumer.Finish(); + } + + internal UnicodeUtil.UTF8Result termsUTF8 = new UnicodeUtil.UTF8Result(); + + internal sealed class PostingList:RawPostingList + { + internal int docFreq; // # times this term occurs in the current doc + internal int lastDocID; // Last docID where this term occurred + internal int lastDocCode; // Code for prior doc + internal int lastPosition; // Last position where this term occurred + } + + internal override int BytesPerPosting() + { + return RawPostingList.BYTES_SIZE + 4 * DocumentsWriter.INT_NUM_BYTE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriterPerField.cs b/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriterPerField.cs new file mode 100644 index 0000000000..c654b48e36 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriterPerField.cs @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
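The Flush/AppendPostings code above drives the chain spelled out in the "Current writer chain" comment: a fields consumer per segment, a terms consumer per field, a docs consumer per term and a positions consumer per document. A condensed picture of that call order, shown for a single field, term, doc and position; the consumer types are internal to Lucene.Net.Index and are normally driven only by the indexing chain, so this is an illustration rather than a public API:

// --- illustrative sketch, not part of the patch ---
static void WriteOnePosting(SegmentWriteState state, FieldInfos fieldInfos,
                            FieldInfo fieldInfo, char[] termText, int termStart)
{
    FormatPostingsFieldsConsumer fields = new FormatPostingsFieldsWriter(state, fieldInfos);

    FormatPostingsTermsConsumer terms = fields.AddField(fieldInfo);        // once per field
    // termText is expected to end with the U+FFFF sentinel char
    FormatPostingsDocsConsumer docs = terms.AddTerm(termText, termStart);  // once per term
    FormatPostingsPositionsConsumer positions = docs.AddDoc(0, 1);         // once per doc

    positions.AddPosition(0, null, 0, 0);  // one position, no payload
    positions.Finish();                    // done with this doc
    docs.Finish();                         // done with this term
    terms.Finish();                        // done with this field
    fields.Finish();                       // closes the terms dictionary outputs
}
// --- end sketch ---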
+ */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + + // TODO: break into separate freq and prox writers as + // codecs; make separate container (tii/tis/skip/*) that can + // be configured as any number of files 1..N + sealed class FreqProxTermsWriterPerField:TermsHashConsumerPerField, System.IComparable + { + + internal FreqProxTermsWriterPerThread perThread; + internal TermsHashPerField termsHashPerField; + internal FieldInfo fieldInfo; + internal DocumentsWriter.DocState docState; + internal FieldInvertState fieldState; + internal bool omitTermFreqAndPositions; + internal IPayloadAttribute payloadAttribute; + + public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) + { + this.termsHashPerField = termsHashPerField; + this.perThread = perThread; + this.fieldInfo = fieldInfo; + docState = termsHashPerField.docState; + fieldState = termsHashPerField.fieldState; + omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + } + + internal override int GetStreamCount() + { + if (fieldInfo.omitTermFreqAndPositions) + return 1; + else + return 2; + } + + internal override void Finish() + { + } + + internal bool hasPayloads; + + internal override void SkippingLongTerm() + { + } + + public int CompareTo(FreqProxTermsWriterPerField other) + { + return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name); + } + + internal void Reset() + { + // Record, up front, whether our in-RAM format will be + // with or without term freqs: + omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + payloadAttribute = null; + } + + internal override bool Start(IFieldable[] fields, int count) + { + for (int i = 0; i < count; i++) + if (fields[i].IsIndexed) + return true; + return false; + } + + internal override void Start(IFieldable f) + { + if (fieldState.attributeSource.HasAttribute()) + { + payloadAttribute = fieldState.attributeSource.GetAttribute(); + } + else + { + payloadAttribute = null; + } + } + + internal void WriteProx(FreqProxTermsWriter.PostingList p, int proxCode) + { + Payload payload; + if (payloadAttribute == null) + { + payload = null; + } + else + { + payload = payloadAttribute.Payload; + } + + if (payload != null && payload.internalLength > 0) + { + termsHashPerField.WriteVInt(1, (proxCode << 1) | 1); + termsHashPerField.WriteVInt(1, payload.internalLength); + termsHashPerField.WriteBytes(1, payload.data, payload.internalOffset, payload.internalLength); + hasPayloads = true; + } + else + termsHashPerField.WriteVInt(1, proxCode << 1); + p.lastPosition = fieldState.position; + } + + internal override void NewTerm(RawPostingList p0) + { + // First time we're seeing this term since the last + // flush + System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.newTerm start")); + FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0; + p.lastDocID = docState.docID; + if (omitTermFreqAndPositions) + { + p.lastDocCode = docState.docID; + } + else + { + p.lastDocCode = docState.docID << 1; + p.docFreq = 1; + WriteProx(p, fieldState.position); + } + } + + internal override void AddTerm(RawPostingList p0) + { + + System.Diagnostics.Debug.Assert(docState.TestPoint("FreqProxTermsWriterPerField.addTerm start")); + + FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0; + + System.Diagnostics.Debug.Assert(omitTermFreqAndPositions || p.docFreq > 0); + + if 
(omitTermFreqAndPositions) + { + if (docState.docID != p.lastDocID) + { + System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID); + termsHashPerField.WriteVInt(0, p.lastDocCode); + p.lastDocCode = docState.docID - p.lastDocID; + p.lastDocID = docState.docID; + } + } + else + { + if (docState.docID != p.lastDocID) + { + System.Diagnostics.Debug.Assert(docState.docID > p.lastDocID); + // Term not yet seen in the current doc but previously + // seen in other doc(s) since the last flush + + // Now that we know doc freq for previous doc, + // write it & lastDocCode + if (1 == p.docFreq) + termsHashPerField.WriteVInt(0, p.lastDocCode | 1); + else + { + termsHashPerField.WriteVInt(0, p.lastDocCode); + termsHashPerField.WriteVInt(0, p.docFreq); + } + p.docFreq = 1; + p.lastDocCode = (docState.docID - p.lastDocID) << 1; + p.lastDocID = docState.docID; + WriteProx(p, fieldState.position); + } + else + { + p.docFreq++; + WriteProx(p, fieldState.position - p.lastPosition); + } + } + } + + public void Abort() + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriterPerThread.cs b/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriterPerThread.cs new file mode 100644 index 0000000000..01f1ae9ca2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/FreqProxTermsWriterPerThread.cs @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class FreqProxTermsWriterPerThread:TermsHashConsumerPerThread + { + internal TermsHashPerThread termsHashPerThread; + internal DocumentsWriter.DocState docState; + + public FreqProxTermsWriterPerThread(TermsHashPerThread perThread) + { + docState = perThread.docState; + termsHashPerThread = perThread; + } + + public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) + { + return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo); + } + + public override void StartDocument() + { + } + + public override DocumentsWriter.DocWriter FinishDocument() + { + return null; + } + + public override void Abort() + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexCommit.cs b/external/Lucene.Net.Light/src/core/Index/IndexCommit.cs new file mode 100644 index 0000000000..306d7f1905 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexCommit.cs @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Store; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + ///

Expert: represents a single commit into an index as seen by the + /// IndexDeletionPolicy or IndexReader.

+ /// + ///

Changes to the content of an index are made visible + /// only after the writer who made that change commits by + /// writing a new segments file + /// (segments_N). This point in time, when the + /// action of writing a new segments file to the directory + /// is completed, is an index commit.

+ /// + ///

Each index commit point has a unique segments file + /// associated with it. The segments file associated with a + /// later index commit point would have a larger N.

+ /// + ///

WARNING: This API is new and experimental and + /// may suddenly change.

+ ///

+ + public abstract class IndexCommit + { + /// Get the segments file (segments_N) associated + /// with this commit point. + /// + public abstract string SegmentsFileName { get; } + + /// Returns all index files referenced by this commit point. + public abstract ICollection FileNames { get; } + + /// Returns the for the index. + public abstract Directory Directory { get; } + + /// Delete this commit point. This only applies when using + /// the commit point in the context of IndexWriter's + /// IndexDeletionPolicy. + ///

+ /// Upon calling this, the writer is notified that this commit + /// point should be deleted. + ///

+ /// Decision that a commit-point should be deleted is taken by the IndexDeletionPolicy in effect + /// and therefore this should only be called by its OnInit or + /// OnCommit methods. + ///

+ public abstract void Delete(); + + public abstract bool IsDeleted { get; } + + /// Returns true if this commit is an optimized index. + public abstract bool IsOptimized { get; } + + /// Two IndexCommits are equal if both their Directory and versions are equal. + public override bool Equals(System.Object other) + { + if (other is IndexCommit) + { + IndexCommit otherCommit = (IndexCommit) other; + return otherCommit.Directory.Equals(Directory) && otherCommit.Version == Version; + } + else + return false; + } + + public override int GetHashCode() + { + return (int)(Directory.GetHashCode() + Version); + } + + /// Returns the version for this IndexCommit. This is the + /// same value that would + /// return if it were opened on this commit. + /// + public abstract long Version { get; } + + /// Returns the generation (the _N in segments_N) for this + /// IndexCommit + /// + public abstract long Generation { get; } + + /// Convenience method that returns the last modified time + /// of the segments_N file corresponding to this index + /// commit, equivalent to + /// getDirectory().fileModified(getSegmentsFileName()). + /// + public virtual long Timestamp + { + get { return Directory.FileModified(SegmentsFileName); } + } + + /// Returns userData, previously passed to + /// + /// for this commit. IDictionary is String -> String. + /// + public abstract IDictionary UserData { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexDeletionPolicy.cs b/external/Lucene.Net.Light/src/core/Index/IndexDeletionPolicy.cs new file mode 100644 index 0000000000..bef9924c64 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexDeletionPolicy.cs @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + ///

Expert: policy for deletion of stale index commits. + /// + ///

Implement this interface, and pass it to one + /// of the IndexWriter or + /// IndexReader constructors, to customize when older + /// point-in-time commits + /// are deleted from the index directory. The default deletion policy + /// is KeepOnlyLastCommitDeletionPolicy, which always + /// removes old commits as soon as a new commit is done (this + /// matches the behavior before 2.2).

+ /// + ///

One expected use case for this (and the reason why it + /// was first created) is to work around problems with an + /// index directory accessed via filesystems like NFS because + /// NFS does not provide the "delete on last close" semantics + /// that Lucene's "point in time" search normally relies on. + /// By implementing a custom deletion policy, such as "a + /// commit is only removed once it has been stale for more + /// than X minutes", you can give your readers time to + /// refresh to the new commit before + /// IndexWriter removes the old commits. Note that doing so will + /// increase the storage requirements of the index. See LUCENE-710 + /// for details.

+ ///

+ + public interface IndexDeletionPolicy + { + + ///

This is called once when a writer is first + /// instantiated to give the policy a chance to remove old + /// commit points.

+ /// + ///

The writer locates all index commits present in the + /// index directory and calls this method. The policy may + /// choose to delete some of the commit points, doing so by + /// calling the Delete() method + /// of IndexCommit.

+ /// + ///

Note: the last CommitPoint is the most recent one, + /// i.e. the "front index state". Be careful not to delete it, + /// unless you know for sure what you are doing, and unless + /// you can afford to lose the index content while doing that. + /// + ///

+ /// List of current + /// point-in-time commits, + /// sorted by age (the 0th one is the oldest commit). + /// + void OnInit<T>(IList<T> commits) where T : IndexCommit; + + /// + ///

This is called each time the writer completes a commit. + /// This gives the policy a chance to remove old commit points + /// with each commit.

+ /// + ///

The policy may now choose to delete old commit points + /// by calling the Delete() method + /// of IndexCommit.

+ /// + ///

This method is only called when IndexWriter.Commit() + /// or IndexWriter.Close() is called, or possibly not at + /// all if IndexWriter.Rollback() is called.

+ /// + ///

Note: the last CommitPoint is the most recent one, + /// i.e. the "front index state". Be careful not to delete it, + /// unless you know for sure what you are doing, and unless + /// you can afford to lose the index content while doing that.

+ ///
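Given the OnInit/OnCommit contract described above, a custom policy reduces to deciding which of the supplied commits to Delete(). A minimal sketch that keeps only the newest commit, similar in spirit to the default KeepOnlyLastCommitDeletionPolicy; the class name here is made up for illustration:

// --- illustrative sketch, not part of the patch ---
using System.Collections.Generic;

namespace Lucene.Net.Index
{
    // Commits arrive sorted by age, oldest first, so everything except the
    // last entry is flagged for deletion; IndexFileDeleter removes the files.
    class KeepNewestOnlyDeletionPolicy : IndexDeletionPolicy
    {
        public void OnInit<T>(IList<T> commits) where T : IndexCommit
        {
            OnCommit(commits);
        }

        public void OnCommit<T>(IList<T> commits) where T : IndexCommit
        {
            for (int i = 0; i < commits.Count - 1; i++)
                commits[i].Delete();
        }
    }
}
// --- end sketch ---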
+ /// + /// List of , sorted by age (the 0th one is the oldest commit). + /// + void OnCommit(IList commits) where T : IndexCommit; + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexFileDeleter.cs b/external/Lucene.Net.Light/src/core/Index/IndexFileDeleter.cs new file mode 100644 index 0000000000..3ac815d86b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexFileDeleter.cs @@ -0,0 +1,808 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + /// + /// This class keeps track of each SegmentInfos instance that + /// is still "live", either because it corresponds to a + /// segments_N file in the Directory (a "commit", i.e. a + /// committed SegmentInfos) or because it's an in-memory + /// SegmentInfos that a writer is actively updating but has + /// not yet committed. This class uses simple reference + /// counting to map the live SegmentInfos instances to + /// individual files in the Directory. + /// + /// The same directory file may be referenced by more than + /// one IndexCommit, i.e. more than one SegmentInfos. + /// Therefore we count how many commits reference each file. + /// When all the commits referencing a certain file have been + /// deleted, the refcount for that file becomes zero, and the + /// file is deleted. + /// + /// A separate deletion policy interface + /// (IndexDeletionPolicy) is consulted on creation (onInit) + /// and once per commit (onCommit), to decide when a commit + /// should be removed. + /// + /// It is the business of the IndexDeletionPolicy to choose + /// when to delete commit points. The actual mechanics of + /// file deletion, retrying, etc, derived from the deletion + /// of commit points is the business of the IndexFileDeleter. + /// + /// The current default deletion policy is + /// , which removes all + /// prior commits when a new commit has completed. This + /// matches the behavior before 2.2. + /// + /// Note that you must hold the write.lock before + /// instantiating this class. It opens segments_N file(s) + /// directly with no retry logic. + /// + + public sealed class IndexFileDeleter : IDisposable + { + + //// Files that we tried to delete but failed (likely + /// because they are open and we are running on Windows), + /// so we will retry them again later: //// + private IList deletable; + + //// Reference count for all files in the index. + /// Counts how many existing commits reference a file. + /// Maps String to RefCount (class below) instances: //// + private IDictionary refCounts = new HashMap(); + + //// Holds all commits (segments_N) currently in the index. 
+ /// This will have just 1 commit if you are using the + /// default delete policy (KeepOnlyLastCommitDeletionPolicy). + /// Other policies may leave commit points live for longer + /// in which case this list would be longer than 1: //// + private List commits = new List(); + + //// Holds files we had incref'd from the previous + /// non-commit checkpoint: //// + private List> lastFiles = new List>(); + + //// Commits that the IndexDeletionPolicy have decided to delete: //// + private List commitsToDelete = new List(); + + private System.IO.StreamWriter infoStream; + private Directory directory; + private IndexDeletionPolicy policy; + private DocumentsWriter docWriter; + + internal bool startingCommitDeleted; + private SegmentInfos lastSegmentInfos; + + private HashSet synced; + + /// Change to true to see details of reference counts when + /// infoStream != null + /// + public static bool VERBOSE_REF_COUNTS = false; + + internal void SetInfoStream(System.IO.StreamWriter infoStream) + { + this.infoStream = infoStream; + if (infoStream != null) + { + Message("setInfoStream deletionPolicy=" + policy); + } + } + + private void Message(System.String message) + { + infoStream.WriteLine("IFD [" + new DateTime().ToString() + "; " + ThreadClass.Current().Name + "]: " + message); + } + + /// Initialize the deleter: find all previous commits in + /// the Directory, incref the files they reference, call + /// the policy to let it delete commits. This will remove + /// any files not referenced by any of the commits. + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, System.IO.StreamWriter infoStream, DocumentsWriter docWriter, HashSet synced) + { + + this.docWriter = docWriter; + this.infoStream = infoStream; + this.synced = synced; + + if (infoStream != null) + { + Message("init: current segments file is \"" + segmentInfos.GetCurrentSegmentFileName() + "\"; deletionPolicy=" + policy); + } + + this.policy = policy; + this.directory = directory; + + // First pass: walk the files and initialize our ref + // counts: + long currentGen = segmentInfos.Generation; + IndexFileNameFilter filter = IndexFileNameFilter.Filter; + + System.String[] files = directory.ListAll(); + + CommitPoint currentCommitPoint = null; + + for (int i = 0; i < files.Length; i++) + { + + System.String fileName = files[i]; + + if (filter.Accept(null, fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN)) + { + + // Add this file to refCounts with initial count 0: + GetRefCount(fileName); + + if (fileName.StartsWith(IndexFileNames.SEGMENTS)) + { + + // This is a commit (segments or segments_N), and + // it's valid (<= the max gen). Load it, then + // incref all files it refers to: + if (infoStream != null) + { + Message("init: load commit \"" + fileName + "\""); + } + SegmentInfos sis = new SegmentInfos(); + try + { + sis.Read(directory, fileName); + } + catch (System.IO.FileNotFoundException) + { + // LUCENE-948: on NFS (and maybe others), if + // you have writers switching back and forth + // between machines, it's very likely that the + // dir listing will be stale and will claim a + // file segments_X exists when in fact it + // doesn't. 
So, we catch this and handle it + // as if the file does not exist + if (infoStream != null) + { + Message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point"); + } + sis = null; + } + catch (System.IO.IOException) + { + if (SegmentInfos.GenerationFromSegmentsFileName(fileName) <= currentGen) + { + throw; + } + else + { + // Most likely we are opening an index that + // has an aborted "future" commit, so suppress + // exc in this case + sis = null; + } + } + if (sis != null) + { + CommitPoint commitPoint = new CommitPoint(this, commitsToDelete, directory, sis); + if (sis.Generation == segmentInfos.Generation) + { + currentCommitPoint = commitPoint; + } + commits.Add(commitPoint); + IncRef(sis, true); + + if (lastSegmentInfos == null || sis.Generation > lastSegmentInfos.Generation) + { + lastSegmentInfos = sis; + } + } + } + } + } + + if (currentCommitPoint == null) + { + // We did not in fact see the segments_N file + // corresponding to the segmentInfos that was passed + // in. Yet, it must exist, because our caller holds + // the write lock. This can happen when the directory + // listing was stale (eg when index accessed via NFS + // client with stale directory listing cache). So we + // try now to explicitly open this commit point: + SegmentInfos sis = new SegmentInfos(); + try + { + sis.Read(directory, segmentInfos.GetCurrentSegmentFileName()); + } + catch (System.IO.IOException) + { + throw new CorruptIndexException("failed to locate current segments_N file"); + } + if (infoStream != null) + Message("forced open of current segments file " + segmentInfos.GetCurrentSegmentFileName()); + currentCommitPoint = new CommitPoint(this, commitsToDelete, directory, sis); + commits.Add(currentCommitPoint); + IncRef(sis, true); + } + + // We keep commits list in sorted order (oldest to newest): + commits.Sort(); + + // Now delete anything with ref count at 0. These are + // presumably abandoned files eg due to crash of + // IndexWriter. + foreach(KeyValuePair entry in refCounts) + { + string fileName = entry.Key; + RefCount rc = refCounts[fileName]; + if (0 == rc.count) + { + if (infoStream != null) + { + Message("init: removing unreferenced file \"" + fileName + "\""); + } + DeleteFile(fileName); + } + } + + // Finally, give policy a chance to remove things on + // startup: + policy.OnInit(commits); + + // Always protect the incoming segmentInfos since + // sometime it may not be the most recent commit + Checkpoint(segmentInfos, false); + + startingCommitDeleted = currentCommitPoint.IsDeleted; + + DeleteCommits(); + } + + public SegmentInfos LastSegmentInfos + { + get { return lastSegmentInfos; } + } + + /// Remove the CommitPoints in the commitsToDelete List by + /// DecRef'ing all files from each SegmentInfos. 
+ /// + private void DeleteCommits() + { + + int size = commitsToDelete.Count; + + if (size > 0) + { + + // First decref all files that had been referred to by + // the now-deleted commits: + for (int i = 0; i < size; i++) + { + CommitPoint commit = commitsToDelete[i]; + if (infoStream != null) + { + Message("deleteCommits: now decRef commit \"" + commit.SegmentsFileName + "\""); + } + foreach(string file in commit.files) + { + DecRef(file); + } + } + commitsToDelete.Clear(); + + // Now compact commits to remove deleted ones (preserving the sort): + size = commits.Count; + int readFrom = 0; + int writeTo = 0; + while (readFrom < size) + { + CommitPoint commit = commits[readFrom]; + if (!commit.deleted) + { + if (writeTo != readFrom) + { + commits[writeTo] = commits[readFrom]; + } + writeTo++; + } + readFrom++; + } + + while (size > writeTo) + { + commits.RemoveAt(size - 1); + size--; + } + } + } + + /// Writer calls this when it has hit an error and had to + /// roll back, to tell us that there may now be + /// unreferenced files in the filesystem. So we re-list + /// the filesystem and delete such files. If segmentName + /// is non-null, we will only delete files corresponding to + /// that segment. + /// + public void Refresh(System.String segmentName) + { + System.String[] files = directory.ListAll(); + IndexFileNameFilter filter = IndexFileNameFilter.Filter; + System.String segmentPrefix1; + System.String segmentPrefix2; + if (segmentName != null) + { + segmentPrefix1 = segmentName + "."; + segmentPrefix2 = segmentName + "_"; + } + else + { + segmentPrefix1 = null; + segmentPrefix2 = null; + } + + for (int i = 0; i < files.Length; i++) + { + System.String fileName = files[i]; + if (filter.Accept(null, fileName) && (segmentName == null || fileName.StartsWith(segmentPrefix1) || fileName.StartsWith(segmentPrefix2)) && !refCounts.ContainsKey(fileName) && !fileName.Equals(IndexFileNames.SEGMENTS_GEN)) + { + // Unreferenced file, so remove it + if (infoStream != null) + { + Message("refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\""); + } + DeleteFile(fileName); + } + } + } + + public void Refresh() + { + Refresh(null); + } + + public void Dispose() + { + // Move to protected method if class becomes unsealed + // DecRef old files from the last checkpoint, if any: + int size = lastFiles.Count; + if (size > 0) + { + for (int i = 0; i < size; i++) + DecRef(lastFiles[i]); + lastFiles.Clear(); + } + + DeletePendingFiles(); + } + + private void DeletePendingFiles() + { + if (deletable != null) + { + IList oldDeletable = deletable; + deletable = null; + int size = oldDeletable.Count; + for (int i = 0; i < size; i++) + { + if (infoStream != null) + { + Message("delete pending file " + oldDeletable[i]); + } + DeleteFile(oldDeletable[i]); + } + } + } + + /// For definition of "check point" see IndexWriter comments: + /// "Clarification: Check Points (and commits)". + /// + /// Writer calls this when it has made a "consistent + /// change" to the index, meaning new files are written to + /// the index and the in-memory SegmentInfos have been + /// modified to point to those files. + /// + /// This may or may not be a commit (segments_N may or may + /// not have been written). + /// + /// We simply incref the files referenced by the new + /// SegmentInfos and decref the files we had previously + /// seen (if any). + /// + /// If this is a commit, we also call the policy to give it + /// a chance to remove other commits. 
If any commits are + /// removed, we decref their files as well. + /// + public void Checkpoint(SegmentInfos segmentInfos, bool isCommit) + { + + if (infoStream != null) + { + Message("now checkpoint \"" + segmentInfos.GetCurrentSegmentFileName() + "\" [" + segmentInfos.Count + " segments " + "; isCommit = " + isCommit + "]"); + } + + // Try again now to delete any previously un-deletable + // files (because they were in use, on Windows): + DeletePendingFiles(); + + // Incref the files: + IncRef(segmentInfos, isCommit); + + if (isCommit) + { + // Append to our commits list: + commits.Add(new CommitPoint(this, commitsToDelete, directory, segmentInfos)); + + // Tell policy so it can remove commits: + policy.OnCommit(commits); + + // Decref files for commits that were deleted by the policy: + DeleteCommits(); + } + else + { + + IList docWriterFiles; + if (docWriter != null) + { + docWriterFiles = docWriter.OpenFiles(); + if (docWriterFiles != null) + // We must incRef these files before decRef'ing + // last files to make sure we don't accidentally + // delete them: + IncRef(docWriterFiles); + } + else + docWriterFiles = null; + + // DecRef old files from the last checkpoint, if any: + int size = lastFiles.Count; + if (size > 0) + { + for (int i = 0; i < size; i++) + DecRef(lastFiles[i]); + lastFiles.Clear(); + } + + // Save files so we can decr on next checkpoint/commit: + lastFiles.Add(segmentInfos.Files(directory, false)); + + if (docWriterFiles != null) + { + lastFiles.Add(docWriterFiles); + } + } + } + + internal void IncRef(SegmentInfos segmentInfos, bool isCommit) + { + // If this is a commit point, also incRef the + // segments_N file: + foreach(string fileName in segmentInfos.Files(directory, isCommit)) + { + IncRef(fileName); + } + } + + internal void IncRef(ICollection files) + { + foreach(string file in files) + { + IncRef(file); + } + } + + internal void IncRef(string fileName) + { + RefCount rc = GetRefCount(fileName); + if (infoStream != null && VERBOSE_REF_COUNTS) + { + Message(" IncRef \"" + fileName + "\": pre-incr count is " + rc.count); + } + rc.IncRef(); + } + + internal void DecRef(ICollection files) + { + foreach(string file in files) + { + DecRef(file); + } + } + + internal void DecRef(System.String fileName) + { + RefCount rc = GetRefCount(fileName); + if (infoStream != null && VERBOSE_REF_COUNTS) + { + Message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count); + } + if (0 == rc.DecRef()) + { + // This file is no longer referenced by any past + // commit points nor by the in-memory SegmentInfos: + DeleteFile(fileName); + refCounts.Remove(fileName); + + if (synced != null) { + lock(synced) + { + synced.Remove(fileName); + } + } + } + } + + internal void DecRef(SegmentInfos segmentInfos) + { + foreach(string file in segmentInfos.Files(directory, false)) + { + DecRef(file); + } + } + + public bool Exists(String fileName) + { + if (!refCounts.ContainsKey(fileName)) + { + return false; + } + else + { + return GetRefCount(fileName).count > 0; + } + } + + private RefCount GetRefCount(System.String fileName) + { + RefCount rc; + if (!refCounts.ContainsKey(fileName)) + { + rc = new RefCount(fileName); + refCounts[fileName] = rc; + } + else + { + rc = refCounts[fileName]; + } + return rc; + } + + internal void DeleteFiles(System.Collections.Generic.IList files) + { + foreach(string file in files) + DeleteFile(file); + } + + /// Deletes the specified files, but only if they are new + /// (have not yet been incref'd). 
+ /// + internal void DeleteNewFiles(System.Collections.Generic.ICollection files) + { + foreach(string fileName in files) + { + if (!refCounts.ContainsKey(fileName)) + { + if (infoStream != null) + { + Message("delete new file \"" + fileName + "\""); + } + DeleteFile(fileName); + } + } + } + + internal void DeleteFile(System.String fileName) + { + try + { + if (infoStream != null) + { + Message("delete \"" + fileName + "\""); + } + directory.DeleteFile(fileName); + } + catch (System.IO.IOException e) + { + // if delete fails + if (directory.FileExists(fileName)) + { + + // Some operating systems (e.g. Windows) don't + // permit a file to be deleted while it is opened + // for read (e.g. by another process or thread). So + // we assume that when a delete fails it is because + // the file is open in another process, and queue + // the file for subsequent deletion. + + if (infoStream != null) + { + Message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.ToString() + "; Will re-try later."); + } + if (deletable == null) + { + deletable = new List(); + } + deletable.Add(fileName); // add to deletable + } + } + } + + /// Tracks the reference count for a single index file: + sealed private class RefCount + { + + // fileName used only for better assert error messages + internal System.String fileName; + internal bool initDone; + internal RefCount(System.String fileName) + { + this.fileName = fileName; + } + + internal int count; + + public int IncRef() + { + if (!initDone) + { + initDone = true; + } + else + { + System.Diagnostics.Debug.Assert(count > 0, "RefCount is 0 pre-increment for file " + fileName); + } + return ++count; + } + + public int DecRef() + { + System.Diagnostics.Debug.Assert(count > 0, "RefCount is 0 pre-decrement for file " + fileName); + return --count; + } + } + + /// Holds details for each commit point. This class is + /// also passed to the deletion policy. Note: this class + /// has a natural ordering that is inconsistent with + /// equals. 
+ /// + + sealed private class CommitPoint:IndexCommit, System.IComparable + { + private void InitBlock(IndexFileDeleter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private IndexFileDeleter enclosingInstance; + public IndexFileDeleter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + internal long gen; + internal ICollection files; + internal string segmentsFileName; + internal bool deleted; + internal Directory directory; + internal ICollection commitsToDelete; + internal long version; + internal long generation; + internal bool isOptimized; + internal IDictionary userData; + + public CommitPoint(IndexFileDeleter enclosingInstance, ICollection commitsToDelete, Directory directory, SegmentInfos segmentInfos) + { + InitBlock(enclosingInstance); + this.directory = directory; + this.commitsToDelete = commitsToDelete; + userData = segmentInfos.UserData; + segmentsFileName = segmentInfos.GetCurrentSegmentFileName(); + version = segmentInfos.Version; + generation = segmentInfos.Generation; + files = segmentInfos.Files(directory, true); + gen = segmentInfos.Generation; + isOptimized = segmentInfos.Count == 1 && !segmentInfos.Info(0).HasDeletions(); + + System.Diagnostics.Debug.Assert(!segmentInfos.HasExternalSegments(directory)); + } + + public override string ToString() + { + return "IndexFileDeleter.CommitPoint(" + segmentsFileName + ")"; + } + + public override bool IsOptimized + { + get { return isOptimized; } + } + + public override string SegmentsFileName + { + get { return segmentsFileName; } + } + + public override ICollection FileNames + { + get { return files; } + } + + public override Directory Directory + { + get { return directory; } + } + + public override long Version + { + get { return version; } + } + + public override long Generation + { + get { return generation; } + } + + public override IDictionary UserData + { + get { return userData; } + } + + /// Called only be the deletion policy, to remove this + /// commit point from the index. + /// + public override void Delete() + { + if (!deleted) + { + deleted = true; + Enclosing_Instance.commitsToDelete.Add(this); + } + } + + public override bool IsDeleted + { + get { return deleted; } + } + + public int CompareTo(CommitPoint commit) + { + if (gen < commit.gen) + { + return - 1; + } + else if (gen > commit.gen) + { + return 1; + } + else + { + return 0; + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexFileNameFilter.cs b/external/Lucene.Net.Light/src/core/Index/IndexFileNameFilter.cs new file mode 100644 index 0000000000..474381f279 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexFileNameFilter.cs @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + /// Filename filter that accept filenames and extensions only created by Lucene. + public class IndexFileNameFilter + { + + private static IndexFileNameFilter singleton = new IndexFileNameFilter(); + private HashSet extensions; + private HashSet extensionsInCFS; + + // Prevent instantiation. + private IndexFileNameFilter() + { + extensions = new HashSet(); + for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++) + { + extensions.Add(IndexFileNames.INDEX_EXTENSIONS[i]); + } + extensionsInCFS = new HashSet(); + for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.Length; i++) + { + extensionsInCFS.Add(IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]); + } + } + + /* (non-Javadoc) + * + */ + public virtual bool Accept(System.IO.FileInfo dir, System.String name) + { + int i = name.LastIndexOf((System.Char) '.'); + if (i != - 1) + { + System.String extension = name.Substring(1 + i); + if (extensions.Contains(extension)) + { + return true; + } + else if (extension.StartsWith("f") && (new System.Text.RegularExpressions.Regex("f\\d+")).Match(extension).Success) + { + return true; + } + else if (extension.StartsWith("s") && (new System.Text.RegularExpressions.Regex("s\\d+")).Match(extension).Success) + { + return true; + } + } + else + { + if (name.Equals(IndexFileNames.DELETABLE)) + return true; + else if (name.StartsWith(IndexFileNames.SEGMENTS)) + return true; + } + return false; + } + + /// Returns true if this is a file that would be contained + /// in a CFS file. This function should only be called on + /// files that pass the above "accept" (ie, are already + /// known to be a Lucene index file). + /// + public virtual bool IsCFSFile(System.String name) + { + int i = name.LastIndexOf((System.Char) '.'); + if (i != - 1) + { + System.String extension = name.Substring(1 + i); + if (extensionsInCFS.Contains(extension)) + { + return true; + } + if (extension.StartsWith("f") && (new System.Text.RegularExpressions.Regex("f\\d+")).Match(extension).Success) + { + return true; + } + } + return false; + } + + public static IndexFileNameFilter Filter + { + get { return singleton; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexFileNames.cs b/external/Lucene.Net.Light/src/core/Index/IndexFileNames.cs new file mode 100644 index 0000000000..ef5011904d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexFileNames.cs @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// Useful constants representing filenames and extensions used by lucene + public sealed class IndexFileNames + { + + /// Name of the index segment file + public /*internal*/ const System.String SEGMENTS = "segments"; + + /// Name of the generation reference file name + public /*internal*/ const System.String SEGMENTS_GEN = "segments.gen"; + + /// Name of the index deletable file (only used in + /// pre-lockless indices) + /// + public /*internal*/ const System.String DELETABLE = "deletable"; + + /// Extension of norms file + public /*internal*/ const System.String NORMS_EXTENSION = "nrm"; + + /// Extension of freq postings file + public /*internal*/ const System.String FREQ_EXTENSION = "frq"; + + /// Extension of prox postings file + public /*internal*/ const System.String PROX_EXTENSION = "prx"; + + /// Extension of terms file + public /*internal*/ const System.String TERMS_EXTENSION = "tis"; + + /// Extension of terms index file + public /*internal*/ const System.String TERMS_INDEX_EXTENSION = "tii"; + + /// Extension of stored fields index file + public /*internal*/ const System.String FIELDS_INDEX_EXTENSION = "fdx"; + + /// Extension of stored fields file + public /*internal*/ const System.String FIELDS_EXTENSION = "fdt"; + + /// Extension of vectors fields file + public /*internal*/ const System.String VECTORS_FIELDS_EXTENSION = "tvf"; + + /// Extension of vectors documents file + public /*internal*/ const System.String VECTORS_DOCUMENTS_EXTENSION = "tvd"; + + /// Extension of vectors index file + public /*internal*/ const System.String VECTORS_INDEX_EXTENSION = "tvx"; + + /// Extension of compound file + public /*internal*/ const System.String COMPOUND_FILE_EXTENSION = "cfs"; + + /// Extension of compound file for doc store files + public /*internal*/ const System.String COMPOUND_FILE_STORE_EXTENSION = "cfx"; + + /// Extension of deletes + internal const System.String DELETES_EXTENSION = "del"; + + /// Extension of field infos + public /*internal*/ const System.String FIELD_INFOS_EXTENSION = "fnm"; + + /// Extension of plain norms + public /*internal*/ const System.String PLAIN_NORMS_EXTENSION = "f"; + + /// Extension of separate norms + public /*internal*/ const System.String SEPARATE_NORMS_EXTENSION = "s"; + + /// Extension of gen file + public /*internal*/ const System.String GEN_EXTENSION = "gen"; + + /// This array contains all filename extensions used by + /// Lucene's index files, with two exceptions, namely the + /// extension made up from .f + a number and + /// from .s + a number. Also note that + /// Lucene's segments_N files do not have any + /// filename extension. + /// + public /*internal*/ static readonly System.String[] INDEX_EXTENSIONS = new System.String[]{COMPOUND_FILE_EXTENSION, FIELD_INFOS_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION, TERMS_INDEX_EXTENSION, TERMS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, DELETES_EXTENSION, VECTORS_INDEX_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, VECTORS_FIELDS_EXTENSION, GEN_EXTENSION, NORMS_EXTENSION, COMPOUND_FILE_STORE_EXTENSION}; + + /// File extensions that are added to a compound file + /// (same as above, minus "del", "gen", "cfs"). 
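// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the vendored sources): the acceptance rule
// implemented by IndexFileNameFilter above, restated as a stand-alone check.
// A file belongs to a Lucene index if its extension is one of the known
// extensions listed in IndexFileNames, or matches "f<N>"/"s<N>" (old-style
// per-field norms), or the name is "deletable" or starts with "segments".
// Only a subset of the extensions is listed here, for brevity.
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

static class IndexFileNameSketch
{
    // A few of the extensions from IndexFileNames, enough for the example.
    static readonly HashSet<string> KnownExtensions = new HashSet<string>
        { "cfs", "cfx", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "nrm", "gen", "del" };

    public static bool LooksLikeIndexFile(string name)
    {
        int dot = name.LastIndexOf('.');
        if (dot != -1)
        {
            string extension = name.Substring(dot + 1);
            return KnownExtensions.Contains(extension)
                || Regex.IsMatch(extension, @"^[fs]\d+$");   // per-field norms files
        }
        return name.Equals("deletable") || name.StartsWith("segments");
    }

    static void Main()
    {
        Console.WriteLine(LooksLikeIndexFile("_0.cfs"));     // True
        Console.WriteLine(LooksLikeIndexFile("segments_2")); // True
        Console.WriteLine(LooksLikeIndexFile("readme.txt")); // False
    }
}
// ----------------------------------------------------------------------------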
+ /// + public /*internal*/ static readonly System.String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new System.String[]{FIELD_INFOS_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION, TERMS_INDEX_EXTENSION, TERMS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, VECTORS_INDEX_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, VECTORS_FIELDS_EXTENSION, NORMS_EXTENSION}; + + public /*internal*/ static readonly System.String[] STORE_INDEX_EXTENSIONS = new System.String[]{VECTORS_INDEX_EXTENSION, VECTORS_FIELDS_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION}; + + public /*internal*/ static readonly System.String[] NON_STORE_INDEX_EXTENSIONS = new System.String[]{FIELD_INFOS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, TERMS_EXTENSION, TERMS_INDEX_EXTENSION, NORMS_EXTENSION}; + + /// File extensions of old-style index files + public /*internal*/ static readonly System.String[] COMPOUND_EXTENSIONS = new System.String[]{FIELD_INFOS_EXTENSION, FREQ_EXTENSION, PROX_EXTENSION, FIELDS_INDEX_EXTENSION, FIELDS_EXTENSION, TERMS_INDEX_EXTENSION, TERMS_EXTENSION}; + + /// File extensions for term vector support + public /*internal*/ static readonly System.String[] VECTOR_EXTENSIONS = new System.String[]{VECTORS_INDEX_EXTENSION, VECTORS_DOCUMENTS_EXTENSION, VECTORS_FIELDS_EXTENSION}; + + /// Computes the full file name from base, extension and + /// generation. If the generation is -1, the file name is + /// null. If it's 0, the file name is + /// If it's > 0, the file name is + /// + /// + /// -- main part of the file name + /// + /// -- extension of the filename (including .) + /// + /// -- generation + /// + public /*internal*/ static System.String FileNameFromGeneration(System.String base_Renamed, System.String extension, long gen) + { + if (gen == SegmentInfo.NO) + { + return null; + } + else if (gen == SegmentInfo.WITHOUT_GEN) + { + return base_Renamed + extension; + } + else + { +#if !PRE_LUCENE_NET_2_0_0_COMPATIBLE + return base_Renamed + "_" + Number.ToString(gen) + extension; +#else + return base_Renamed + "_" + System.Convert.ToString(gen, 16) + extension; +#endif + } + } + + /// Returns true if the provided filename is one of the doc + /// store files (ends with an extension in + /// STORE_INDEX_EXTENSIONS). + /// + internal static bool IsDocStoreFile(System.String fileName) + { + if (fileName.EndsWith(COMPOUND_FILE_STORE_EXTENSION)) + return true; + for (int i = 0; i < STORE_INDEX_EXTENSIONS.Length; i++) + if (fileName.EndsWith(STORE_INDEX_EXTENSIONS[i])) + return true; + return false; + } + + internal static System.String SegmentFileName(System.String segmentName, System.String ext) + { + return segmentName + "." + ext; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexReader.cs b/external/Lucene.Net.Light/src/core/Index/IndexReader.cs new file mode 100644 index 0000000000..5c3bd9b33c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexReader.cs @@ -0,0 +1,1374 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Documents; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using Lucene.Net.Store; +using Similarity = Lucene.Net.Search.Similarity; + +namespace Lucene.Net.Index +{ + + /// IndexReader is an abstract class, providing an interface for accessing an + /// index. Search of an index is done entirely through this abstract interface, + /// so that any subclass which implements it is searchable. + ///

Concrete subclasses of IndexReader are usually constructed with a call to + /// one of the static open() methods, e.g. + ///. + ///

For efficiency, in this API documents are often referred to via + /// document numbers, non-negative integers which each name a unique + /// document in the index. These document numbers are ephemeral--they may change + /// as documents are added to and deleted from an index. Clients should thus not + /// rely on a given document having the same number between sessions. + ///

An IndexReader can be opened on a directory for which an IndexWriter is + /// opened already, but it cannot be used to delete documents from the index then. + ///

+ /// NOTE: for backwards API compatibility, several methods are not listed + /// as abstract, but have no useful implementations in this base class and + /// instead always throw UnsupportedOperationException. Subclasses are + /// strongly encouraged to override these methods, but in many cases may not + /// need to. + ///

+ ///

+ /// NOTE: as of 2.4, it's possible to open a read-only + /// IndexReader using the static open methods that accept the + /// boolean readOnly parameter. Such a reader has better + /// concurrency as it's not necessary to synchronize on the + /// isDeleted method. You must explicitly specify false + /// if you want to make changes with the resulting IndexReader. + ///

+ ///

NOTE: + /// instances are completely thread + /// safe, meaning multiple threads can call any of its methods, + /// concurrently. If your application requires external + /// synchronization, you should not synchronize on the + /// IndexReader instance; use your own + /// (non-Lucene) objects instead. + ///

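// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the vendored sources): minimal use of the
// IndexReader API documented above -- open a read-only reader on a directory,
// walk the documents by document number, and release it. The index path and
// the "title" stored field are hypothetical.
using System;
using Lucene.Net.Index;
using Lucene.Net.Store;

static class IndexReaderUsageSketch
{
    static void Main()
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/my-index"));
        IndexReader reader = IndexReader.Open(dir, true);    // readOnly = true: better concurrency
        try
        {
            for (int docId = 0; docId < reader.MaxDoc; docId++)
            {
                if (reader.IsDeleted(docId))
                    continue;                                // document numbers are ephemeral; skip deletions
                var doc = reader.Document(docId);
                Console.WriteLine(doc.Get("title"));
            }
        }
        finally
        {
            reader.Dispose();
            dir.Close();
        }
    }
}
// ----------------------------------------------------------------------------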
+ public abstract class IndexReader : System.ICloneable, System.IDisposable + { + private class AnonymousClassFindSegmentsFile : SegmentInfos.FindSegmentsFile + { + private void InitBlock(Lucene.Net.Store.Directory directory2) + { + this.directory2 = directory2; + } + private Lucene.Net.Store.Directory directory2; + internal AnonymousClassFindSegmentsFile(Lucene.Net.Store.Directory directory2, Lucene.Net.Store.Directory Param1):base(Param1) + { + InitBlock(directory2); + } + public override System.Object DoBody(System.String segmentFileName) + { + return (long) directory2.FileModified(segmentFileName); + } + } + + /// Constants describing field properties, for example used for + /// . + /// + public sealed class FieldOption + { + private readonly System.String option; + internal FieldOption() + { + } + internal FieldOption(System.String option) + { + this.option = option; + } + public override System.String ToString() + { + return this.option; + } + /// All fields + public static readonly FieldOption ALL = new FieldOption("ALL"); + /// All indexed fields + public static readonly FieldOption INDEXED = new FieldOption("INDEXED"); + /// All fields that store payloads + public static readonly FieldOption STORES_PAYLOADS = new FieldOption("STORES_PAYLOADS"); + /// All fields that omit tf + public static readonly FieldOption OMIT_TERM_FREQ_AND_POSITIONS = new FieldOption("OMIT_TERM_FREQ_AND_POSITIONS"); + /// All fields which are not indexed + public static readonly FieldOption UNINDEXED = new FieldOption("UNINDEXED"); + /// All fields which are indexed with termvectors enabled + public static readonly FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption("INDEXED_WITH_TERMVECTOR"); + /// All fields which are indexed but don't have termvectors enabled + public static readonly FieldOption INDEXED_NO_TERMVECTOR = new FieldOption("INDEXED_NO_TERMVECTOR"); + /// All fields with termvectors enabled. Please note that only standard termvector fields are returned + public static readonly FieldOption TERMVECTOR = new FieldOption("TERMVECTOR"); + /// All fields with termvectors with position values enabled + public static readonly FieldOption TERMVECTOR_WITH_POSITION = new FieldOption("TERMVECTOR_WITH_POSITION"); + /// All fields with termvectors with offset values enabled + public static readonly FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption("TERMVECTOR_WITH_OFFSET"); + /// All fields with termvectors with offset values and position values enabled + public static readonly FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET"); + } + + private bool closed; + protected internal bool hasChanges; + + private int refCount; + + protected internal static int DEFAULT_TERMS_INDEX_DIVISOR = 1; + + /// Expert: returns the current refCount for this reader + public virtual int RefCount + { + get + { + lock (this) + { + return refCount; + } + } + } + + /// Expert: increments the refCount of this IndexReader + /// instance. RefCounts are used to determine when a + /// reader can be closed safely, i.e. as soon as there are + /// no more references. Be sure to always call a + /// corresponding , in a finally clause; + /// otherwise the reader may never be closed. Note that + /// simply calls decRef(), which means that + /// the IndexReader will not really be closed until + /// has been called for all outstanding + /// references. 
+ /// + /// + /// + /// + public virtual void IncRef() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0); + EnsureOpen(); + refCount++; + } + } + + /// Expert: decreases the refCount of this IndexReader + /// instance. If the refCount drops to 0, then pending + /// changes (if any) are committed to the index and this + /// reader is closed. + /// + /// + /// IOException in case an IOException occurs in commit() or doClose() + /// + /// + /// + /// + public virtual void DecRef() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0); + EnsureOpen(); + if (refCount == 1) + { + Commit(); + DoClose(); + } + refCount--; + } + } + + protected internal IndexReader() + { + refCount = 1; + } + + /// AlreadyClosedException if this IndexReader is closed + protected internal void EnsureOpen() + { + if (refCount <= 0) + { + throw new AlreadyClosedException("this IndexReader is closed"); + } + } + + /// Returns an IndexReader reading the index in the given + /// Directory. You should pass readOnly=true, since it + /// gives much better concurrent performance, unless you + /// intend to do write operations (delete documents or + /// change norms) with the reader. + /// + /// the index directory + /// true if no changes (deletions, norms) will be made with this IndexReader + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public static IndexReader Open(Directory directory, bool readOnly) + { + return Open(directory, null, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); + } + + /// Expert: returns an IndexReader reading the index in the given + /// . You should pass readOnly=true, since it + /// gives much better concurrent performance, unless you + /// intend to do write operations (delete documents or + /// change norms) with the reader. + /// + /// the commit point to open + /// + /// true if no changes (deletions, norms) will be made with this IndexReader + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static IndexReader Open(IndexCommit commit, bool readOnly) + { + return Open(commit.Directory, null, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); + } + + /// Expert: returns an IndexReader reading the index in + /// the given Directory, with a custom + ///. You should pass readOnly=true, + /// since it gives much better concurrent performance, + /// unless you intend to do write operations (delete + /// documents or change norms) with the reader. + /// + /// the index directory + /// + /// a custom deletion policy (only used + /// if you use this reader to perform deletes or to set + /// norms); see for details. + /// + /// true if no changes (deletions, norms) will be made with this IndexReader + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly) + { + return Open(directory, deletionPolicy, null, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); + } + + /// Expert: returns an IndexReader reading the index in + /// the given Directory, with a custom + ///. You should pass readOnly=true, + /// since it gives much better concurrent performance, + /// unless you intend to do write operations (delete + /// documents or change norms) with the reader. + /// + /// the index directory + /// + /// a custom deletion policy (only used + /// if you use this reader to perform deletes or to set + /// norms); see for details. 
+ /// + /// true if no changes (deletions, norms) will be made with this IndexReader + /// + /// Subsamples which indexed + /// terms are loaded into RAM. This has the same effect as + /// IndexWriter.SetTermIndexInterval + /// except that setting + /// must be done at indexing time while this setting can be + /// set per reader. When set to N, then one in every + /// N*termIndexInterval terms in the index is loaded into + /// memory. By setting this to a value > 1 you can reduce + /// memory usage, at the expense of higher latency when + /// loading a TermInfo. The default value is 1. Set this + /// to -1 to skip loading the terms index entirely. + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor) + { + return Open(directory, deletionPolicy, null, readOnly, termInfosIndexDivisor); + } + + /// Expert: returns an IndexReader reading the index in + /// the given Directory, using a specific commit and with + /// a custom . You should pass + /// readOnly=true, since it gives much better concurrent + /// performance, unless you intend to do write operations + /// (delete documents or change norms) with the reader. + /// + /// the specific to open; + /// see to list all commits + /// in a directory + /// + /// a custom deletion policy (only used + /// if you use this reader to perform deletes or to set + /// norms); see for details. + /// + /// true if no changes (deletions, norms) will be made with this IndexReader + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly) + { + return Open(commit.Directory, deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); + } + + /// Expert: returns an IndexReader reading the index in + /// the given Directory, using a specific commit and with + /// a custom . You should pass + /// readOnly=true, since it gives much better concurrent + /// performance, unless you intend to do write operations + /// (delete documents or change norms) with the reader. + /// + /// the specific to open; + /// see to list all commits + /// in a directory + /// + /// a custom deletion policy (only used + /// if you use this reader to perform deletes or to set + /// norms); see for details. + /// + /// true if no changes (deletions, norms) will be made with this IndexReader + /// + /// Subsambles which indexed + /// terms are loaded into RAM. This has the same effect as + /// IndexWriter.SetTermIndexInterval + /// except that setting + /// must be done at indexing time while this setting can be + /// set per reader. When set to N, then one in every + /// N*termIndexInterval terms in the index is loaded into + /// memory. By setting this to a value > 1 you can reduce + /// memory usage, at the expense of higher latency when + /// loading a TermInfo. The default value is 1. Set this + /// to -1 to skip loading the terms index entirely. 
+ /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static IndexReader Open(IndexCommit commit, IndexDeletionPolicy deletionPolicy, bool readOnly, int termInfosIndexDivisor) + { + return Open(commit.Directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); + } + + private static IndexReader Open(Directory directory, IndexDeletionPolicy deletionPolicy, IndexCommit commit, bool readOnly, int termInfosIndexDivisor) + { + return DirectoryReader.Open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); + } + + /// Refreshes an IndexReader if the index has changed since this instance + /// was (re)opened. + ///

+ /// Opening an IndexReader is an expensive operation. This method can be used + /// to refresh an existing IndexReader to reduce these costs. This method + /// tries to only load segments that have changed or were created after the + /// IndexReader was (re)opened. + ///

+ /// If the index has not changed since this instance was (re)opened, then this + /// call is a NOOP and returns this instance. Otherwise, a new instance is + /// returned. The old instance is not closed and remains usable.
+ ///

+ /// If the reader is reopened, even though they share + /// resources internally, it's safe to make changes + /// (deletions, norms) with the new reader. All shared + /// mutable state obeys "copy on write" semantics to ensure + /// the changes are not seen by other readers. + ///

+ /// You can determine whether a reader was actually reopened by comparing the + /// old instance with the instance returned by this method: + /// + /// IndexReader reader = ... + /// ... + /// IndexReader newReader = r.reopen(); + /// if (newReader != reader) { + /// ... // reader was reopened + /// reader.close(); + /// } + /// reader = newReader; + /// ... + /// + /// + /// Be sure to synchronize that code so that other threads, + /// if present, can never use reader after it has been + /// closed and before it's switched to newReader. + /// + ///

NOTE: If this reader is a near real-time + /// reader (obtained from , + /// reopen() will simply call writer.getReader() again for + /// you, though this may change in the future. + /// + ///

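// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the vendored sources): the reopen pattern
// from the comment above, written out as compilable C#. Reopen() returns the
// same instance when the index is unchanged, so the old reader is released
// only when a different instance comes back.
using Lucene.Net.Index;

static class ReopenSketch
{
    public static IndexReader Refresh(IndexReader reader)
    {
        IndexReader newReader = reader.Reopen();
        if (newReader != reader)
        {
            // The index changed: a new reader was returned and the old one is
            // not closed automatically, so release it here.
            reader.Dispose();
            reader = newReader;
        }
        return reader;
    }
}
// ----------------------------------------------------------------------------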
+ /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public virtual IndexReader Reopen() + { + lock (this) + { + throw new NotSupportedException("This reader does not support reopen()."); + } + } + + + /// Just like , except you can change the + /// readOnly of the original reader. If the index is + /// unchanged but readOnly is different then a new reader + /// will be returned. + /// + public virtual IndexReader Reopen(bool openReadOnly) + { + lock (this) + { + throw new NotSupportedException("This reader does not support reopen()."); + } + } + + /// Expert: reopen this reader on a specific commit point. + /// This always returns a readOnly reader. If the + /// specified commit point matches what this reader is + /// already on, and this reader is already readOnly, then + /// this same instance is returned; if it is not already + /// readOnly, a readOnly clone is returned. + /// + public virtual IndexReader Reopen(IndexCommit commit) + { + lock (this) + { + throw new NotSupportedException("This reader does not support reopen(IndexCommit)."); + } + } + + /// Efficiently clones the IndexReader (sharing most + /// internal state). + ///

+ /// On cloning a reader with pending changes (deletions, + /// norms), the original reader transfers its write lock to + /// the cloned reader. This means only the cloned reader + /// may make further changes to the index, and commit the + /// changes to the index on close, but the old reader still + /// reflects all changes made up until it was cloned. + ///

+ /// Like , it's safe to make changes to + /// either the original or the cloned reader: all shared + /// mutable state obeys "copy on write" semantics to ensure + /// the changes are not seen by other readers. + ///

+ ///

+ /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public virtual System.Object Clone() + { + throw new System.NotSupportedException("This reader does not implement clone()"); + } + + /// Clones the IndexReader and optionally changes readOnly. A readOnly + /// reader cannot open a writeable reader. + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public virtual IndexReader Clone(bool openReadOnly) + { + lock (this) + { + throw new System.NotSupportedException("This reader does not implement clone()"); + } + } + + /// Returns the directory associated with this index. The Default + /// implementation returns the directory specified by subclasses when + /// delegating to the IndexReader(Directory) constructor, or throws an + /// UnsupportedOperationException if one was not specified. + /// + /// UnsupportedOperationException if no directory + public virtual Directory Directory() + { + EnsureOpen(); + throw new NotSupportedException("This reader does not support this method."); + } + + /// Returns the time the index in the named directory was last modified. + /// Do not use this to check whether the reader is still up-to-date, use + /// instead. + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static long LastModified(Directory directory2) + { + return (long) ((System.Int64) new AnonymousClassFindSegmentsFile(directory2, directory2).Run()); + } + + /// Reads version number from segments files. The version number is + /// initialized with a timestamp and then increased by one for each change of + /// the index. + /// + /// + /// where the index resides. + /// + /// version number. + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public static long GetCurrentVersion(Directory directory) + { + return SegmentInfos.ReadCurrentVersion(directory); + } + + /// Reads commitUserData, previously passed to + /// , + /// from current index segments file. This will return null if + /// + /// has never been called for this index. + /// + /// where the index resides. + /// + /// commit userData. + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + /// + /// + /// + /// + public static System.Collections.Generic.IDictionary GetCommitUserData(Directory directory) + { + return SegmentInfos.ReadCurrentUserData(directory); + } + + /// Version number when this IndexReader was opened. Not implemented in the + /// IndexReader base class. + /// + ///
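// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the vendored sources): the static helpers
// declared above can be used to inspect an index without opening a reader.
// The directory path is hypothetical.
using System;
using Lucene.Net.Index;
using Lucene.Net.Store;

static class IndexMetadataSketch
{
    static void Main()
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/my-index"));
        Console.WriteLine("version:       " + IndexReader.GetCurrentVersion(dir)); // advanced on every commit
        Console.WriteLine("last modified: " + IndexReader.LastModified(dir));      // from the segments file
        dir.Close();
    }
}
// ----------------------------------------------------------------------------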

+ /// If this reader is based on a Directory (ie, was created by calling + /// , or + /// on a reader based on a Directory), then + /// this method returns the version recorded in the commit that the reader + /// opened. This version is advanced every time is + /// called. + ///

+ /// + ///

+ /// If instead this reader is a near real-time reader (ie, obtained by a call + /// to , or by calling on a near + /// real-time reader), then this method returns the version of the last + /// commit done by the writer. Note that even as further changes are made + /// with the writer, the version will not change until a commit is + /// completed. Thus, you should not rely on this method to determine when a + /// near real-time reader should be opened. Use instead.

+ /// + ///

+ /// UnsupportedOperationException + /// unless overridden in subclass + /// + public virtual long Version + { + get { throw new System.NotSupportedException("This reader does not support this method."); } + } + + /// Retrieve the String userData optionally passed to + /// . + /// This will return null if + /// + /// has never been called for this index. + /// + /// + /// + public virtual IDictionary CommitUserData + { + get { throw new System.NotSupportedException("This reader does not support this method."); } + } + + /// Check whether any new changes have occurred to the index since this + /// reader was opened. + /// + ///

+ /// If this reader is based on a Directory (ie, was created by calling + /// + /// Open(Store.Directory) + /// , or on a reader based on a Directory), then + /// this method checks if any further commits (see + /// have occurred in that directory). + ///

+ /// + ///

+ /// If instead this reader is a near real-time reader (ie, obtained by a call + /// to , or by calling on a near + /// real-time reader), then this method checks if either a new commit has + /// occurred, or any new uncommitted changes have taken place via the writer. + /// Note that even if the writer has only performed merging, this method will + /// still return false. 

+ /// + ///

+ /// In any event, if this returns false, you should call to + /// get a new reader that sees the changes. + ///

+ /// + ///

+ /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + /// UnsupportedOperationException unless overridden in subclass + public virtual bool IsCurrent() + { + throw new NotSupportedException("This reader does not support this method."); + } + + /// Checks is the index is optimized (if it has a single segment and + /// no deletions). Not implemented in the IndexReader base class. + /// + /// &lt;c&gt;true&lt;/c&gt; if the index is optimized; &lt;c&gt;false&lt;/c&gt; otherwise + /// UnsupportedOperationException unless overridden in subclass + public virtual bool IsOptimized() + { + throw new NotSupportedException("This reader does not support this method."); + } + + /// Return an array of term frequency vectors for the specified document. + /// The array contains a vector for each vectorized field in the document. + /// Each vector contains terms and frequencies for all terms in a given vectorized field. + /// If no such fields existed, the method returns null. The term vectors that are + /// returned may either be of type + /// or of type if + /// positions or offsets have been stored. + /// + /// + /// document for which term frequency vectors are returned + /// + /// array of term frequency vectors. May be null if no term vectors have been + /// stored for the specified document. + /// + /// IOException if index cannot be accessed + /// + /// + abstract public ITermFreqVector[] GetTermFreqVectors(int docNumber); + + + /// Return a term frequency vector for the specified document and field. The + /// returned vector contains terms and frequencies for the terms in + /// the specified field of this document, if the field had the storeTermVector + /// flag set. If termvectors had been stored with positions or offsets, a + /// is returned. + /// + /// + /// document for which the term frequency vector is returned + /// + /// field for which the term frequency vector is returned. + /// + /// term frequency vector May be null if field does not exist in the specified + /// document or term vector was not stored. + /// + /// IOException if index cannot be accessed + /// + /// + abstract public ITermFreqVector GetTermFreqVector(int docNumber, String field); + + /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of + /// the . + /// + /// The number of the document to load the vector for + /// + /// The name of the field to load + /// + /// The to process the vector. Must not be null + /// + /// IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. + /// + /// + abstract public void GetTermFreqVector(int docNumber, String field, TermVectorMapper mapper); + + /// Map all the term vectors for all fields in a Document + /// The number of the document to load the vector for + /// + /// The to process the vector. Must not be null + /// + /// IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. + abstract public void GetTermFreqVector(int docNumber, TermVectorMapper mapper); + + /// Returns true if an index exists at the specified directory. + /// If the directory does not exist or if there is no index in it. 
+ /// + /// the directory to check for an index + /// + /// true if an index exists; false otherwise + /// + /// IOException if there is a problem with accessing the index + public static bool IndexExists(Directory directory) + { + return SegmentInfos.GetCurrentSegmentGeneration(directory) != - 1; + } + + /// Returns the number of documents in this index. + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public abstract int NumDocs(); + + /// Returns one greater than the largest possible document number. + /// This may be used to, e.g., determine how big to allocate an array which + /// will have an element for every document number in an index. + /// + public abstract int MaxDoc { get; } + + /// Returns the number of deleted documents. + public virtual int NumDeletedDocs + { + get { return MaxDoc - NumDocs(); } + } + + /// Returns the stored fields of the nth + /// Document in this index. + ///

+ /// NOTE: for performance reasons, this method does not check if the + /// requested document is deleted, and therefore asking for a deleted document + /// may yield unspecified results. Usually this is not required, however you + /// can call with the requested document ID to verify + /// the document is not deleted. + /// + ///

+ /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public virtual Document Document(int n) + { + EnsureOpen(); + return Document(n, null); + } + + /// Returns the stored fields of the nth + /// Document in this index. + ///

+ /// NOTE: for performance reasons, this method does not check if the + /// requested document is deleted, and therefore asking for a deleted document + /// may yield unspecified results. Usually this is not required, however you + /// can call with the requested document ID to verify + /// the document is not deleted. + /// + ///

+ /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + public Document this[int doc] + { + get { return Document(doc); } + } + + /// Get the at the n + /// th position. The may be used to determine + /// what s to load and how they should + /// be loaded. NOTE: If this Reader (more specifically, the underlying + /// FieldsReader) is closed before the lazy + /// is loaded an exception may be + /// thrown. If you want the value of a lazy + /// to be available after closing you + /// must explicitly load it or fetch the Document again with a new loader. + ///

+ /// NOTE: for performance reasons, this method does not check if the + /// requested document is deleted, and therefore asking for a deleted document + /// may yield unspecified results. Usually this is not required, however you + /// can call with the requested document ID to verify + /// the document is not deleted. + /// + ///

+ /// Get the document at the nth position + /// + /// The to use to determine what + /// Fields should be loaded on the Document. May be null, in which case + /// all Fields will be loaded. + /// + /// The stored fields of the + /// at the nth position + /// + /// CorruptIndexException if the index is corrupt + /// If there is a low-level IO error + /// + /// + /// + /// + /// + /// + /// + /// + // TODO (1.5): When we convert to JDK 1.5 make this Set + public abstract Document Document(int n, FieldSelector fieldSelector); + + /// Returns true if document n has been deleted + public abstract bool IsDeleted(int n); + + /// Returns true if any documents have been deleted + public abstract bool HasDeletions { get; } + + /// Returns true if there are norms stored for this field. + public virtual bool HasNorms(System.String field) + { + // backward compatible implementation. + // SegmentReader has an efficient implementation. + EnsureOpen(); + return Norms(field) != null; + } + + /// + /// Returns the byte-encoded normalization factor for the named field of + /// every document. This is used by the search code to score documents. + /// + /// + public abstract byte[] Norms(System.String field); + + /// + /// Reads the byte-encoded normalization factor for the named field of every + /// document. This is used by the search code to score documents. + /// + /// + public abstract void Norms(System.String field, byte[] bytes, int offset); + + /// Expert: Resets the normalization factor for the named field of the named + /// document. The norm represents the product of the field's boost + /// and its length normalization. Thus, to preserve the length normalization + /// values when resetting this, one should base the new value upon the old. + /// + /// NOTE: If this field does not store norms, then + /// this method call will silently do nothing. + /// + /// + /// + /// + /// If the index has changed since this reader was opened + /// + /// + /// If the index is corrupt + /// + /// + /// If another writer has this index open (write.lock could not be obtained) + /// + /// + /// If there is a low-level IO error + /// + public virtual void SetNorm(int doc, String field, byte value) + { + lock (this) + { + EnsureOpen(); + AcquireWriteLock(); + hasChanges = true; + DoSetNorm(doc, field, value); + } + } + + /// Implements setNorm in subclass. + protected internal abstract void DoSetNorm(int doc, System.String field, byte value_Renamed); + + /// + /// Expert: Resets the normalization factor for the named field of the named document. + /// + /// + /// + /// + /// If the index has changed since this reader was opened + /// + /// + /// If the index is corrupt + /// + /// + /// If another writer has this index open (write.lock could not be obtained) + /// + /// + /// If there is a low-level IO error + /// + public virtual void SetNorm(int doc, System.String field, float value) + { + EnsureOpen(); + SetNorm(doc, field, Similarity.EncodeNorm(value)); + } + + /// Returns an enumeration of all the terms in the index. The + /// enumeration is ordered by Term.compareTo(). Each term is greater + /// than all that precede it in the enumeration. Note that after + /// calling terms(), must be called + /// on the resulting enumeration before calling other methods such as + /// . + /// + /// + /// If there is a low-level IO error + /// + public abstract TermEnum Terms(); + + /// Returns an enumeration of all terms starting at a given term. 
If + /// the given term does not exist, the enumeration is positioned at the + /// first term greater than the supplied term. The enumeration is + /// ordered by Term.compareTo(). Each term is greater than all that + /// precede it in the enumeration. + /// + /// + /// If there is a low-level IO error + /// + public abstract TermEnum Terms(Term t); + + /// Returns the number of documents containing the term t. + /// If there is a low-level IO error + public abstract int DocFreq(Term t); + + /// Returns an enumeration of all the documents which contain + /// term. For each document, the document number, the frequency of + /// the term in that document is also provided, for use in + /// search scoring. If term is null, then all non-deleted + /// docs are returned with freq=1. + /// Thus, this method implements the mapping: + ///

+ /// Term    =>    <docNum, freq>* + /// + ///

The enumeration is ordered by document number. Each document number + /// is greater than all that precede it in the enumeration. + ///

+ /// If there is a low-level IO error + public virtual TermDocs TermDocs(Term term) + { + EnsureOpen(); + TermDocs termDocs = TermDocs(); + termDocs.Seek(term); + return termDocs; + } + + /// Returns an unpositioned enumerator. + /// If there is a low-level IO error + public abstract TermDocs TermDocs(); + + /// Returns an enumeration of all the documents which contain + /// term. For each document, in addition to the document number + /// and frequency of the term in that document, a list of all of the ordinal + /// positions of the term in the document is available. Thus, this method + /// implements the mapping: + /// + ///

+ /// Term    =>    <docNum, freq, + /// <pos1, pos2, ... + /// posfreq-1> + /// >* + /// + ///

This positional information facilitates phrase and proximity searching. + ///

The enumeration is ordered by document number. Each document number is + /// greater than all that precede it in the enumeration. + ///

+ /// If there is a low-level IO error + public virtual TermPositions TermPositions(Term term) + { + EnsureOpen(); + TermPositions termPositions = TermPositions(); + termPositions.Seek(term); + return termPositions; + } + + /// Returns an unpositioned enumerator. + /// If there is a low-level IO error + public abstract TermPositions TermPositions(); + + + + /// + /// Deletes the document numbered docNum. Once a document is + /// deleted it will not appear in TermDocs or TermPostitions enumerations. + /// Attempts to read its field with the + /// method will result in an error. The presence of this document may still be + /// reflected in the statistic, though + /// this will be corrected eventually as the index is further modified. + /// + /// + /// If the index has changed since this reader was opened + /// + /// If the index is corrupt + /// + /// If another writer has this index open (write.lock could not be obtained) + /// + /// If there is a low-level IO error + public virtual void DeleteDocument(int docNum) + { + lock (this) + { + EnsureOpen(); + AcquireWriteLock(); + hasChanges = true; + DoDelete(docNum); + } + } + + + /// Implements deletion of the document numbered docNum. + /// Applications should call or . + /// + protected internal abstract void DoDelete(int docNum); + + + /// + /// Deletes all documents that have a given term indexed. + /// This is useful if one uses a document field to hold a unique ID string for + /// the document. Then to delete such a document, one merely constructs a + /// term with the appropriate field and the unique ID string as its text and + /// passes it to this method. + /// See for information about when this deletion will + /// become effective. + /// + /// The number of documents deleted + /// + /// If the index has changed since this reader was opened + /// + /// If the index is corrupt + /// + /// If another writer has this index open (write.lock could not be obtained) + /// + /// If there is a low-level IO error + public virtual int DeleteDocuments(Term term) + { + EnsureOpen(); + TermDocs docs = TermDocs(term); + if (docs == null) + return 0; + int n = 0; + try + { + while (docs.Next()) + { + DeleteDocument(docs.Doc); + n++; + } + } + finally + { + docs.Close(); + } + return n; + } + + /// Undeletes all documents currently marked as deleted in this index. + /// + /// + /// + /// If the index has changed since this reader was opened + /// + /// If the index is corrupt + /// + /// If another writer has this index open (write.lock could not be obtained) + /// + /// If there is a low-level IO error + public virtual void UndeleteAll() + { + lock (this) + { + EnsureOpen(); + AcquireWriteLock(); + hasChanges = true; + DoUndeleteAll(); + } + } + + /// Implements actual undeleteAll() in subclass. + protected internal abstract void DoUndeleteAll(); + + /// + /// Does nothing by default. Subclasses that require a write lock for + /// index modifications must implement this method. 
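// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the vendored sources): two patterns from
// the comments above -- walking the <docNum, freq> postings of a term with
// TermDocs, and deleting documents that carry a unique-id term with
// DeleteDocuments(Term). Field names and values are hypothetical, and the
// reader used for deletion must have been opened with readOnly = false.
using System;
using Lucene.Net.Index;

static class PostingsSketch
{
    public static void WalkPostings(IndexReader reader)
    {
        TermDocs docs = reader.TermDocs(new Term("body", "lucene"));
        try
        {
            while (docs.Next())
            {
                // Document numbers come back in increasing order.
                Console.WriteLine("doc " + docs.Doc + ", freq " + docs.Freq);
            }
        }
        finally
        {
            docs.Close();
        }
    }

    public static int DeleteById(IndexReader writableReader, string id)
    {
        // Deletes every document whose "id" field indexed exactly this value.
        return writableReader.DeleteDocuments(new Term("id", id));
    }
}
// ----------------------------------------------------------------------------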
+ /// + protected internal virtual void AcquireWriteLock() + { + lock (this) + { + /* NOOP */ + } + } + + /// + /// + public void Flush() + { + lock (this) + { + EnsureOpen(); + Commit(); + } + } + + /// Opaque Map (String -> String) + /// that's recorded into the segments file in the index, + /// and retrievable by + /// + /// + public void Flush(IDictionary commitUserData) + { + lock (this) + { + EnsureOpen(); + Commit(commitUserData); + } + } + + /// Commit changes resulting from delete, undeleteAll, or + /// setNorm operations + /// + /// If an exception is hit, then either no changes or all + /// changes will have been committed to the index + /// (transactional semantics). + /// + /// If there is a low-level IO error + public /*protected internal*/ void Commit() + { + lock (this) + { + Commit(null); + } + } + + /// Commit changes resulting from delete, undeleteAll, or + /// setNorm operations + /// + /// If an exception is hit, then either no changes or all + /// changes will have been committed to the index + /// (transactional semantics). + /// + /// If there is a low-level IO error + public void Commit(IDictionary commitUserData) + { + lock (this) + { + if (hasChanges) + { + DoCommit(commitUserData); + } + hasChanges = false; + } + } + + /// Implements commit. + protected internal abstract void DoCommit(IDictionary commitUserData); + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + /// Closes files associated with this index. + /// Also saves any new deletions to disk. + /// No other methods should be called after this has been called. + /// + /// If there is a low-level IO error + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + lock (this) + { + if (!closed) + { + DecRef(); + closed = true; + } + } + } + } + + /// Implements close. + protected internal abstract void DoClose(); + + + /// Get a list of unique field names that exist in this index and have the specified + /// field option information. + /// + /// specifies which field option should be available for the returned fields + /// + /// Collection of Strings indicating the names of the fields. + /// + /// + /// + public abstract ICollection GetFieldNames(FieldOption fldOption); + + /// Expert: return the IndexCommit that this reader has + /// opened. This method is only implemented by those + /// readers that correspond to a Directory with its own + /// segments_N file. + /// + ///

WARNING: this API is new and experimental and + /// may suddenly change.

+ ///

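// ----------------------------------------------------------------------------
// Illustrative sketch (not part of the vendored sources): enumerating the
// commit points present in a directory with ListCommits (declared just below)
// and opening a read-only reader on one of them via Open(IndexCommit, bool).
// With the default deletion policy only the most recent commit is normally
// available. The directory path is hypothetical.
using System;
using Lucene.Net.Index;
using Lucene.Net.Store;

static class CommitPointsSketch
{
    static void Main()
    {
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("/tmp/my-index"));
        foreach (IndexCommit commit in IndexReader.ListCommits(dir))
        {
            Console.WriteLine(commit.SegmentsFileName + " (generation " + commit.Generation + ")");
            IndexReader reader = IndexReader.Open(commit, true);   // read-only view of that commit
            try
            {
                Console.WriteLine("  live documents: " + reader.NumDocs());
            }
            finally
            {
                reader.Dispose();
            }
        }
        dir.Close();
    }
}
// ----------------------------------------------------------------------------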
+ public virtual IndexCommit IndexCommit + { + get { throw new NotSupportedException("This reader does not support this method."); } + } + + /// Prints the filename and size of each file within a given compound file. + /// Add the -extract flag to extract files to the current working directory. + /// In order to make the extracted version of the index work, you have to copy + /// the segments file from the compound index into the directory where the extracted files are stored. + /// + /// Usage: Lucene.Net.Index.IndexReader [-extract] <cfsfile> + /// + [STAThread] + public static void Main(String[] args) + { + System.String filename = null; + bool extract = false; + + foreach (string t in args) + { + if (t.Equals("-extract")) + { + extract = true; + } + else if (filename == null) + { + filename = t; + } + } + + if (filename == null) + { + System.Console.Out.WriteLine("Usage: Lucene.Net.Index.IndexReader [-extract] "); + return ; + } + + Directory dir = null; + CompoundFileReader cfr = null; + + try + { + var file = new System.IO.FileInfo(filename); + System.String dirname = new System.IO.FileInfo(file.FullName).DirectoryName; + filename = file.Name; + dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirname)); + cfr = new CompoundFileReader(dir, filename); + + System.String[] files = cfr.ListAll(); + System.Array.Sort(files); // sort the array of filename so that the output is more readable + + foreach (string t in files) + { + long len = cfr.FileLength(t); + + if (extract) + { + System.Console.Out.WriteLine("extract " + t + " with " + len + " bytes to local directory..."); + IndexInput ii = cfr.OpenInput(t); + + var f = new System.IO.FileStream(t, System.IO.FileMode.Create); + + // read and write with a small buffer, which is more effectiv than reading byte by byte + var buffer = new byte[1024]; + int chunk = buffer.Length; + while (len > 0) + { + var bufLen = (int) System.Math.Min(chunk, len); + ii.ReadBytes(buffer, 0, bufLen); + f.Write(buffer, 0, bufLen); + len -= bufLen; + } + + f.Close(); + ii.Close(); + } + else + System.Console.Out.WriteLine(t + ": " + len + " bytes"); + } + } + catch (System.IO.IOException ioe) + { + System.Console.Error.WriteLine(ioe.StackTrace); + } + finally + { + try + { + if (dir != null) + dir.Close(); + if (cfr != null) + cfr.Close(); + } + catch (System.IO.IOException ioe) + { + System.Console.Error.WriteLine(ioe.StackTrace); + } + } + } + + /// Returns all commit points that exist in the Directory. + /// Normally, because the default is + ///, there would be only + /// one commit point. But if you're using a custom + /// then there could be many commits. + /// Once you have a given commit, you can open a reader on + /// it by calling + /// There must be at least one commit in + /// the Directory, else this method throws . + /// Note that if a commit is in + /// progress while this method is running, that commit + /// may or may not be returned array. + /// + public static System.Collections.Generic.ICollection ListCommits(Directory dir) + { + return DirectoryReader.ListCommits(dir); + } + + /// Expert: returns the sequential sub readers that this + /// reader is logically composed of. For example, + /// IndexSearcher uses this API to drive searching by one + /// sub reader at a time. If this reader is not composed + /// of sequential child readers, it should return null. + /// If this method returns an empty array, that means this + /// reader is a null reader (for example a MultiReader + /// that has no sub readers). + ///

+ /// NOTE: You should not try using sub-readers returned by + /// this method to make any changes (setNorm, deleteDocument, + /// etc.). While this might succeed for one composite reader + /// (like MultiReader), it will most likely lead to index + /// corruption for other readers (like DirectoryReader obtained + /// through . Use the parent reader directly. + ///

+ public virtual IndexReader[] GetSequentialSubReaders() + { + return null; + } + + /// Expert + public virtual object FieldCacheKey + { + get { return this; } + } + + /* Expert. Warning: this returns null if the reader has + * no deletions + */ + + public virtual object DeletesCacheKey + { + get { return this; } + } + + /// Returns the number of unique terms (across all fields) + /// in this reader. + /// + /// This method returns long, even though internally + /// Lucene cannot handle more than 2^31 unique terms, for + /// a possible future when this limitation is removed. + /// + /// + /// UnsupportedOperationException if this count + /// cannot be easily determined (eg Multi*Readers). + /// Instead, you should call + /// and ask each sub reader for + /// its unique term count. + /// + public virtual long UniqueTermCount + { + get { throw new System.NotSupportedException("this reader does not implement getUniqueTermCount()"); } + } + + /// + /// For IndexReader implementations that use + /// TermInfosReader to read terms, this returns the + /// current indexDivisor as specified when the reader was + /// opened. + /// + public virtual int TermInfosIndexDivisor + { + get { throw new NotSupportedException("This reader does not support this method."); } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IndexWriter.cs.REMOVED.git-id b/external/Lucene.Net.Light/src/core/Index/IndexWriter.cs.REMOVED.git-id new file mode 100644 index 0000000000..c5c9c1dc3b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IndexWriter.cs.REMOVED.git-id @@ -0,0 +1 @@ +dda1738b5b62998937804cc0b4a5ddfd58eb2e2a \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/IntBlockPool.cs b/external/Lucene.Net.Light/src/core/Index/IntBlockPool.cs new file mode 100644 index 0000000000..5fbee30043 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/IntBlockPool.cs @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class IntBlockPool + { + private void InitBlock() + { + intUpto = DocumentsWriter.INT_BLOCK_SIZE; + } + + public int[][] buffers = new int[10][]; + + internal int bufferUpto = - 1; // Which buffer we are upto + public int intUpto; // Where we are in head buffer + + public int[] buffer; // Current head buffer + public int intOffset = - DocumentsWriter.INT_BLOCK_SIZE; // Current head offset + + private DocumentsWriter docWriter; + internal bool trackAllocations; + + public IntBlockPool(DocumentsWriter docWriter, bool trackAllocations) + { + InitBlock(); + this.docWriter = docWriter; + this.trackAllocations = trackAllocations; + } + + public void Reset() + { + if (bufferUpto != - 1) + { + if (bufferUpto > 0) + // Recycle all but the first buffer + docWriter.RecycleIntBlocks(buffers, 1, 1 + bufferUpto); + + // Reuse first buffer + bufferUpto = 0; + intUpto = 0; + intOffset = 0; + buffer = buffers[0]; + } + } + + public void NextBuffer() + { + if (1 + bufferUpto == buffers.Length) + { + int[][] newBuffers = new int[(int) (buffers.Length * 1.5)][]; + Array.Copy(buffers, 0, newBuffers, 0, buffers.Length); + buffers = newBuffers; + } + buffer = buffers[1 + bufferUpto] = docWriter.GetIntBlock(trackAllocations); + bufferUpto++; + + intUpto = 0; + intOffset += DocumentsWriter.INT_BLOCK_SIZE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumer.cs b/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumer.cs new file mode 100644 index 0000000000..bb9b2f8919 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumer.cs @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + abstract class InvertedDocConsumer + { + + /// Add a new thread + internal abstract InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread); + + /// Abort (called after hitting AbortException) + public abstract void Abort(); + + /// Flush a new segment + internal abstract void Flush( + IDictionary> threadsAndFields, + SegmentWriteState state); + + /// Close doc stores + internal abstract void CloseDocStore(SegmentWriteState state); + + /// Attempt to free RAM, returning true if any RAM was + /// freed + /// + public abstract bool FreeRAM(); + + internal FieldInfos fieldInfos; + + internal virtual void SetFieldInfos(FieldInfos fieldInfos) + { + this.fieldInfos = fieldInfos; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumerPerField.cs b/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumerPerField.cs new file mode 100644 index 0000000000..471d9b70e9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumerPerField.cs @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + + abstract class InvertedDocConsumerPerField + { + + // Called once per field, and is given all Fieldable + // occurrences for this field in the document. Return + // true if you wish to see inverted tokens for these + // fields: + internal abstract bool Start(IFieldable[] fields, int count); + + // Called before a field instance is being processed + internal abstract void Start(IFieldable field); + + // Called once per inverted token + internal abstract void Add(); + + // Called once per field per document, after all Fieldable + // occurrences are inverted + internal abstract void Finish(); + + // Called on hitting an aborting exception + public abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumerPerThread.cs new file mode 100644 index 0000000000..49ed8dfd9d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/InvertedDocConsumerPerThread.cs @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class InvertedDocConsumerPerThread + { + public abstract void StartDocument(); + internal abstract InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + public abstract DocumentsWriter.DocWriter FinishDocument(); + public abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumer.cs b/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumer.cs new file mode 100644 index 0000000000..fb0a69e513 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumer.cs @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + abstract class InvertedDocEndConsumer + { + public abstract InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread); + public abstract void Flush(IDictionary> threadsAndFields, SegmentWriteState state); + internal abstract void CloseDocStore(SegmentWriteState state); + public abstract void Abort(); + internal abstract void SetFieldInfos(FieldInfos fieldInfos); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumerPerField.cs b/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumerPerField.cs new file mode 100644 index 0000000000..dfad1c9f49 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumerPerField.cs @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class InvertedDocEndConsumerPerField + { + internal abstract void Finish(); + internal abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumerPerThread.cs new file mode 100644 index 0000000000..2f4fb5ce4c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/InvertedDocEndConsumerPerThread.cs @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class InvertedDocEndConsumerPerThread + { + internal abstract void StartDocument(); + internal abstract InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo); + internal abstract void FinishDocument(); + internal abstract void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs b/external/Lucene.Net.Light/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs new file mode 100644 index 0000000000..3775de152c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/KeepOnlyLastCommitDeletionPolicy.cs @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + /// This implementation that + /// keeps only the most recent commit and immediately removes + /// all prior commits after a new commit is done. This is + /// the default deletion policy. + /// + + public sealed class KeepOnlyLastCommitDeletionPolicy : IndexDeletionPolicy + { + + /// Deletes all commits except the most recent one. 
+ public void OnInit(IList commits) where T : IndexCommit + { + // Note that commits.size() should normally be 1: + OnCommit(commits); + } + + /// Deletes all commits except the most recent one. + public void OnCommit(IList commits) where T : IndexCommit + { + // Note that commits.size() should normally be 2 (if not + // called by onInit above): + int size = commits.Count; + for (int i = 0; i < size - 1; i++) + { + commits[i].Delete(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/LogByteSizeMergePolicy.cs b/external/Lucene.Net.Light/src/core/Index/LogByteSizeMergePolicy.cs new file mode 100644 index 0000000000..5d5c9522e8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/LogByteSizeMergePolicy.cs @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// This is a that measures size of a + /// segment as the total byte size of the segment's files. + /// + public class LogByteSizeMergePolicy : LogMergePolicy + { + + /// + /// + public const double DEFAULT_MIN_MERGE_MB = 1.6; + + /// Default maximum segment size. A segment of this size + /// + /// + public static readonly long DEFAULT_MAX_MERGE_MB = long.MaxValue; + + public LogByteSizeMergePolicy(IndexWriter writer) + : base(writer) + { + minMergeSize = (long) (DEFAULT_MIN_MERGE_MB * 1024 * 1024); + //mgarski - the line below causes an overflow in .NET, resulting in a negative number... + //maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB * 1024 * 1024); + maxMergeSize = DEFAULT_MAX_MERGE_MB; + } + protected internal override long Size(SegmentInfo info) + { + return SizeBytes(info); + } + + protected override void Dispose(bool disposing) + { + // Do nothing. + } + + + ///

Gets or sets the largest segment (measured by total + /// byte size of the segment's files, in MB) that may be + /// merged with other segments. Small values (e.g., less + /// than 50 MB) are best for interactive indexing, as this + /// limits the length of pauses while indexing to a few + /// seconds. Larger values are best for batched indexing + /// and speedier searches.
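Illustrative sketch (not part of the patch, hypothetical values): configuring these limits on a LogByteSizeMergePolicy, assuming an existing IndexWriter named writer.

    // Cap merged segments at ~512 MB and treat anything under 10 MB as the lowest level.
    var byteSizePolicy = new LogByteSizeMergePolicy(writer)
    {
        MaxMergeMB = 512.0,
        MinMergeMB = 10.0,
        MergeFactor = 10
    };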

+ /// + ///

Note that MaxMergeDocs is also + /// used to check whether a segment is too large for + /// merging (a segment is disqualified if it exceeds either limit).

+ ///

+ public virtual double MaxMergeMB + { + get { return maxMergeSize/1024d/1024d; } + set + { + //mgarski: java gracefully overflows to Int64.MaxValue, .NET to MinValue... + maxMergeSize = (long) (value*1024*1024); + if (maxMergeSize < 0) + { + maxMergeSize = DEFAULT_MAX_MERGE_MB; + } + } + } + + /// Gets or sets the minimum size for the lowest level segments. + /// Any segments below this size are considered to be on + /// the same level (even if they vary drastically in size) + /// and will be merged whenever there are mergeFactor of + /// them. This effectively truncates the "long tail" of + /// small segments that would otherwise be created into a + /// single level. If you set this too large, it could + /// greatly increase the merging cost during indexing (if + /// you flush many small segments). + /// + public virtual double MinMergeMB + { + get { return ((double) minMergeSize)/1024/1024; } + set { minMergeSize = (long) (value*1024*1024); } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/LogDocMergePolicy.cs b/external/Lucene.Net.Light/src/core/Index/LogDocMergePolicy.cs new file mode 100644 index 0000000000..55ee407ddf --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/LogDocMergePolicy.cs @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// This is a that measures size of a + /// segment as the number of documents (not taking deletions + /// into account). + /// + + public class LogDocMergePolicy : LogMergePolicy + { + + /// + /// + public const int DEFAULT_MIN_MERGE_DOCS = 1000; + + public LogDocMergePolicy(IndexWriter writer):base(writer) + { + minMergeSize = DEFAULT_MIN_MERGE_DOCS; + + // maxMergeSize is never used by LogDocMergePolicy; set + // it to Long.MAX_VALUE to disable it + maxMergeSize = System.Int64.MaxValue; + } + protected internal override long Size(SegmentInfo info) + { + return SizeDocs(info); + } + + protected override void Dispose(bool disposing) + { + // Do nothing. + } + + /// Gets or sets the minimum size for the lowest level segments. + /// Any segments below this size are considered to be on + /// the same level (even if they vary drastically in size) + /// and will be merged whenever there are mergeFactor of + /// them. This effectively truncates the "long tail" of + /// small segments that would otherwise be created into a + /// single level. If you set this too large, it could + /// greatly increase the merging cost during indexing (if + /// you flush many small segments). 
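Illustrative sketch (not part of the patch, hypothetical value): the document-count analogue on LogDocMergePolicy, again assuming an existing IndexWriter named writer.

    // Segments below 5,000 documents all fall into the lowest merge level.
    var docCountPolicy = new LogDocMergePolicy(writer) { MinMergeDocs = 5000 };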
+ /// + public virtual int MinMergeDocs + { + get { return (int) minMergeSize; } + set { minMergeSize = value; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/LogMergePolicy.cs b/external/Lucene.Net.Light/src/core/Index/LogMergePolicy.cs new file mode 100644 index 0000000000..c087835bf3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/LogMergePolicy.cs @@ -0,0 +1,580 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + ///

This class implements a MergePolicy that tries + /// to merge segments into levels of exponentially + /// increasing size, where each level has fewer segments than + /// the value of the merge factor. Whenever extra segments + /// (beyond the merge factor upper bound) are encountered, + /// all segments within the level are merged. You can get or + /// set the merge factor via the MergeFactor property.
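For illustration: with MergeFactor = 10, ten freshly flushed segments are merged into one level-1 segment, ten level-1 segments are later merged into one level-2 segment, and so on, so each level holds at most nine segments at rest and the total segment count grows only logarithmically with index size.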

+ /// + ///

This class is abstract and requires a subclass to + /// define the Size(SegmentInfo) method which specifies how a + /// segment's size is determined. LogDocMergePolicy + /// is one subclass that measures size by document count in + /// the segment. LogByteSizeMergePolicy is another + /// subclass that measures size as the total byte size of the + /// file(s) for the segment.

+ ///

+ + public abstract class LogMergePolicy : MergePolicy + { + + /// Defines the allowed range of log(size) for each + /// level. A level is computed by taking the max segment + /// log size, minus LEVEL_LOG_SPAN, and finding all + /// segments falling within that range. + /// + public const double LEVEL_LOG_SPAN = 0.75; + + /// Default merge factor, which is how many segments are + /// merged at a time + /// + public const int DEFAULT_MERGE_FACTOR = 10; + + /// Default maximum segment size. A segment of this size + /// + /// + public static readonly int DEFAULT_MAX_MERGE_DOCS = System.Int32.MaxValue; + + /// Default noCFSRatio. If a merge's size is >= 10% of + /// the index, then we disable compound file for it. + /// See + /// + public static double DEFAULT_NO_CFS_RATIO = 0.1; + + private int mergeFactor = DEFAULT_MERGE_FACTOR; + + internal long minMergeSize; + internal long maxMergeSize; + internal int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + + protected double internalNoCFSRatio = DEFAULT_NO_CFS_RATIO; + + /* TODO 3.0: change this default to true */ + protected internal bool internalCalibrateSizeByDeletes = true; + + private bool useCompoundFile = true; + private bool useCompoundDocStore = true; + + protected LogMergePolicy(IndexWriter writer):base(writer) + { + } + + protected internal virtual bool Verbose() + { + return writer != null && writer.Verbose; + } + + public double NoCFSRatio + { + get { return internalNoCFSRatio; } + set + { + if (value < 0.0 || value > 1.0) + { + throw new ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value); + } + this.internalNoCFSRatio = value; + } + } + + /* If a merged segment will be more than this percentage + * of the total size of the index, leave the segment as + * non-compound file even if compound file is enabled. + * Set to 1.0 to always use CFS regardless of merge + * size. */ + private void Message(System.String message) + { + if (Verbose()) + writer.Message("LMP: " + message); + } + + + /// Gets or sets how often segment indices are merged by + /// addDocument(). With smaller values, less RAM is used + /// while indexing, and searches on unoptimized indices are + /// faster, but indexing speed is slower. With larger + /// values, more RAM is used during indexing, and while + /// searches on unoptimized indices are slower, indexing is + /// faster. Thus larger values (> 10) are best for batch + /// index creation, and smaller values (< 10) for indices + /// that are interactively maintained. + /// + public virtual int MergeFactor + { + get { return mergeFactor; } + set + { + if (value < 2) + throw new System.ArgumentException("mergeFactor cannot be less than 2"); + this.mergeFactor = value; + } + } + + public override bool UseCompoundFile(SegmentInfos infos, SegmentInfo info) + { + return useCompoundFile; + } + + /// Gets or sets whether compound file format should be used for + /// newly flushed and newly merged segments. + /// + public virtual void SetUseCompoundFile(bool useCompoundFile) + { + this.useCompoundFile = useCompoundFile; + } + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual bool GetUseCompoundFile() + { + return useCompoundFile; + } + + // Javadoc inherited + public override bool UseCompoundDocStore(SegmentInfos infos) + { + return useCompoundDocStore; + } + + /// Sets whether compound file format should be used for + /// newly flushed and newly merged doc store + /// segment files (term vectors and stored fields). 
+ /// + public virtual void SetUseCompoundDocStore(bool useCompoundDocStore) + { + this.useCompoundDocStore = useCompoundDocStore; + } + + /// Returns true if newly flushed and newly merge doc + /// store segment files (term vectors and stored fields) + /// + /// + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual bool GetUseCompoundDocStore() + { + return useCompoundDocStore; + } + + /// Gets or sets whether the segment size should be calibrated by + /// the number of deletes when choosing segments for merge. + /// + public virtual bool CalibrateSizeByDeletes + { + set { this.internalCalibrateSizeByDeletes = value; } + get { return internalCalibrateSizeByDeletes; } + } + + abstract protected internal long Size(SegmentInfo info); + + protected internal virtual long SizeDocs(SegmentInfo info) + { + if (internalCalibrateSizeByDeletes) + { + int delCount = writer.NumDeletedDocs(info); + return (info.docCount - (long) delCount); + } + else + { + return info.docCount; + } + } + + protected internal virtual long SizeBytes(SegmentInfo info) + { + long byteSize = info.SizeInBytes(); + if (internalCalibrateSizeByDeletes) + { + int delCount = writer.NumDeletedDocs(info); + float delRatio = (info.docCount <= 0?0.0f:((float) delCount / (float) info.docCount)); + return (info.docCount <= 0?byteSize:(long) (byteSize * (1.0f - delRatio))); + } + else + { + return byteSize; + } + } + + private bool IsOptimized(SegmentInfos infos, int maxNumSegments, ISet segmentsToOptimize) + { + int numSegments = infos.Count; + int numToOptimize = 0; + SegmentInfo optimizeInfo = null; + for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) + { + SegmentInfo info = infos.Info(i); + if (segmentsToOptimize.Contains(info)) + { + numToOptimize++; + optimizeInfo = info; + } + } + + return numToOptimize <= maxNumSegments && (numToOptimize != 1 || IsOptimized(optimizeInfo)); + } + + /// Returns true if this single info is optimized (has no + /// pending norms or deletes, is in the same dir as the + /// writer, and matches the current compound file setting + /// + private bool IsOptimized(SegmentInfo info) + { + bool hasDeletions = writer.NumDeletedDocs(info) > 0; + return !hasDeletions && !info.HasSeparateNorms() && info.dir == writer.Directory && + (info.GetUseCompoundFile() == useCompoundFile || internalNoCFSRatio < 1.0); + } + + /// Returns the merges necessary to optimize the index. + /// This merge policy defines "optimized" to mean only one + /// segment in the index, where that segment has no + /// deletions pending nor separate norms, and it is in + /// compound file format if the current useCompoundFile + /// setting is true. This method returns multiple merges + /// (mergeFactor at a time) so the + /// in use may make use of concurrency. 
+ /// + public override MergeSpecification FindMergesForOptimize(SegmentInfos infos, int maxNumSegments, ISet segmentsToOptimize) + { + MergeSpecification spec; + + System.Diagnostics.Debug.Assert(maxNumSegments > 0); + + if (!IsOptimized(infos, maxNumSegments, segmentsToOptimize)) + { + + // Find the newest (rightmost) segment that needs to + // be optimized (other segments may have been flushed + // since optimize started): + int last = infos.Count; + while (last > 0) + { + SegmentInfo info = infos.Info(--last); + if (segmentsToOptimize.Contains(info)) + { + last++; + break; + } + } + + if (last > 0) + { + + spec = new MergeSpecification(); + + // First, enroll all "full" merges (size + // mergeFactor) to potentially be run concurrently: + while (last - maxNumSegments + 1 >= mergeFactor) + { + spec.Add(MakeOneMerge(infos, infos.Range(last - mergeFactor, last))); + last -= mergeFactor; + } + + // Only if there are no full merges pending do we + // add a final partial (< mergeFactor segments) merge: + if (0 == spec.merges.Count) + { + if (maxNumSegments == 1) + { + + // Since we must optimize down to 1 segment, the + // choice is simple: + if (last > 1 || !IsOptimized(infos.Info(0))) + spec.Add(MakeOneMerge(infos, infos.Range(0, last))); + } + else if (last > maxNumSegments) + { + + // Take care to pick a partial merge that is + // least cost, but does not make the index too + // lopsided. If we always just picked the + // partial tail then we could produce a highly + // lopsided index over time: + + // We must merge this many segments to leave + // maxNumSegments in the index (from when + // optimize was first kicked off): + int finalMergeSize = last - maxNumSegments + 1; + + // Consider all possible starting points: + long bestSize = 0; + int bestStart = 0; + + for (int i = 0; i < last - finalMergeSize + 1; i++) + { + long sumSize = 0; + for (int j = 0; j < finalMergeSize; j++) + sumSize += Size(infos.Info(j + i)); + if (i == 0 || (sumSize < 2 * Size(infos.Info(i - 1)) && sumSize < bestSize)) + { + bestStart = i; + bestSize = sumSize; + } + } + + spec.Add(MakeOneMerge(infos, infos.Range(bestStart, bestStart + finalMergeSize))); + } + } + } + else + spec = null; + } + else + spec = null; + + return spec; + } + + /// Finds merges necessary to expunge all deletes from the + /// index. We simply merge adjacent segments that have + /// deletes, up to mergeFactor at a time. 
+ /// + public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos) + { + int numSegments = segmentInfos.Count; + + if (Verbose()) + Message("findMergesToExpungeDeletes: " + numSegments + " segments"); + + MergeSpecification spec = new MergeSpecification(); + int firstSegmentWithDeletions = - 1; + for (int i = 0; i < numSegments; i++) + { + SegmentInfo info = segmentInfos.Info(i); + int delCount = writer.NumDeletedDocs(info); + if (delCount > 0) + { + if (Verbose()) + Message(" segment " + info.name + " has deletions"); + if (firstSegmentWithDeletions == - 1) + firstSegmentWithDeletions = i; + else if (i - firstSegmentWithDeletions == mergeFactor) + { + // We've seen mergeFactor segments in a row with + // deletions, so force a merge now: + if (Verbose()) + Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); + spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); + firstSegmentWithDeletions = i; + } + } + else if (firstSegmentWithDeletions != - 1) + { + // End of a sequence of segments with deletions, so, + // merge those past segments even if it's fewer than + // mergeFactor segments + if (Verbose()) + Message(" add merge " + firstSegmentWithDeletions + " to " + (i - 1) + " inclusive"); + spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, i))); + firstSegmentWithDeletions = - 1; + } + } + + if (firstSegmentWithDeletions != - 1) + { + if (Verbose()) + Message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments - 1) + " inclusive"); + spec.Add(MakeOneMerge(segmentInfos, segmentInfos.Range(firstSegmentWithDeletions, numSegments))); + } + + return spec; + } + + /// Checks if any merges are now necessary and returns a + /// if so. A merge + /// is necessary when there are more than + /// segments at a given level. When + /// multiple levels have too many segments, this method + /// will return multiple merges, allowing the + /// to use concurrency. + /// + public override MergeSpecification FindMerges(SegmentInfos infos) + { + + int numSegments = infos.Count; + if (Verbose()) + Message("findMerges: " + numSegments + " segments"); + + // Compute levels, which is just log (base mergeFactor) + // of the size of each segment + float[] levels = new float[numSegments]; + float norm = (float) System.Math.Log(mergeFactor); + + for (int i = 0; i < numSegments; i++) + { + SegmentInfo info = infos.Info(i); + long size = Size(info); + + // Floor tiny segments + if (size < 1) + size = 1; + levels[i] = (float) System.Math.Log(size) / norm; + } + + float levelFloor; + if (minMergeSize <= 0) + levelFloor = (float) 0.0; + else + { + levelFloor = (float) (System.Math.Log(minMergeSize) / norm); + } + + // Now, we quantize the log values into levels. The + // first level is any segment whose log size is within + // LEVEL_LOG_SPAN of the max size, or, who has such as + // segment "to the right". Then, we find the max of all + // other segments and use that to define the next level + // segment, etc. + + MergeSpecification spec = null; + + int start = 0; + while (start < numSegments) + { + + // Find max level of all segments not already + // quantized. 
+ float maxLevel = levels[start]; + for (int i = 1 + start; i < numSegments; i++) + { + float level = levels[i]; + if (level > maxLevel) + maxLevel = level; + } + + // Now search backwards for the rightmost segment that + // falls into this level: + float levelBottom; + if (maxLevel < levelFloor) + // All remaining segments fall into the min level + levelBottom = - 1.0F; + else + { + levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN); + + // Force a boundary at the level floor + if (levelBottom < levelFloor && maxLevel >= levelFloor) + levelBottom = levelFloor; + } + + int upto = numSegments - 1; + while (upto >= start) + { + if (levels[upto] >= levelBottom) + { + break; + } + upto--; + } + if (Verbose()) + Message(" level " + levelBottom + " to " + maxLevel + ": " + (1 + upto - start) + " segments"); + + // Finally, record all merges that are viable at this level: + int end = start + mergeFactor; + while (end <= 1 + upto) + { + bool anyTooLarge = false; + for (int i = start; i < end; i++) + { + SegmentInfo info = infos.Info(i); + anyTooLarge |= (Size(info) >= maxMergeSize || SizeDocs(info) >= maxMergeDocs); + } + + if (!anyTooLarge) + { + if (spec == null) + spec = new MergeSpecification(); + if (Verbose()) + Message(" " + start + " to " + end + ": add this merge"); + spec.Add(MakeOneMerge(infos, infos.Range(start, end))); + } + else if (Verbose()) + Message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); + + start = end; + end = start + mergeFactor; + } + + start = 1 + upto; + } + + return spec; + } + + protected OneMerge MakeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) + { + bool doCFS; + if (!useCompoundFile) + { + doCFS = false; + } + else if (internalNoCFSRatio == 1.0) + { + doCFS = true; + } + else + { + long totSize = 0; + foreach(SegmentInfo info in infos) + { + totSize += Size(info); + } + long mergeSize = 0; + foreach(SegmentInfo info in infosToMerge) + { + mergeSize += Size(info); + } + + doCFS = mergeSize <= internalNoCFSRatio * totSize; + } + + return new OneMerge(infosToMerge, doCFS); + } + + /// + /// Gets or sets the largest segment (measured by document + /// count) that may be merged with other segments. + ///

Determines the largest segment (measured by + /// document count) that may be merged with other segments. + /// Small values (e.g., less than 10,000) are best for + /// interactive indexing, as this limits the length of + /// pauses while indexing to a few seconds. Larger values + /// are best for batched indexing and speedier + /// searches.

+ /// + ///

The default value is int.MaxValue (DEFAULT_MAX_MERGE_DOCS).

+ /// + ///

The default merge policy (LogByteSizeMergePolicy) + /// also allows you to set this + /// limit by net size (in MB) of the segment, using + /// MaxMergeMB.

+ ///

+ public virtual int MaxMergeDocs + { + set { this.maxMergeDocs = value; } + get { return maxMergeDocs; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MergeDocIDRemapper.cs b/external/Lucene.Net.Light/src/core/Index/MergeDocIDRemapper.cs new file mode 100644 index 0000000000..2771b53cb5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MergeDocIDRemapper.cs @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// Remaps docIDs after a merge has completed, where the + /// merged segments had at least one deletion. This is used + /// to renumber the buffered deletes in IndexWriter when a + /// merge of segments with deletions commits. + /// + + sealed class MergeDocIDRemapper + { + internal int[] starts; // used for binary search of mapped docID + internal int[] newStarts; // starts, minus the deletes + internal int[][] docMaps; // maps docIDs in the merged set + internal int minDocID; // minimum docID that needs renumbering + internal int maxDocID; // 1+ the max docID that needs renumbering + internal int docShift; // total # deleted docs that were compacted by this merge + + public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) + { + this.docMaps = docMaps; + SegmentInfo firstSegment = merge.segments.Info(0); + int i = 0; + while (true) + { + SegmentInfo info = infos.Info(i); + if (info.Equals(firstSegment)) + break; + minDocID += info.docCount; + i++; + } + + int numDocs = 0; + for (int j = 0; j < docMaps.Length; i++, j++) + { + numDocs += infos.Info(i).docCount; + System.Diagnostics.Debug.Assert(infos.Info(i).Equals(merge.segments.Info(j))); + } + maxDocID = minDocID + numDocs; + + starts = new int[docMaps.Length]; + newStarts = new int[docMaps.Length]; + + starts[0] = minDocID; + newStarts[0] = minDocID; + for (i = 1; i < docMaps.Length; i++) + { + int lastDocCount = merge.segments.Info(i - 1).docCount; + starts[i] = starts[i - 1] + lastDocCount; + newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; + } + docShift = numDocs - mergedDocCount; + + // There are rare cases when docShift is 0. It happens + // if you try to delete a docID that's out of bounds, + // because the SegmentReader still allocates deletedDocs + // and pretends it has deletions ... 
so we can't make + // this assert here + // assert docShift > 0; + + // Make sure it all adds up: + System.Diagnostics.Debug.Assert(docShift == maxDocID -(newStarts [docMaps.Length - 1] + merge.segments.Info(docMaps.Length - 1).docCount - delCounts [docMaps.Length - 1])); + } + + public int Remap(int oldDocID) + { + if (oldDocID < minDocID) + // Unaffected by merge + return oldDocID; + else if (oldDocID >= maxDocID) + // This doc was "after" the merge, so simple shift + return oldDocID - docShift; + else + { + // Binary search to locate this document & find its new docID + int lo = 0; // search starts array + int hi = docMaps.Length - 1; // for first element less + + while (hi >= lo) + { + int mid = Number.URShift((lo + hi), 1); + int midValue = starts[mid]; + if (oldDocID < midValue) + hi = mid - 1; + else if (oldDocID > midValue) + lo = mid + 1; + else + { + // found a match + while (mid + 1 < docMaps.Length && starts[mid + 1] == midValue) + { + mid++; // scan to last match + } + if (docMaps[mid] != null) + return newStarts[mid] + docMaps[mid][oldDocID - starts[mid]]; + else + return newStarts[mid] + oldDocID - starts[mid]; + } + } + if (docMaps[hi] != null) + return newStarts[hi] + docMaps[hi][oldDocID - starts[hi]]; + else + return newStarts[hi] + oldDocID - starts[hi]; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MergePolicy.cs b/external/Lucene.Net.Light/src/core/Index/MergePolicy.cs new file mode 100644 index 0000000000..59b4817a96 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MergePolicy.cs @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + ///

Expert: a MergePolicy determines the sequence of + /// primitive merge operations to be used for overall merge + /// and optimize operations.

+ /// + ///

Whenever the segments in an index have been altered by + /// IndexWriter, either the addition of a newly + /// flushed segment, addition of many segments from + /// addIndexes* calls, or a previous merge that may now need + /// to cascade, IndexWriter invokes + /// FindMerges to give the MergePolicy a chance to pick + /// merges that are now required. This method returns a + /// MergeSpecification instance describing the set of + /// merges that should be done, or null if no merges are + /// necessary. When IndexWriter.Optimize is called, it calls + /// FindMergesForOptimize and the MergePolicy should + /// then return the necessary merges.
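Illustrative sketch (not part of the patch): the smallest possible custom policy, one that never requests merges. The member signatures follow the abstract methods declared later in this file; the generic parameter on ISet (lost in this angle-bracket-stripped rendering of the diff) is assumed to be SegmentInfo, and a real implementation would need to live in the Lucene.Net.Index namespace as noted below.

    class NoOpMergePolicy : MergePolicy
    {
        public NoOpMergePolicy(IndexWriter writer) : base(writer) { }

        // Returning null tells IndexWriter that no merges are necessary.
        public override MergeSpecification FindMerges(SegmentInfos segmentInfos)
        {
            return null;
        }

        public override MergeSpecification FindMergesForOptimize(
            SegmentInfos segmentInfos, int maxSegmentCount, ISet<SegmentInfo> segmentsToOptimize)
        {
            return null;
        }

        public override MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos)
        {
            return null;
        }

        // Keep the compound format for anything the writer asks about.
        public override bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment)
        {
            return true;
        }

        public override bool UseCompoundDocStore(SegmentInfos segments)
        {
            return true;
        }

        protected override void Dispose(bool disposing) { }
    }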

+ /// + ///

Note that the policy can return more than one merge at + /// a time. In this case, if the writer is using + /// SerialMergeScheduler, the merges will be run + /// sequentially but if it is using + /// ConcurrentMergeScheduler they will be run concurrently.

+ /// + ///

The default MergePolicy is + /// LogByteSizeMergePolicy.

+ /// + ///

NOTE: This API is new and still experimental + /// (subject to change suddenly in the next release)

+ /// + ///

NOTE: This class typically requires access to + /// package-private APIs (e.g. SegmentInfos) to do its job; + /// if you implement your own MergePolicy, you'll need to put + /// it in package Lucene.Net.Index in order to use + /// these APIs. + ///

+ + public abstract class MergePolicy : IDisposable + { + + /// OneMerge provides the information necessary to perform + /// an individual primitive merge operation, resulting in + /// a single new segment. The merge spec includes the + /// subset of segments to be merged as well as whether the + /// new segment should use the compound file format. + /// + + public class OneMerge + { + + internal SegmentInfo info; // used by IndexWriter + internal bool mergeDocStores; // used by IndexWriter + internal bool optimize; // used by IndexWriter + internal bool registerDone; // used by IndexWriter + internal long mergeGen; // used by IndexWriter + internal bool isExternal; // used by IndexWriter + internal int maxNumSegmentsOptimize; // used by IndexWriter + internal SegmentReader[] readers; // used by IndexWriter + internal SegmentReader[] readersClone; // used by IndexWriter + internal SegmentInfos segments; + internal bool useCompoundFile; + internal bool aborted; + internal System.Exception error; + + public OneMerge(SegmentInfos segments, bool useCompoundFile) + { + if (0 == segments.Count) + throw new ArgumentException("segments must include at least one segment", "segments"); + this.segments = segments; + this.useCompoundFile = useCompoundFile; + } + + /// Record that an exception occurred while executing + /// this merge + /// + internal virtual void SetException(System.Exception error) + { + lock (this) + { + this.error = error; + } + } + + /// Retrieve previous exception set by + ///. + /// + internal virtual System.Exception GetException() + { + lock (this) + { + return error; + } + } + + /// Mark this merge as aborted. If this is called + /// before the merge is committed then the merge will + /// not be committed. + /// + internal virtual void Abort() + { + lock (this) + { + aborted = true; + } + } + + /// Returns true if this merge was aborted. + internal virtual bool IsAborted() + { + lock (this) + { + return aborted; + } + } + + internal virtual void CheckAborted(Directory dir) + { + lock (this) + { + if (aborted) + throw new MergeAbortedException("merge is aborted: " + SegString(dir)); + } + } + + internal virtual String SegString(Directory dir) + { + var b = new System.Text.StringBuilder(); + int numSegments = segments.Count; + for (int i = 0; i < numSegments; i++) + { + if (i > 0) + b.Append(' '); + b.Append(segments.Info(i).SegString(dir)); + } + if (info != null) + b.Append(" into ").Append(info.name); + if (optimize) + b.Append(" [optimize]"); + if (mergeDocStores) + { + b.Append(" [mergeDocStores]"); + } + return b.ToString(); + } + + public SegmentInfos segments_ForNUnit + { + get { return segments; } + } + } + + /// A MergeSpecification instance provides the information + /// necessary to perform multiple merges. It simply + /// contains a list of instances. + /// + + public class MergeSpecification + { + + /// The subset of segments to be included in the primitive merge. + + public IList merges = new List(); + + public virtual void Add(OneMerge merge) + { + merges.Add(merge); + } + + public virtual String SegString(Directory dir) + { + var b = new System.Text.StringBuilder(); + b.Append("MergeSpec:\n"); + int count = merges.Count; + for (int i = 0; i < count; i++) + b.Append(" ").Append(1 + i).Append(": ").Append(merges[i].SegString(dir)); + return b.ToString(); + } + } + + /// Exception thrown if there are any problems while + /// executing a merge. 
+ /// + [Serializable] + public class MergeException:System.SystemException + { + private readonly Directory dir; + + public MergeException(System.String message, Directory dir):base(message) + { + this.dir = dir; + } + + public MergeException(System.Exception exc, Directory dir):base(null, exc) + { + this.dir = dir; + } + + /// Returns the of the index that hit + /// the exception. + /// + public virtual Directory Directory + { + get { return dir; } + } + } + + [Serializable] + public class MergeAbortedException:System.IO.IOException + { + public MergeAbortedException():base("merge is aborted") + { + } + public MergeAbortedException(System.String message):base(message) + { + } + } + + protected internal IndexWriter writer; + + protected MergePolicy(IndexWriter writer) + { + this.writer = writer; + } + + /// Determine what set of merge operations are now necessary on the index. + /// calls this whenever there is a change to the segments. + /// This call is always synchronized on the instance so + /// only one thread at a time will call this method. + /// + /// + /// the total set of segments in the index + /// + public abstract MergeSpecification FindMerges(SegmentInfos segmentInfos); + + /// Determine what set of merge operations is necessary in order to optimize + /// the index. calls this when its + /// method is called. This call is always + /// synchronized on the instance so only one thread at a + /// time will call this method. + /// + /// + /// the total set of segments in the index + /// + /// requested maximum number of segments in the index (currently this + /// is always 1) + /// + /// contains the specific SegmentInfo instances that must be merged + /// away. This may be a subset of all SegmentInfos. + /// + public abstract MergeSpecification FindMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, + ISet segmentsToOptimize); + + /// Determine what set of merge operations is necessary in order to expunge all + /// deletes from the index. + /// + /// + /// the total set of segments in the index + /// + public abstract MergeSpecification FindMergesToExpungeDeletes(SegmentInfos segmentInfos); + + /// Release all resources for the policy. + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + /// Release all resources for the policy. + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + + /// Returns true if a newly flushed (not from merge) + /// segment should use the compound file format. + /// + public abstract bool UseCompoundFile(SegmentInfos segments, SegmentInfo newSegment); + + /// Returns true if the doc store files should use the + /// compound file format. + /// + public abstract bool UseCompoundDocStore(SegmentInfos segments); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MergeScheduler.cs b/external/Lucene.Net.Light/src/core/Index/MergeScheduler.cs new file mode 100644 index 0000000000..7fbf83d8d7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MergeScheduler.cs @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + ///

Expert: IndexWriter uses an instance + /// implementing this interface to execute the merges + /// selected by a MergePolicy. The default + /// MergeScheduler is ConcurrentMergeScheduler.
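Illustrative sketch (not part of the patch): switching a writer to the serial scheduler; this assumes IndexWriter exposes SetMergeScheduler as in the corresponding Java 3.x API.

    // Run merges inline on the calling thread instead of in background threads.
    writer.SetMergeScheduler(new SerialMergeScheduler());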

+ /// + ///

NOTE: This API is new and still experimental + /// (subject to change suddenly in the next release)

+ /// + ///

NOTE: This class typically requires access to + /// package-private APIs (eg, SegmentInfos) to do its job; + /// if you implement your own MergePolicy, you'll need to put + /// it in package Lucene.Net.Index in order to use + /// these APIs. + ///

+ + public abstract class MergeScheduler : IDisposable + { + + /// Run the merges provided by . + public abstract void Merge(IndexWriter writer); + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + /// Close this MergeScheduler. + protected abstract void Dispose(bool disposing); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MultiLevelSkipListReader.cs b/external/Lucene.Net.Light/src/core/Index/MultiLevelSkipListReader.cs new file mode 100644 index 0000000000..28b4fd57af --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MultiLevelSkipListReader.cs @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + + /// This abstract class reads skip lists with multiple levels. + /// + /// See for the information about the encoding + /// of the multi level skip lists. + /// + /// Subclasses must implement the abstract method + /// which defines the actual format of the skip data. + /// + abstract class MultiLevelSkipListReader : IDisposable + { + // the maximum number of skip levels possible for this index + private readonly int maxNumberOfSkipLevels; + + // number of levels in this skip list + private int numberOfSkipLevels; + + // Expert: defines the number of top skip levels to buffer in memory. + // Reducing this number results in less memory usage, but possibly + // slower performance due to more random I/Os. + // Please notice that the space each level occupies is limited by + // the skipInterval. The top level can not contain more than + // skipLevel entries, the second top level can not contain more + // than skipLevel^2 entries and so forth. 
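For illustration: with a skipInterval of 16 and numberOfLevelsToBuffer = 1, only the topmost level is buffered, and by the bound above it holds at most 16 entries, so the in-memory footprint stays small regardless of how long the postings list is; the remaining levels are read (or cloned) directly from the underlying IndexInput.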
+ private const int numberOfLevelsToBuffer = 1; + + private int docCount; + private bool haveSkipped; + + private bool isDisposed; + + private readonly IndexInput[] skipStream; // skipStream for each level + private readonly long[] skipPointer; // the start pointer of each skip level + private readonly int[] skipInterval; // skipInterval of each level + private readonly int[] numSkipped; // number of docs skipped per level + + private readonly int[] skipDoc; // doc id of current skip entry per level + private int lastDoc; // doc id of last read skip entry with docId <= target + private readonly long[] childPointer; // child pointer of current skip entry per level + private long lastChildPointer; // childPointer of last read skip entry with docId <= target + + private readonly bool inputIsBuffered; + + protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) + { + this.skipStream = new IndexInput[maxSkipLevels]; + this.skipPointer = new long[maxSkipLevels]; + this.childPointer = new long[maxSkipLevels]; + this.numSkipped = new int[maxSkipLevels]; + this.maxNumberOfSkipLevels = maxSkipLevels; + this.skipInterval = new int[maxSkipLevels]; + this.skipStream[0] = skipStream; + this.inputIsBuffered = (skipStream is BufferedIndexInput); + this.skipInterval[0] = skipInterval; + for (int i = 1; i < maxSkipLevels; i++) + { + // cache skip intervals + this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval; + } + skipDoc = new int[maxSkipLevels]; + } + + + /// Returns the id of the doc to which the last call of + /// has skipped. + /// + internal virtual int GetDoc() + { + return lastDoc; + } + + + /// Skips entries to the first beyond the current whose document number is + /// greater than or equal to target. Returns the current doc count. 
+ /// + internal virtual int SkipTo(int target) + { + if (!haveSkipped) + { + // first time, load skip levels + LoadSkipLevels(); + haveSkipped = true; + } + + // walk up the levels until highest level is found that has a skip + // for this target + int level = 0; + while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) + { + level++; + } + + while (level >= 0) + { + if (target > skipDoc[level]) + { + if (!LoadNextSkip(level)) + { + continue; + } + } + else + { + // no more skips on this level, go down one level + if (level > 0 && lastChildPointer > skipStream[level - 1].FilePointer) + { + SeekChild(level - 1); + } + level--; + } + } + + return numSkipped[0] - skipInterval[0] - 1; + } + + private bool LoadNextSkip(int level) + { + // we have to skip, the target document is greater than the current + // skip list entry + SetLastSkipData(level); + + numSkipped[level] += skipInterval[level]; + + if (numSkipped[level] > docCount) + { + // this skip list is exhausted + skipDoc[level] = System.Int32.MaxValue; + if (numberOfSkipLevels > level) + numberOfSkipLevels = level; + return false; + } + + // read next skip entry + skipDoc[level] += ReadSkipData(level, skipStream[level]); + + if (level != 0) + { + // read the child pointer if we are not on the leaf level + childPointer[level] = skipStream[level].ReadVLong() + skipPointer[level - 1]; + } + + return true; + } + + /// Seeks the skip entry on the given level + protected internal virtual void SeekChild(int level) + { + skipStream[level].Seek(lastChildPointer); + numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; + skipDoc[level] = lastDoc; + if (level > 0) + { + childPointer[level] = skipStream[level].ReadVLong() + skipPointer[level - 1]; + } + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + for (int i = 1; i < skipStream.Length; i++) + { + if (skipStream[i] != null) + { + skipStream[i].Close(); + } + } + } + + isDisposed = true; + } + + /// initializes the reader + internal virtual void Init(long skipPointer, int df) + { + this.skipPointer[0] = skipPointer; + this.docCount = df; + System.Array.Clear(skipDoc, 0, skipDoc.Length); + System.Array.Clear(numSkipped, 0, numSkipped.Length); + System.Array.Clear(childPointer, 0, childPointer.Length); + + haveSkipped = false; + for (int i = 1; i < numberOfSkipLevels; i++) + { + skipStream[i] = null; + } + } + + /// Loads the skip levels + private void LoadSkipLevels() + { + numberOfSkipLevels = docCount == 0?0:(int) System.Math.Floor(System.Math.Log(docCount) / System.Math.Log(skipInterval[0])); + if (numberOfSkipLevels > maxNumberOfSkipLevels) + { + numberOfSkipLevels = maxNumberOfSkipLevels; + } + + skipStream[0].Seek(skipPointer[0]); + + int toBuffer = numberOfLevelsToBuffer; + + for (int i = numberOfSkipLevels - 1; i > 0; i--) + { + // the length of the current level + long length = skipStream[0].ReadVLong(); + + // the start pointer of the current level + skipPointer[i] = skipStream[0].FilePointer; + if (toBuffer > 0) + { + // buffer this level + skipStream[i] = new SkipBuffer(skipStream[0], (int) length); + toBuffer--; + } + else + { + // clone this stream, it is already at the start of the current level + skipStream[i] = (IndexInput) skipStream[0].Clone(); + if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) + { + ((BufferedIndexInput) skipStream[i]).SetBufferSize((int) length); + } + + // move base stream beyond the current level + 
skipStream[0].Seek(skipStream[0].FilePointer + length); + } + } + + // use base stream for the lowest level + skipPointer[0] = skipStream[0].FilePointer; + } + + /// Subclasses must implement the actual skip data encoding in this method. + /// + /// + /// the level skip data shall be read from + /// + /// the skip stream to read from + /// + protected internal abstract int ReadSkipData(int level, IndexInput skipStream); + + /// Copies the values of the last read skip entry on this level + protected internal virtual void SetLastSkipData(int level) + { + lastDoc = skipDoc[level]; + lastChildPointer = childPointer[level]; + } + + + /// used to buffer the top skip levels + private sealed class SkipBuffer : IndexInput + { + private byte[] data; + private readonly long pointer; + private int pos; + + private bool isDisposed; + + internal SkipBuffer(IndexInput input, int length) + { + data = new byte[length]; + pointer = input.FilePointer; + input.ReadBytes(data, 0, length); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + if (disposing) + { + data = null; + } + + isDisposed = true; + } + + public override long FilePointer + { + get { return pointer + pos; } + } + + public override long Length() + { + return data.Length; + } + + public override byte ReadByte() + { + return data[pos++]; + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + Array.Copy(data, pos, b, offset, len); + pos += len; + } + + public override void Seek(long pos) + { + this.pos = (int) (pos - pointer); + } + + override public System.Object Clone() + { + System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this FilterIndexReader.Clone()"); // {{Aroush-2.9}} + return null; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MultiLevelSkipListWriter.cs b/external/Lucene.Net.Light/src/core/Index/MultiLevelSkipListWriter.cs new file mode 100644 index 0000000000..00543f2c84 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MultiLevelSkipListWriter.cs @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexOutput = Lucene.Net.Store.IndexOutput; +using RAMOutputStream = Lucene.Net.Store.RAMOutputStream; + +namespace Lucene.Net.Index +{ + + /// This abstract class writes skip lists with multiple levels. + /// + /// Example for skipInterval = 3: + /// c (skip level 2) + /// c c c (skip level 1) + /// x x x x x x x x x x (skip level 0) + /// d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) + /// 3 6 9 12 15 18 21 24 27 30 (df) + /// + /// d - document + /// x - skip data + /// c - skip data with child pointer + /// + /// Skip level i contains every skipInterval-th entry from skip level i-1. 
+ /// Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). + /// + /// Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. + /// This guarantess a logarithmic amount of skips to find the target document. + /// + /// While this class takes care of writing the different skip levels, + /// subclasses must define the actual format of the skip data. + /// + /// + abstract class MultiLevelSkipListWriter + { + // number of levels in this skip list + private int numberOfSkipLevels; + + // the skip interval in the list with level = 0 + private int skipInterval; + + // for every skip level a different buffer is used + private RAMOutputStream[] skipBuffer; + + protected internal MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) + { + this.skipInterval = skipInterval; + + // calculate the maximum number of skip levels for this document frequency + numberOfSkipLevels = df == 0?0:(int) System.Math.Floor(System.Math.Log(df) / System.Math.Log(skipInterval)); + + // make sure it does not exceed maxSkipLevels + if (numberOfSkipLevels > maxSkipLevels) + { + numberOfSkipLevels = maxSkipLevels; + } + } + + protected internal virtual void Init() + { + skipBuffer = new RAMOutputStream[numberOfSkipLevels]; + for (int i = 0; i < numberOfSkipLevels; i++) + { + skipBuffer[i] = new RAMOutputStream(); + } + } + + protected internal virtual void ResetSkip() + { + // creates new buffers or empties the existing ones + if (skipBuffer == null) + { + Init(); + } + else + { + for (int i = 0; i < skipBuffer.Length; i++) + { + skipBuffer[i].Reset(); + } + } + } + + /// Subclasses must implement the actual skip data encoding in this method. + /// + /// + /// the level skip data shall be writting for + /// + /// the skip buffer to write to + /// + protected internal abstract void WriteSkipData(int level, IndexOutput skipBuffer); + + /// Writes the current skip data to the buffers. The current document frequency determines + /// the max level is skip data is to be written to. + /// + /// + /// the current document frequency + /// + /// IOException + internal virtual void BufferSkip(int df) + { + int numLevels; + + // determine max level + for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) + { + numLevels++; + } + + long childPointer = 0; + + for (int level = 0; level < numLevels; level++) + { + WriteSkipData(level, skipBuffer[level]); + + long newChildPointer = skipBuffer[level].FilePointer; + + if (level != 0) + { + // store child pointers for all levels except the lowest + skipBuffer[level].WriteVLong(childPointer); + } + + //remember the childPointer for the next level + childPointer = newChildPointer; + } + } + + /// Writes the buffered skip lists to the given output. 
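+ /// (the higher levels are written first, each preceded by its length as a VLong; level 0 is written last and carries no length prefix)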
+ /// + /// + /// the IndexOutput the skip lists shall be written to + /// + /// the pointer the skip list starts + /// + internal virtual long WriteSkip(IndexOutput output) + { + long skipPointer = output.FilePointer; + if (skipBuffer == null || skipBuffer.Length == 0) + return skipPointer; + + for (int level = numberOfSkipLevels - 1; level > 0; level--) + { + long length = skipBuffer[level].FilePointer; + if (length > 0) + { + output.WriteVLong(length); + skipBuffer[level].WriteTo(output); + } + } + skipBuffer[0].WriteTo(output); + + return skipPointer; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MultiReader.cs b/external/Lucene.Net.Light/src/core/Index/MultiReader.cs new file mode 100644 index 0000000000..a441cb74a7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MultiReader.cs @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Support; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using MultiTermDocs = Lucene.Net.Index.DirectoryReader.MultiTermDocs; +using MultiTermEnum = Lucene.Net.Index.DirectoryReader.MultiTermEnum; +using MultiTermPositions = Lucene.Net.Index.DirectoryReader.MultiTermPositions; +using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity; + +namespace Lucene.Net.Index +{ + + /// An IndexReader which reads multiple indexes, appending + /// their content. + /// + public class MultiReader:IndexReader, System.ICloneable + { + protected internal IndexReader[] subReaders; + private int[] starts; // 1st docno for each segment + private bool[] decrefOnClose; // remember which subreaders to decRef on close + private System.Collections.Generic.IDictionary normsCache = new HashMap(); + private int maxDoc = 0; + private int numDocs = - 1; + private bool hasDeletions = false; + + ///

Construct a MultiReader aggregating the named set of (sub)readers. + /// Directory locking for delete, undeleteAll, and setNorm operations is + /// left to the subreaders.

+ ///

Note that all subreaders are closed if this MultiReader is closed.

+ ///

+ /// set of (sub)readers + /// + /// IOException + public MultiReader(params IndexReader[] subReaders) + { + Initialize(subReaders, true); + } + + ///

Construct a MultiReader aggregating the named set of (sub)readers. + /// Directory locking for delete, undeleteAll, and setNorm operations is + /// left to the subreaders.

+ ///

+ /// indicates whether the subreaders should be closed + /// when this MultiReader is closed + /// + /// set of (sub)readers + /// + /// IOException + public MultiReader(IndexReader[] subReaders, bool closeSubReaders) + { + Initialize(subReaders, closeSubReaders); + } + + private void Initialize(IndexReader[] subReaders, bool closeSubReaders) + { + // Deep copy + this.subReaders = subReaders.ToArray(); + starts = new int[subReaders.Length + 1]; // build starts array + decrefOnClose = new bool[subReaders.Length]; + for (int i = 0; i < subReaders.Length; i++) + { + starts[i] = maxDoc; + maxDoc += subReaders[i].MaxDoc; // compute maxDocs + + if (!closeSubReaders) + { + subReaders[i].IncRef(); + decrefOnClose[i] = true; + } + else + { + decrefOnClose[i] = false; + } + + if (subReaders[i].HasDeletions) + hasDeletions = true; + } + starts[subReaders.Length] = maxDoc; + } + + /// Tries to reopen the subreaders. + ///
+ /// If one or more subreaders could be re-opened (i. e. subReader.reopen() + /// returned a new instance != subReader), then a new MultiReader instance + /// is returned, otherwise this instance is returned. + ///

+ /// A re-opened instance might share one or more subreaders with the old + /// instance. Index modification operations result in undefined behavior + /// when performed before the old instance is closed. + /// (see ). + ///

+ /// If subreaders are shared, then the reference count of those + /// readers is increased to ensure that the subreaders remain open + /// until the last referring reader is closed. + /// + ///

+ /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public override IndexReader Reopen() + { + lock (this) + { + return DoReopen(false); + } + } + + /// Clones the subreaders. + /// (see ). + ///
+ ///

+ /// If subreaders are shared, then the reference count of those + /// readers is increased to ensure that the subreaders remain open + /// until the last referring reader is closed. + ///

+ public override System.Object Clone() + { + try + { + return DoReopen(true); + } + catch (System.Exception ex) + { + throw new System.SystemException(ex.Message, ex); + } + } + + /// If clone is true then we clone each of the subreaders + /// + /// + /// New IndexReader, or same one (this) if + /// reopen/clone is not necessary + /// + /// CorruptIndexException + /// IOException + protected internal virtual IndexReader DoReopen(bool doClone) + { + EnsureOpen(); + + bool reopened = false; + IndexReader[] newSubReaders = new IndexReader[subReaders.Length]; + + bool success = false; + try + { + for (int i = 0; i < subReaders.Length; i++) + { + if (doClone) + newSubReaders[i] = (IndexReader) subReaders[i].Clone(); + else + newSubReaders[i] = subReaders[i].Reopen(); + // if at least one of the subreaders was updated we remember that + // and return a new MultiReader + if (newSubReaders[i] != subReaders[i]) + { + reopened = true; + } + } + success = true; + } + finally + { + if (!success && reopened) + { + for (int i = 0; i < newSubReaders.Length; i++) + { + if (newSubReaders[i] != subReaders[i]) + { + try + { + newSubReaders[i].Close(); + } + catch (System.IO.IOException) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + + if (reopened) + { + bool[] newDecrefOnClose = new bool[subReaders.Length]; + for (int i = 0; i < subReaders.Length; i++) + { + if (newSubReaders[i] == subReaders[i]) + { + newSubReaders[i].IncRef(); + newDecrefOnClose[i] = true; + } + } + MultiReader mr = new MultiReader(newSubReaders); + mr.decrefOnClose = newDecrefOnClose; + return mr; + } + else + { + return this; + } + } + + public override ITermFreqVector[] GetTermFreqVectors(int n) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].GetTermFreqVectors(n - starts[i]); // dispatch to segment + } + + public override ITermFreqVector GetTermFreqVector(int n, System.String field) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].GetTermFreqVector(n - starts[i], field); + } + + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + int i = ReaderIndex(docNumber); // find segment num + subReaders[i].GetTermFreqVector(docNumber - starts[i], field, mapper); + } + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + int i = ReaderIndex(docNumber); // find segment num + subReaders[i].GetTermFreqVector(docNumber - starts[i], mapper); + } + + public override bool IsOptimized() + { + return false; + } + + public override int NumDocs() + { + // Don't call ensureOpen() here (it could affect performance) + // NOTE: multiple threads may wind up init'ing + // numDocs... 
but that's harmless + if (numDocs == - 1) + { + // check cache + int n = 0; // cache miss--recompute + for (int i = 0; i < subReaders.Length; i++) + n += subReaders[i].NumDocs(); // sum from readers + numDocs = n; + } + return numDocs; + } + + public override int MaxDoc + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return maxDoc; + } + } + + // inherit javadoc + public override Document Document(int n, FieldSelector fieldSelector) + { + EnsureOpen(); + int i = ReaderIndex(n); // find segment num + return subReaders[i].Document(n - starts[i], fieldSelector); // dispatch to segment reader + } + + public override bool IsDeleted(int n) + { + // Don't call ensureOpen() here (it could affect performance) + int i = ReaderIndex(n); // find segment num + return subReaders[i].IsDeleted(n - starts[i]); // dispatch to segment reader + } + + public override bool HasDeletions + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return hasDeletions; + } + } + + protected internal override void DoDelete(int n) + { + numDocs = - 1; // invalidate cache + int i = ReaderIndex(n); // find segment num + subReaders[i].DeleteDocument(n - starts[i]); // dispatch to segment reader + hasDeletions = true; + } + + protected internal override void DoUndeleteAll() + { + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].UndeleteAll(); + + hasDeletions = false; + numDocs = - 1; // invalidate cache + } + + private int ReaderIndex(int n) + { + // find reader for doc n: + return DirectoryReader.ReaderIndex(n, this.starts, this.subReaders.Length); + } + + public override bool HasNorms(System.String field) + { + EnsureOpen(); + for (int i = 0; i < subReaders.Length; i++) + { + if (subReaders[i].HasNorms(field)) + return true; + } + return false; + } + + public override byte[] Norms(System.String field) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = normsCache[field]; + if (bytes != null) + return bytes; // cache hit + if (!HasNorms(field)) + return null; + + bytes = new byte[MaxDoc]; + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].Norms(field, bytes, starts[i]); + normsCache[field] = bytes; // update cache + return bytes; + } + } + + public override void Norms(System.String field, byte[] result, int offset) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = normsCache[field]; + for (int i = 0; i < subReaders.Length; i++) + // read from segments + subReaders[i].Norms(field, result, offset + starts[i]); + + if (bytes == null && !HasNorms(field)) + { + for (int i = offset; i < result.Length; i++) + { + result[i] = (byte) DefaultSimilarity.EncodeNorm(1.0f); + } + } + else if (bytes != null) + { + // cache hit + Array.Copy(bytes, 0, result, offset, MaxDoc); + } + else + { + for (int i = 0; i < subReaders.Length; i++) + { + // read from segments + subReaders[i].Norms(field, result, offset + starts[i]); + } + } + } + } + + protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed) + { + lock (normsCache) + { + normsCache.Remove(field); // clear cache + } + int i = ReaderIndex(n); // find segment num + subReaders[i].SetNorm(n - starts[i], field, value_Renamed); // dispatch + } + + public override TermEnum Terms() + { + EnsureOpen(); + return new MultiTermEnum(this, subReaders, starts, null); + } + + public override TermEnum Terms(Term term) + { + EnsureOpen(); + return new MultiTermEnum(this, subReaders, starts, term); + } + + public override int DocFreq(Term t) + { + EnsureOpen(); + int total = 0; // sum 
freqs in segments + for (int i = 0; i < subReaders.Length; i++) + total += subReaders[i].DocFreq(t); + return total; + } + + public override TermDocs TermDocs() + { + EnsureOpen(); + return new MultiTermDocs(this, subReaders, starts); + } + + public override TermPositions TermPositions() + { + EnsureOpen(); + return new MultiTermPositions(this, subReaders, starts); + } + + protected internal override void DoCommit(System.Collections.Generic.IDictionary commitUserData) + { + for (int i = 0; i < subReaders.Length; i++) + subReaders[i].Commit(commitUserData); + } + + protected internal override void DoClose() + { + lock (this) + { + for (int i = 0; i < subReaders.Length; i++) + { + if (decrefOnClose[i]) + { + subReaders[i].DecRef(); + } + else + { + subReaders[i].Close(); + } + } + } + + // NOTE: only needed in case someone had asked for + // FieldCache for top-level reader (which is generally + // not a good idea): + Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this); + } + + public override System.Collections.Generic.ICollection GetFieldNames(IndexReader.FieldOption fieldNames) + { + EnsureOpen(); + return DirectoryReader.GetFieldNames(fieldNames, this.subReaders); + } + + /// Checks recursively if all subreaders are up to date. + public override bool IsCurrent() + { + for (int i = 0; i < subReaders.Length; i++) + { + if (!subReaders[i].IsCurrent()) + { + return false; + } + } + + // all subreaders are up to date + return true; + } + + /// Not implemented. + /// UnsupportedOperationException + public override long Version + { + get { throw new System.NotSupportedException("MultiReader does not support this method."); } + } + + public override IndexReader[] GetSequentialSubReaders() + { + return subReaders; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/MultipleTermPositions.cs b/external/Lucene.Net.Light/src/core/Index/MultipleTermPositions.cs new file mode 100644 index 0000000000..eab3dd5553 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/MultipleTermPositions.cs @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Util; + +namespace Lucene.Net.Index +{ + + /// Allows you to iterate over the for multiple s as + /// a single . 
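+ /// (Next() merges the positions of every given term that matches the current document and returns them in sorted order; Freq reports the merged count)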
+ /// + /// + public class MultipleTermPositions : TermPositions + { + private sealed class TermPositionsQueue : PriorityQueue + { + internal TermPositionsQueue(LinkedList termPositions) + { + Initialize(termPositions.Count); + + foreach(TermPositions tp in termPositions) + if (tp.Next()) + Add(tp); + } + + internal TermPositions Peek() + { + return Top(); + } + + public override bool LessThan(TermPositions a, TermPositions b) + { + return a.Doc < b.Doc; + } + } + + private sealed class IntQueue + { + public IntQueue() + { + InitBlock(); + } + private void InitBlock() + { + _array = new int[_arraySize]; + } + private int _arraySize = 16; + private int _index = 0; + private int _lastIndex = 0; + private int[] _array; + + internal void add(int i) + { + if (_lastIndex == _arraySize) + growArray(); + + _array[_lastIndex++] = i; + } + + internal int next() + { + return _array[_index++]; + } + + internal void sort() + { + System.Array.Sort(_array, _index, _lastIndex - _index); + } + + internal void clear() + { + _index = 0; + _lastIndex = 0; + } + + internal int size() + { + return (_lastIndex - _index); + } + + private void growArray() + { + int[] newArray = new int[_arraySize * 2]; + Array.Copy(_array, 0, newArray, 0, _arraySize); + _array = newArray; + _arraySize *= 2; + } + } + + private int _doc; + private int _freq; + private TermPositionsQueue _termPositionsQueue; + private IntQueue _posList; + + private bool isDisposed; + /// Creates a new MultipleTermPositions instance. + /// + /// + /// + /// + public MultipleTermPositions(IndexReader indexReader, Term[] terms) + { + var termPositions = new System.Collections.Generic.LinkedList(); + + for (int i = 0; i < terms.Length; i++) + termPositions.AddLast(indexReader.TermPositions(terms[i])); + + _termPositionsQueue = new TermPositionsQueue(termPositions); + _posList = new IntQueue(); + } + + public bool Next() + { + if (_termPositionsQueue.Size() == 0) + return false; + + _posList.clear(); + _doc = _termPositionsQueue.Peek().Doc; + + TermPositions tp; + do + { + tp = _termPositionsQueue.Peek(); + + for (int i = 0; i < tp.Freq; i++) + _posList.add(tp.NextPosition()); + + if (tp.Next()) + _termPositionsQueue.UpdateTop(); + else + { + _termPositionsQueue.Pop(); + tp.Close(); + } + } + while (_termPositionsQueue.Size() > 0 && _termPositionsQueue.Peek().Doc == _doc); + + _posList.sort(); + _freq = _posList.size(); + + return true; + } + + public int NextPosition() + { + return _posList.next(); + } + + public bool SkipTo(int target) + { + while (_termPositionsQueue.Peek() != null && target > _termPositionsQueue.Peek().Doc) + { + TermPositions tp = _termPositionsQueue.Pop(); + if (tp.SkipTo(target)) + _termPositionsQueue.Add(tp); + else + tp.Close(); + } + return Next(); + } + + public int Doc + { + get { return _doc; } + } + + public int Freq + { + get { return _freq; } + } + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + while (_termPositionsQueue.Size() > 0) + _termPositionsQueue.Pop().Close(); + } + + isDisposed = true; + } + + /// Not implemented. + /// UnsupportedOperationException + public virtual void Seek(Term arg0) + { + throw new System.NotSupportedException(); + } + + /// Not implemented. + /// UnsupportedOperationException + public virtual void Seek(TermEnum termEnum) + { + throw new System.NotSupportedException(); + } + + /// Not implemented. 
+ /// UnsupportedOperationException + public virtual int Read(int[] arg0, int[] arg1) + { + throw new System.NotSupportedException(); + } + + + /// Not implemented. + /// UnsupportedOperationException + public virtual int PayloadLength + { + get { throw new System.NotSupportedException(); } + } + + /// Not implemented. + /// UnsupportedOperationException + public virtual byte[] GetPayload(byte[] data, int offset) + { + throw new System.NotSupportedException(); + } + + /// + /// false +// TODO: Remove warning after API has been finalized + public virtual bool IsPayloadAvailable + { + get { return false; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/NormsWriter.cs b/external/Lucene.Net.Light/src/core/Index/NormsWriter.cs new file mode 100644 index 0000000000..507d69c46d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/NormsWriter.cs @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using Similarity = Lucene.Net.Search.Similarity; + +namespace Lucene.Net.Index +{ + + // TODO FI: norms could actually be stored as doc store + + /// Writes norms. Each thread X field accumulates the norms + /// for the doc/fields it saw, then the flush method below + /// merges all of these together into a single _X.nrm file. + /// + + sealed class NormsWriter : InvertedDocEndConsumer + { + + private static readonly byte defaultNorm; + private FieldInfos fieldInfos; + public override InvertedDocEndConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread) + { + return new NormsWriterPerThread(docInverterPerThread, this); + } + + public override void Abort() + { + } + + // We only write the _X.nrm file at flush + internal void Files(ICollection files) + { + } + + internal override void SetFieldInfos(FieldInfos fieldInfos) + { + this.fieldInfos = fieldInfos; + } + + /// Produce _X.nrm if any document had a field with norms + /// not disabled + /// + public override void Flush(IDictionary> threadsAndFields, SegmentWriteState state) + { + + IDictionary> byField = new HashMap>(); + + // Typically, each thread will have encountered the same + // field. 
So first we collate by field, ie, all + // per-thread field instances that correspond to the + // same FieldInfo + foreach(var entry in threadsAndFields) + { + ICollection fields = entry.Value; + IEnumerator fieldsIt = fields.GetEnumerator(); + var fieldsToRemove = new HashSet(); + while (fieldsIt.MoveNext()) + { + NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.Current; + + if (perField.upto > 0) + { + // It has some norms + IList l = byField[perField.fieldInfo]; + if (l == null) + { + l = new List(); + byField[perField.fieldInfo] = l; + } + l.Add(perField); + } + // Remove this field since we haven't seen it + // since the previous flush + else + { + fieldsToRemove.Add(perField); + } + } + foreach (var field in fieldsToRemove) + { + fields.Remove(field); + } + } + + System.String normsFileName = state.segmentName + "." + IndexFileNames.NORMS_EXTENSION; + state.flushedFiles.Add(normsFileName); + IndexOutput normsOut = state.directory.CreateOutput(normsFileName); + + try + { + normsOut.WriteBytes(SegmentMerger.NORMS_HEADER, 0, SegmentMerger.NORMS_HEADER.Length); + + int numField = fieldInfos.Size(); + + int normCount = 0; + + for (int fieldNumber = 0; fieldNumber < numField; fieldNumber++) + { + + FieldInfo fieldInfo = fieldInfos.FieldInfo(fieldNumber); + + IList toMerge = byField[fieldInfo]; + int upto = 0; + if (toMerge != null) + { + + int numFields = toMerge.Count; + + normCount++; + + NormsWriterPerField[] fields = new NormsWriterPerField[numFields]; + int[] uptos = new int[numFields]; + + for (int j = 0; j < numFields; j++) + fields[j] = toMerge[j]; + + int numLeft = numFields; + + while (numLeft > 0) + { + + System.Diagnostics.Debug.Assert(uptos [0] < fields [0].docIDs.Length, " uptos[0]=" + uptos [0] + " len=" +(fields [0].docIDs.Length)); + + int minLoc = 0; + int minDocID = fields[0].docIDs[uptos[0]]; + + for (int j = 1; j < numLeft; j++) + { + int docID = fields[j].docIDs[uptos[j]]; + if (docID < minDocID) + { + minDocID = docID; + minLoc = j; + } + } + + System.Diagnostics.Debug.Assert(minDocID < state.numDocs); + + // Fill hole + for (; upto < minDocID; upto++) + normsOut.WriteByte(defaultNorm); + + normsOut.WriteByte(fields[minLoc].norms[uptos[minLoc]]); + (uptos[minLoc])++; + upto++; + + if (uptos[minLoc] == fields[minLoc].upto) + { + fields[minLoc].Reset(); + if (minLoc != numLeft - 1) + { + fields[minLoc] = fields[numLeft - 1]; + uptos[minLoc] = uptos[numLeft - 1]; + } + numLeft--; + } + } + + // Fill final hole with defaultNorm + for (; upto < state.numDocs; upto++) + normsOut.WriteByte(defaultNorm); + } + else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) + { + normCount++; + // Fill entire field with default norm: + for (; upto < state.numDocs; upto++) + normsOut.WriteByte(defaultNorm); + } + + System.Diagnostics.Debug.Assert(4 + normCount * state.numDocs == normsOut.FilePointer, ".nrm file size mismatch: expected=" +(4 + normCount * state.numDocs) + " actual=" + normsOut.FilePointer); + } + } + finally + { + normsOut.Close(); + } + } + + internal override void CloseDocStore(SegmentWriteState state) + { + } + static NormsWriter() + { + defaultNorm = Similarity.EncodeNorm(1.0f); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/NormsWriterPerField.cs b/external/Lucene.Net.Light/src/core/Index/NormsWriterPerField.cs new file mode 100644 index 0000000000..81d45df148 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/NormsWriterPerField.cs @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using ArrayUtil = Lucene.Net.Util.ArrayUtil; +using Similarity = Lucene.Net.Search.Similarity; + +namespace Lucene.Net.Index +{ + + /// Taps into DocInverter, as an InvertedDocEndConsumer, + /// which is called at the end of inverting each field. We + /// just look at the length for the field (docState.length) + /// and record the norm. + /// + + sealed class NormsWriterPerField:InvertedDocEndConsumerPerField, System.IComparable + { + + internal NormsWriterPerThread perThread; + internal FieldInfo fieldInfo; + internal DocumentsWriter.DocState docState; + + // Holds all docID/norm pairs we've seen + internal int[] docIDs = new int[1]; + internal byte[] norms = new byte[1]; + internal int upto; + + internal FieldInvertState fieldState; + + public void Reset() + { + // Shrink back if we are overallocated now: + docIDs = ArrayUtil.Shrink(docIDs, upto); + norms = ArrayUtil.Shrink(norms, upto); + upto = 0; + } + + public NormsWriterPerField(DocInverterPerField docInverterPerField, NormsWriterPerThread perThread, FieldInfo fieldInfo) + { + this.perThread = perThread; + this.fieldInfo = fieldInfo; + docState = perThread.docState; + fieldState = docInverterPerField.fieldState; + } + + internal override void Abort() + { + upto = 0; + } + + public int CompareTo(NormsWriterPerField other) + { + return String.CompareOrdinal(fieldInfo.name, other.fieldInfo.name); + } + + internal override void Finish() + { + System.Diagnostics.Debug.Assert(docIDs.Length == norms.Length); + if (fieldInfo.isIndexed && !fieldInfo.omitNorms) + { + if (docIDs.Length <= upto) + { + System.Diagnostics.Debug.Assert(docIDs.Length == upto); + docIDs = ArrayUtil.Grow(docIDs, 1 + upto); + norms = ArrayUtil.Grow(norms, 1 + upto); + } + float norm = docState.similarity.ComputeNorm(fieldInfo.name, fieldState); + norms[upto] = Similarity.EncodeNorm(norm); + docIDs[upto] = docState.docID; + upto++; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/NormsWriterPerThread.cs b/external/Lucene.Net.Light/src/core/Index/NormsWriterPerThread.cs new file mode 100644 index 0000000000..d5cd5ed85e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/NormsWriterPerThread.cs @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class NormsWriterPerThread:InvertedDocEndConsumerPerThread + { + internal NormsWriter normsWriter; + internal DocumentsWriter.DocState docState; + + public NormsWriterPerThread(DocInverterPerThread docInverterPerThread, NormsWriter normsWriter) + { + this.normsWriter = normsWriter; + docState = docInverterPerThread.docState; + } + + internal override InvertedDocEndConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo) + { + return new NormsWriterPerField(docInverterPerField, this, fieldInfo); + } + + internal override void Abort() + { + } + + internal override void StartDocument() + { + } + internal override void FinishDocument() + { + } + + internal bool FreeRAM() + { + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ParallelReader.cs b/external/Lucene.Net.Light/src/core/Index/ParallelReader.cs new file mode 100644 index 0000000000..e0b4b04f7b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ParallelReader.cs @@ -0,0 +1,822 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Support; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult; + +namespace Lucene.Net.Index +{ + /// An IndexReader which reads multiple, parallel indexes. Each index added + /// must have the same number of documents, but typically each contains + /// different fields. Each document contains the union of the fields of all + /// documents with the same document number. When searching, matches for a + /// query term are from the first index added that has the field. + /// + ///

This is useful, e.g., with collections that have large fields which + /// change rarely and small fields that change more frequently. The smaller + /// fields may be re-indexed in a new index and both indexes may be searched + /// together. + /// + ///

Warning: It is up to you to make sure all indexes + /// are created and modified the same way. For example, if you add + /// documents to one index, you need to add the same documents in the + /// same order to the other indexes. Failure to do so will result in + /// undefined behavior. + ///

+ public class ParallelReader:IndexReader, System.ICloneable + { + private List readers = new List(); + private List decrefOnClose = new List(); // remember which subreaders to decRef on close + internal bool incRefReaders = false; + private SortedDictionary fieldToReader = new SortedDictionary(); + private IDictionary> readerToFields = new HashMap>(); + private List storedFieldReaders = new List(); + + private int maxDoc; + private int numDocs; + private bool hasDeletions; + + /// Construct a ParallelReader. + ///

Note that all subreaders are closed if this ParallelReader is closed.

+ ///

+ public ParallelReader():this(true) + { + } + + /// Construct a ParallelReader. + /// indicates whether the subreaders should be closed + /// when this ParallelReader is closed + /// + public ParallelReader(bool closeSubReaders):base() + { + this.incRefReaders = !closeSubReaders; + } + + /// Add an IndexReader. + /// IOException if there is a low-level IO error + public virtual void Add(IndexReader reader) + { + EnsureOpen(); + Add(reader, false); + } + + /// Add an IndexReader whose stored fields will not be returned. This can + /// accellerate search when stored fields are only needed from a subset of + /// the IndexReaders. + /// + /// + /// IllegalArgumentException if not all indexes contain the same number + /// of documents + /// + /// IllegalArgumentException if not all indexes have the same value + /// of + /// + /// IOException if there is a low-level IO error + public virtual void Add(IndexReader reader, bool ignoreStoredFields) + { + + EnsureOpen(); + if (readers.Count == 0) + { + this.maxDoc = reader.MaxDoc; + this.numDocs = reader.NumDocs(); + this.hasDeletions = reader.HasDeletions; + } + + if (reader.MaxDoc != maxDoc) + // check compatibility + throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc); + if (reader.NumDocs() != numDocs) + throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs()); + + ICollection fields = reader.GetFieldNames(IndexReader.FieldOption.ALL); + readerToFields[reader] = fields; + foreach(var field in fields) + { + // update fieldToReader map + // Do a containskey firt to mimic java behavior + if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null) + fieldToReader[field] = reader; + } + + if (!ignoreStoredFields) + storedFieldReaders.Add(reader); // add to storedFieldReaders + readers.Add(reader); + + if (incRefReaders) + { + reader.IncRef(); + } + decrefOnClose.Add(incRefReaders); + } + + public override System.Object Clone() + { + try + { + return DoReopen(true); + } + catch (System.Exception ex) + { + throw new System.SystemException(ex.Message, ex); + } + } + + /// Tries to reopen the subreaders. + ///
+ /// If one or more subreaders could be re-opened (i. e. subReader.reopen() + /// returned a new instance != subReader), then a new ParallelReader instance + /// is returned, otherwise this instance is returned. + ///

+ /// A re-opened instance might share one or more subreaders with the old + /// instance. Index modification operations result in undefined behavior + /// when performed before the old instance is closed. + /// (see ). + ///

+ /// If subreaders are shared, then the reference count of those + /// readers is increased to ensure that the subreaders remain open + /// until the last referring reader is closed. + /// + ///

+ /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public override IndexReader Reopen() + { + lock (this) + { + return DoReopen(false); + } + } + + protected internal virtual IndexReader DoReopen(bool doClone) + { + EnsureOpen(); + + bool reopened = false; + IList newReaders = new List(); + + bool success = false; + + try + { + foreach(var oldReader in readers) + { + IndexReader newReader = null; + if (doClone) + { + newReader = (IndexReader) oldReader.Clone(); + } + else + { + newReader = oldReader.Reopen(); + } + newReaders.Add(newReader); + // if at least one of the subreaders was updated we remember that + // and return a new ParallelReader + if (newReader != oldReader) + { + reopened = true; + } + } + success = true; + } + finally + { + if (!success && reopened) + { + for (int i = 0; i < newReaders.Count; i++) + { + IndexReader r = newReaders[i]; + if (r != readers[i]) + { + try + { + r.Close(); + } + catch (System.IO.IOException) + { + // keep going - we want to clean up as much as possible + } + } + } + } + } + + if (reopened) + { + List newDecrefOnClose = new List(); + ParallelReader pr = new ParallelReader(); + for (int i = 0; i < readers.Count; i++) + { + IndexReader oldReader = readers[i]; + IndexReader newReader = newReaders[i]; + if (newReader == oldReader) + { + newDecrefOnClose.Add(true); + newReader.IncRef(); + } + else + { + // this is a new subreader instance, so on close() we don't + // decRef but close it + newDecrefOnClose.Add(false); + } + pr.Add(newReader, !storedFieldReaders.Contains(oldReader)); + } + pr.decrefOnClose = newDecrefOnClose; + pr.incRefReaders = incRefReaders; + return pr; + } + else + { + // No subreader was refreshed + return this; + } + } + + + public override int NumDocs() + { + // Don't call ensureOpen() here (it could affect performance) + return numDocs; + } + + public override int MaxDoc + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return maxDoc; + } + } + + public override bool HasDeletions + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return hasDeletions; + } + } + + // check first reader + public override bool IsDeleted(int n) + { + // Don't call ensureOpen() here (it could affect performance) + if (readers.Count > 0) + return readers[0].IsDeleted(n); + return false; + } + + // delete in all readers + protected internal override void DoDelete(int n) + { + foreach(var reader in readers) + { + reader.DeleteDocument(n); + } + hasDeletions = true; + } + + // undeleteAll in all readers + protected internal override void DoUndeleteAll() + { + foreach(var reader in readers) + { + reader.UndeleteAll(); + } + hasDeletions = false; + } + + // append fields from storedFieldReaders + public override Document Document(int n, FieldSelector fieldSelector) + { + EnsureOpen(); + Document result = new Document(); + foreach(IndexReader reader in storedFieldReaders) + { + bool include = (fieldSelector == null); + if (!include) + { + var fields = readerToFields[reader]; + foreach(var field in fields) + { + if (fieldSelector.Accept(field) != FieldSelectorResult.NO_LOAD) + { + include = true; + break; + } + } + } + if (include) + { + var fields = reader.Document(n, fieldSelector).GetFields(); + foreach(var field in fields) + { + result.Add(field); + } + } + } + return result; + } + + // get all vectors + public override ITermFreqVector[] GetTermFreqVectors(int n) + { + EnsureOpen(); + IList results = new List(); + foreach(var e in 
fieldToReader) + { + System.String field = e.Key; + IndexReader reader = e.Value; + + ITermFreqVector vector = reader.GetTermFreqVector(n, field); + if (vector != null) + results.Add(vector); + } + return results.ToArray(); + } + + public override ITermFreqVector GetTermFreqVector(int n, System.String field) + { + EnsureOpen(); + IndexReader reader = (fieldToReader[field]); + return reader == null?null:reader.GetTermFreqVector(n, field); + } + + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + IndexReader reader = (fieldToReader[field]); + if (reader != null) + { + reader.GetTermFreqVector(docNumber, field, mapper); + } + } + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + + foreach(var e in fieldToReader) + { + System.String field = e.Key; + IndexReader reader = e.Value; + reader.GetTermFreqVector(docNumber, field, mapper); + } + } + + public override bool HasNorms(System.String field) + { + EnsureOpen(); + IndexReader reader = fieldToReader[field]; + return reader != null && reader.HasNorms(field); + } + + public override byte[] Norms(System.String field) + { + EnsureOpen(); + IndexReader reader = fieldToReader[field]; + return reader == null?null:reader.Norms(field); + } + + public override void Norms(System.String field, byte[] result, int offset) + { + EnsureOpen(); + IndexReader reader = fieldToReader[field]; + if (reader != null) + reader.Norms(field, result, offset); + } + + protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed) + { + IndexReader reader = fieldToReader[field]; + if (reader != null) + reader.DoSetNorm(n, field, value_Renamed); + } + + public override TermEnum Terms() + { + EnsureOpen(); + return new ParallelTermEnum(this); + } + + public override TermEnum Terms(Term term) + { + EnsureOpen(); + return new ParallelTermEnum(this, term); + } + + public override int DocFreq(Term term) + { + EnsureOpen(); + IndexReader reader = fieldToReader[term.Field]; + return reader == null?0:reader.DocFreq(term); + } + + public override TermDocs TermDocs(Term term) + { + EnsureOpen(); + return new ParallelTermDocs(this, term); + } + + public override TermDocs TermDocs() + { + EnsureOpen(); + return new ParallelTermDocs(this); + } + + public override TermPositions TermPositions(Term term) + { + EnsureOpen(); + return new ParallelTermPositions(this, term); + } + + public override TermPositions TermPositions() + { + EnsureOpen(); + return new ParallelTermPositions(this); + } + + /// Checks recursively if all subreaders are up to date. + public override bool IsCurrent() + { + foreach (var reader in readers) + { + if (!reader.IsCurrent()) + { + return false; + } + } + + // all subreaders are up to date + return true; + } + + /// Checks recursively if all subindexes are optimized + public override bool IsOptimized() + { + foreach (var reader in readers) + { + if (!reader.IsOptimized()) + { + return false; + } + } + + // all subindexes are optimized + return true; + } + + + /// Not implemented. 
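+ /// (the getter below simply throws NotSupportedException)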
+ /// UnsupportedOperationException + public override long Version + { + get { throw new System.NotSupportedException("ParallelReader does not support this method."); } + } + + // for testing + public /*internal*/ virtual IndexReader[] GetSubReaders() + { + return readers.ToArray(); + } + + protected internal override void DoCommit(IDictionary commitUserData) + { + foreach(var reader in readers) + reader.Commit(commitUserData); + } + + protected internal override void DoClose() + { + lock (this) + { + for (int i = 0; i < readers.Count; i++) + { + if (decrefOnClose[i]) + { + readers[i].DecRef(); + } + else + { + readers[i].Close(); + } + } + } + + Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this); + } + + public override System.Collections.Generic.ICollection GetFieldNames(IndexReader.FieldOption fieldNames) + { + EnsureOpen(); + ISet fieldSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); + foreach(var reader in readers) + { + ICollection names = reader.GetFieldNames(fieldNames); + fieldSet.UnionWith(names); + } + return fieldSet; + } + + private class ParallelTermEnum : TermEnum + { + private void InitBlock(ParallelReader enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ParallelReader enclosingInstance; + public ParallelReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private System.String field; + private IEnumerator fieldIterator; + private TermEnum termEnum; + + private bool isDisposed; + + public ParallelTermEnum(ParallelReader enclosingInstance) + { + InitBlock(enclosingInstance); + try + { + field = Enclosing_Instance.fieldToReader.Keys.First(); + } + catch (ArgumentOutOfRangeException) + { + // No fields, so keep field == null, termEnum == null + return; + } + if (field != null) + termEnum = Enclosing_Instance.fieldToReader[field].Terms(); + } + + public ParallelTermEnum(ParallelReader enclosingInstance, Term term) + { + InitBlock(enclosingInstance); + field = term.Field; + IndexReader reader = Enclosing_Instance.fieldToReader[field]; + if (reader != null) + termEnum = reader.Terms(term); + } + + public override bool Next() + { + if (termEnum == null) + return false; + + // another term in this field? 
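+ // (first advance the current enum; if its next term still belongs to the current field we are done, otherwise fall through and move on to the next field that has terms)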
+ if (termEnum.Next() && (System.Object) termEnum.Term.Field == (System.Object) field) + return true; // yes, keep going + + termEnum.Close(); // close old termEnum + + // find the next field with terms, if any + if (fieldIterator == null) + { + var newList = new List(); + if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0) + { + var comparer = Enclosing_Instance.fieldToReader.Comparer; + foreach(var entry in Enclosing_Instance.fieldToReader.Keys.Where(x => comparer.Compare(x, field) >= 0)) + newList.Add(entry); + } + + fieldIterator = newList.Skip(1).GetEnumerator(); // Skip field to get next one + } + while (fieldIterator.MoveNext()) + { + field = fieldIterator.Current; + termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field)); + Term term = termEnum.Term; + if (term != null && (System.Object) term.Field == (System.Object) field) + return true; + else + termEnum.Close(); + } + + return false; // no more fields + } + + public override Term Term + { + get + { + if (termEnum == null) + return null; + + return termEnum.Term; + } + } + + public override int DocFreq() + { + if (termEnum == null) + return 0; + + return termEnum.DocFreq(); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (termEnum != null) + termEnum.Close(); + } + + isDisposed = true; + } + } + + // wrap a TermDocs in order to support seek(Term) + private class ParallelTermDocs : TermDocs + { + private void InitBlock(ParallelReader enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ParallelReader enclosingInstance; + public ParallelReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + protected internal TermDocs termDocs; + + private bool isDisposed; + + public ParallelTermDocs(ParallelReader enclosingInstance) + { + InitBlock(enclosingInstance); + } + public ParallelTermDocs(ParallelReader enclosingInstance, Term term) + { + InitBlock(enclosingInstance); + if(term == null) + termDocs = (Enclosing_Instance.readers.Count == 0) + ? 
null + : Enclosing_Instance.readers[0].TermDocs(null); + else + Seek(term); + } + + public virtual int Doc + { + get { return termDocs.Doc; } + } + + public virtual int Freq + { + get { return termDocs.Freq; } + } + + public virtual void Seek(Term term) + { + IndexReader reader = Enclosing_Instance.fieldToReader[term.Field]; + termDocs = reader != null?reader.TermDocs(term):null; + } + + public virtual void Seek(TermEnum termEnum) + { + Seek(termEnum.Term); + } + + public virtual bool Next() + { + if (termDocs == null) + return false; + + return termDocs.Next(); + } + + public virtual int Read(int[] docs, int[] freqs) + { + if (termDocs == null) + return 0; + + return termDocs.Read(docs, freqs); + } + + public virtual bool SkipTo(int target) + { + if (termDocs == null) + return false; + + return termDocs.SkipTo(target); + } + + [Obsolete("Use Dispose() instead")] + public virtual void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (termDocs != null) + termDocs.Close(); + } + + isDisposed = true; + } + } + + private class ParallelTermPositions:ParallelTermDocs, TermPositions + { + private void InitBlock(ParallelReader enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ParallelReader enclosingInstance; + public new ParallelReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + public ParallelTermPositions(ParallelReader enclosingInstance):base(enclosingInstance) + { + InitBlock(enclosingInstance); + } + public ParallelTermPositions(ParallelReader enclosingInstance, Term term):base(enclosingInstance) + { + InitBlock(enclosingInstance); + Seek(term); + } + + public override void Seek(Term term) + { + IndexReader reader = Enclosing_Instance.fieldToReader[term.Field]; + termDocs = reader != null?reader.TermPositions(term):null; + } + + public virtual int NextPosition() + { + // It is an error to call this if there is no next position, e.g. if termDocs==null + return ((TermPositions) termDocs).NextPosition(); + } + + public virtual int PayloadLength + { + get { return ((TermPositions) termDocs).PayloadLength; } + } + + public virtual byte[] GetPayload(byte[] data, int offset) + { + return ((TermPositions) termDocs).GetPayload(data, offset); + } + + + // TODO: Remove warning after API has been finalized + + public virtual bool IsPayloadAvailable + { + get { return ((TermPositions) termDocs).IsPayloadAvailable; } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/Payload.cs b/external/Lucene.Net.Light/src/core/Index/Payload.cs new file mode 100644 index 0000000000..a6f391a024 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/Payload.cs @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using TokenStream = Lucene.Net.Analysis.TokenStream; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + + /// A Payload is metadata that can be stored together with each occurrence + /// of a term. This metadata is stored inline in the posting list of the + /// specific term. + ///

+ /// To store payloads in the index a TokenStream has to be used that + /// produces payload data. + ///

+ /// Use TermPositions.PayloadLength and TermPositions.GetPayload + /// to retrieve the payloads from the index.
+ /// + ///
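+ /// For example, new Payload(new byte[] { 1, 2, 3 }) wraps three bytes of per-occurrence metadata; note that the constructors keep a reference to the passed-in array rather than copying it.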

+ [Serializable] + public class Payload : System.ICloneable + { + /// the byte array containing the payload data + protected internal byte[] data; + + /// the offset within the byte array + protected internal int internalOffset; + + /// the length of the payload data + protected internal int internalLength; + + /// Creates an empty payload and does not allocate a byte array. + public Payload() + { + // nothing to do + } + + /// Creates a new payload with the the given array as data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + /// + /// the data of this payload + /// + public Payload(byte[] data):this(data, 0, data.Length) + { + } + + /// Creates a new payload with the the given array as data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + /// + /// the data of this payload + /// + /// the offset in the data byte array + /// + /// the length of the data + /// + public Payload(byte[] data, int offset, int length) + { + if (offset < 0 || offset + length > data.Length) + { + throw new System.ArgumentException(); + } + this.data = data; + this.internalOffset = offset; + this.internalLength = length; + } + + /// Sets this payloads data. + /// A reference to the passed-in array is held, i. e. no + /// copy is made. + /// + public virtual void SetData(byte[] value, int offset, int length) + { + this.data = value; + this.internalOffset = offset; + this.internalLength = length; + } + + /// Gets or sets a reference to the underlying byte array + /// that holds this payloads data. Data is not copied. + /// + public virtual void SetData(byte[] value) + { + SetData(value, 0, value.Length); + } + + /// Gets or sets a reference to the underlying byte array + /// that holds this payloads data. Data is not copied. + /// + public virtual byte[] GetData() + { + return this.data; + } + + /// Returns the offset in the underlying byte array + public virtual int Offset + { + get { return this.internalOffset; } + } + + /// Returns the length of the payload data. + public virtual int Length + { + get { return this.internalLength; } + } + + /// Returns the byte at the given index. + public virtual byte ByteAt(int index) + { + if (0 <= index && index < this.internalLength) + { + return this.data[this.internalOffset + index]; + } + throw new System. IndexOutOfRangeException("Index of bound " + index); + } + + /// Allocates a new byte array, copies the payload data into it and returns it. + public virtual byte[] ToByteArray() + { + byte[] retArray = new byte[this.internalLength]; + Array.Copy(this.data, this.internalOffset, retArray, 0, this.internalLength); + return retArray; + } + + /// Copies the payload data to a byte array. + /// + /// + /// the target byte array + /// + /// the offset in the target byte array + /// + public virtual void CopyTo(byte[] target, int targetOffset) + { + if (this.internalLength > target.Length + targetOffset) + { + throw new System.IndexOutOfRangeException(); + } + Array.Copy(this.data, this.internalOffset, target, targetOffset, this.internalLength); + } + + /// Clones this payload by creating a copy of the underlying + /// byte array. + /// + public virtual System.Object Clone() + { + try + { + // Start with a shallow copy of data + Payload clone = (Payload) base.MemberwiseClone(); + // Only copy the part of data that belongs to this Payload + if (internalOffset == 0 && internalLength == data.Length) + { + // It is the whole thing, so just clone it. 
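+ // (allocate a fresh array and copy, so the clone never shares this payload's buffer)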
+ clone.data = new byte[data.Length]; + data.CopyTo(clone.data, 0); + } + else + { + // Just get the part + clone.data = this.ToByteArray(); + clone.internalOffset = 0; + } + return clone; + } + catch (System.Exception e) + { + throw new System.SystemException(e.Message, e); // shouldn't happen + } + } + + public override bool Equals(System.Object obj) + { + if (obj == this) + return true; + if (obj is Payload) + { + Payload other = (Payload) obj; + if (internalLength == other.internalLength) + { + for (int i = 0; i < internalLength; i++) + if (data[internalOffset + i] != other.data[other.internalOffset + i]) + return false; + return true; + } + else + return false; + } + else + return false; + } + + public override int GetHashCode() + { + return ArrayUtil.HashCode(data, internalOffset, internalOffset + internalLength); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/PositionBasedTermVectorMapper.cs b/external/Lucene.Net.Light/src/core/Index/PositionBasedTermVectorMapper.cs new file mode 100644 index 0000000000..af548a7b57 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/PositionBasedTermVectorMapper.cs @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// For each Field, store position by position information. It ignores frequency information + ///

+ /// This is not thread-safe. + ///

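+ // Hedged usage sketch, not part of the original source: the IndexReader
+ // overload that accepts a TermVectorMapper is assumed here; FieldToTerms and
+ // TVPositionInfo are the members declared below. "reader", "docId" and the
+ // field name "body" are illustrative placeholders.
+ //
+ //   var mapper = new PositionBasedTermVectorMapper();
+ //   reader.GetTermFreqVector(docId, "body", mapper);   // assumed reader API
+ //   foreach (var posEntry in mapper.FieldToTerms["body"])
+ //       System.Console.WriteLine(posEntry.Key + ": " + posEntry.Value.Terms.Count + " term(s)");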
+ public class PositionBasedTermVectorMapper:TermVectorMapper + { + private IDictionary> fieldToTerms; + + private System.String currentField; + /// A Map of Integer and TVPositionInfo + private IDictionary currentPositions; + private bool storeOffsets; + + public PositionBasedTermVectorMapper():base(false, false) + { + } + + public PositionBasedTermVectorMapper(bool ignoringOffsets):base(false, ignoringOffsets) + { + } + + /// Never ignores positions. This mapper doesn't make much sense unless there are positions + /// false + public override bool IsIgnoringPositions + { + get { return false; } + } + + /// Callback for the TermVectorReader. + /// + /// + /// + /// + /// + /// + /// + /// + public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) + { + for (int i = 0; i < positions.Length; i++) + { + System.Int32 posVal = positions[i]; + TVPositionInfo pos = currentPositions[posVal]; + if (pos == null) + { + pos = new TVPositionInfo(positions[i], storeOffsets); + currentPositions[posVal] = pos; + } + pos.addTerm(term, offsets != null ? offsets[i] : TermVectorOffsetInfo.Null); + } + } + + /// Callback mechanism used by the TermVectorReader + /// The field being read + /// + /// The number of terms in the vector + /// + /// Whether offsets are available + /// + /// Whether positions are available + /// + public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) + { + if (storePositions == false) + { + throw new System.SystemException("You must store positions in order to use this Mapper"); + } + if (storeOffsets == true) + { + //ignoring offsets + } + fieldToTerms = new HashMap>(numTerms); + this.storeOffsets = storeOffsets; + currentField = field; + currentPositions = new HashMap(); + fieldToTerms[currentField] = currentPositions; + } + + /// Get the mapping between fields and terms, sorted by the comparator + /// + /// + /// A map between field names and a Map. The sub-Map key is the position as the integer, the value is <see cref="Lucene.Net.Index.PositionBasedTermVectorMapper.TVPositionInfo" />. + public virtual IDictionary> FieldToTerms + { + get { return fieldToTerms; } + } + + /// Container for a term at a position + public class TVPositionInfo + { + /// + /// The position of the term + /// + virtual public int Position + { + get + { + return position; + } + + } + /// Note, there may be multiple terms at the same position + /// A List of Strings + /// + virtual public IList Terms + { + get + { + return terms; + } + + } + /// Parallel list (to ) of TermVectorOffsetInfo objects. + /// There may be multiple entries since there may be multiple terms at a position + /// A List of TermVectorOffsetInfo objects, if offsets are store. 
+ /// + virtual public IList Offsets + { + get + { + return offsets; + } + + } + private int position; + //a list of Strings + private IList terms; + //A list of TermVectorOffsetInfo + private IList offsets; + + + public TVPositionInfo(int position, bool storeOffsets) + { + this.position = position; + terms = new List(); + if (storeOffsets) + { + offsets = new List(); + } + } + + internal virtual void addTerm(System.String term, TermVectorOffsetInfo info) + { + terms.Add(term); + if (offsets != null) + { + offsets.Add(info); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/RawPostingList.cs b/external/Lucene.Net.Light/src/core/Index/RawPostingList.cs new file mode 100644 index 0000000000..bffc2decd9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/RawPostingList.cs @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + + /// This is the base class for an in-memory posting list, + /// keyed by a Token. maintains a hash + /// table holding one instance of this per unique Token. + /// Consumers of TermsHash () must + /// subclass this class with its own concrete class. + /// FreqProxTermsWriter.PostingList is a private inner class used + /// for the freq/prox postings, and + /// TermVectorsTermsWriter.PostingList is a private inner class + /// used to hold TermVectors postings. + /// + + abstract class RawPostingList + { + internal static readonly int BYTES_SIZE; + internal int textStart; + internal int intStart; + internal int byteStart; + static RawPostingList() + { + BYTES_SIZE = DocumentsWriter.OBJECT_HEADER_BYTES + 3 * DocumentsWriter.INT_NUM_BYTE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ReadOnlyDirectoryReader.cs b/external/Lucene.Net.Light/src/core/Index/ReadOnlyDirectoryReader.cs new file mode 100644 index 0000000000..8f0f3b7386 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ReadOnlyDirectoryReader.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + public class ReadOnlyDirectoryReader:DirectoryReader + { + internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos sis, IndexDeletionPolicy deletionPolicy, int termInfosIndexDivisor):base(directory, sis, deletionPolicy, true, termInfosIndexDivisor) + { + } + + internal ReadOnlyDirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts, System.Collections.Generic.IDictionary oldNormsCache, bool doClone, int termInfosIndexDivisor) + : base(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) + { + } + + internal ReadOnlyDirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor):base(writer, infos, termInfosIndexDivisor) + { + } + + protected internal override void AcquireWriteLock() + { + ReadOnlySegmentReader.NoWrite(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ReadOnlySegmentReader.cs b/external/Lucene.Net.Light/src/core/Index/ReadOnlySegmentReader.cs new file mode 100644 index 0000000000..3c7c9161fc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ReadOnlySegmentReader.cs @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + public class ReadOnlySegmentReader:SegmentReader + { + + internal static void NoWrite() + { + throw new System.NotSupportedException("This IndexReader cannot make any changes to the index (it was opened with readOnly = true)"); + } + + protected internal override void AcquireWriteLock() + { + NoWrite(); + } + + // Not synchronized + public override bool IsDeleted(int n) + { + return deletedDocs != null && deletedDocs.Get(n); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/ReusableStringReader.cs b/external/Lucene.Net.Light/src/core/Index/ReusableStringReader.cs new file mode 100644 index 0000000000..54c1b7d600 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/ReusableStringReader.cs @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// Used by DocumentsWriter to implemented a StringReader + /// that can be reset to a new string; we use this when + /// tokenizing the string value from a Field. + /// + sealed class ReusableStringReader : System.IO.TextReader + { + internal int upto; + internal int left; + internal System.String s; + internal void Init(System.String s) + { + this.s = s; + left = s.Length; + this.upto = 0; + } + + public int Read(char[] c) + { + return Read(c, 0, c.Length); + } + + public override int Read(System.Char[] c, int off, int len) + { + if (left > len) + { + TextSupport.GetCharsFromString(s, upto, upto + len, c, off); + upto += len; + left -= len; + return len; + } + else if (0 == left) + { + // don't keep a reference (s could have been very large) + s = null; + return 0; + } + else + { + TextSupport.GetCharsFromString(s, upto, upto + left, c, off); + int r = left; + left = 0; + upto = s.Length; + return r; + } + } + + //[Obsolete("Use Dispose() instead")] + public override void Close() + { + Dispose(); + } + + public override int Read() + { + if (left > 0) + { + char ch = s[upto]; + upto += 1; + left -= 1; + return (int)ch; + } + return -1; + } + + public override int ReadBlock(char[] buffer, int index, int count) + { + return Read(buffer, index, count); + } + + public override string ReadLine() + { + int i; + for (i = upto; i < s.Length; i++) + { + char c = s[i]; + if (c == '\r' || c == '\n') + { + string result = s.Substring(upto, i - upto); + upto = i + 1; + left = s.Length - upto; + if (c == '\r' && upto < s.Length && s[upto] == '\n') + { + upto++; + left--; + } + return result; + } + } + if (i > upto) + { + return ReadToEnd(); + } + return null; + } + + public override int Peek() + { + if (left > 0) + { + return (int)s[upto]; + } + return -1; + } + + public override string ReadToEnd() + { + string result = s.Substring(upto, left); + left = 0; + upto = s.Length - 1; + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentInfo.cs b/external/Lucene.Net.Light/src/core/Index/SegmentInfo.cs new file mode 100644 index 0000000000..697dda6cc1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentInfo.cs @@ -0,0 +1,875 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using BitVector = Lucene.Net.Util.BitVector; + +namespace Lucene.Net.Index +{ + + /// Information about a segment such as it's name, directory, and files related + /// to the segment. + /// + /// *

NOTE: This API is new and still experimental + /// (subject to change suddenly in the next release)

+ ///

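+ // Illustrative sketch, not part of the original source: it shows the kind of
+ // per-segment queries this class answers, using only members defined below.
+ // "dir" stands for an already-open Lucene.Net.Store.Directory and "_0" is a
+ // made-up segment name.
+ //
+ //   SegmentInfo si = new SegmentInfo("_0", 1000, dir);
+ //   bool compound  = si.GetUseCompoundFile();
+ //   bool deletions = si.HasDeletions();
+ //   long bytes     = si.SizeInBytes();      // sums the lengths of si.Files()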
+ public sealed class SegmentInfo : System.ICloneable + { + + internal const int NO = - 1; // e.g. no norms; no deletes; + internal const int YES = 1; // e.g. have norms; have deletes; + internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions + internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it. + + public System.String name; // unique name in dir + public int docCount; // number of docs in seg + public Directory dir; // where segment resides + + private bool preLockless; // true if this is a segments file written before + // lock-less commits (2.1) + + private long delGen; // current generation of del file; NO if there + // are no deletes; CHECK_DIR if it's a pre-2.1 segment + // (and we must check filesystem); YES or higher if + // there are deletes at generation N + + private long[] normGen; // current generation of each field's norm file. + // If this array is null, for lockLess this means no + // separate norms. For preLockLess this means we must + // check filesystem. If this array is not null, its + // values mean: NO says this field has no separate + // norms; CHECK_DIR says it is a preLockLess segment and + // filesystem must be checked; >= YES says this field + // has separate norms with the specified generation + + private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's + // pre-2.1 (ie, must check file system to see + // if .cfs and .nrm exist) + + private bool hasSingleNormFile; // true if this segment maintains norms in a single file; + // false otherwise + // this is currently false for segments populated by DocumentWriter + // and true for newly created merged segments (both + // compound and non compound). + + private IList files; // cached list of files that this segment uses + // in the Directory + + internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand) + + private int docStoreOffset; // if this segment shares stored fields & vectors, this + // offset is where in that file this segment's docs begin + private System.String docStoreSegment; // name used to derive fields/vectors file we share with + // other segments + private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) + + private int delCount; // How many deleted docs in this segment, or -1 if not yet known + // (if it's an older index) + + private bool hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false + + private IDictionary diagnostics; + + public override System.String ToString() + { + return "si: " + dir.ToString() + " " + name + " docCount: " + docCount + " delCount: " + delCount + " delFileName: " + GetDelFileName(); + } + + public SegmentInfo(System.String name, int docCount, Directory dir) + { + this.name = name; + this.docCount = docCount; + this.dir = dir; + delGen = NO; + isCompoundFile = (sbyte) (CHECK_DIR); + preLockless = true; + hasSingleNormFile = false; + docStoreOffset = - 1; + docStoreSegment = name; + docStoreIsCompoundFile = false; + delCount = 0; + hasProx = true; + } + + public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile):this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false, true) + { + } + + public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile, bool hasProx):this(name, 
docCount, dir) + { + this.isCompoundFile = (sbyte) (isCompoundFile?YES:NO); + this.hasSingleNormFile = hasSingleNormFile; + preLockless = false; + this.docStoreOffset = docStoreOffset; + this.docStoreSegment = docStoreSegment; + this.docStoreIsCompoundFile = docStoreIsCompoundFile; + this.hasProx = hasProx; + delCount = 0; + System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null, "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount); + } + + /// Copy everything from src SegmentInfo into our instance. + internal void Reset(SegmentInfo src) + { + ClearFiles(); + name = src.name; + docCount = src.docCount; + dir = src.dir; + preLockless = src.preLockless; + delGen = src.delGen; + docStoreOffset = src.docStoreOffset; + docStoreIsCompoundFile = src.docStoreIsCompoundFile; + if (src.normGen == null) + { + normGen = null; + } + else + { + normGen = new long[src.normGen.Length]; + Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length); + } + isCompoundFile = src.isCompoundFile; + hasSingleNormFile = src.hasSingleNormFile; + delCount = src.delCount; + } + + public IDictionary Diagnostics + { + get { return diagnostics; } + internal set { this.diagnostics = value; } + } + + /// Construct a new SegmentInfo instance by reading a + /// previously saved SegmentInfo from input. + /// + /// + /// directory to load from + /// + /// format of the segments info file + /// + /// input handle to read segment info from + /// + internal SegmentInfo(Directory dir, int format, IndexInput input) + { + this.dir = dir; + name = input.ReadString(); + docCount = input.ReadInt(); + if (format <= SegmentInfos.FORMAT_LOCKLESS) + { + delGen = input.ReadLong(); + if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) + { + docStoreOffset = input.ReadInt(); + if (docStoreOffset != - 1) + { + docStoreSegment = input.ReadString(); + docStoreIsCompoundFile = (1 == input.ReadByte()); + } + else + { + docStoreSegment = name; + docStoreIsCompoundFile = false; + } + } + else + { + docStoreOffset = - 1; + docStoreSegment = name; + docStoreIsCompoundFile = false; + } + if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) + { + hasSingleNormFile = (1 == input.ReadByte()); + } + else + { + hasSingleNormFile = false; + } + int numNormGen = input.ReadInt(); + if (numNormGen == NO) + { + normGen = null; + } + else + { + normGen = new long[numNormGen]; + for (int j = 0; j < numNormGen; j++) + { + normGen[j] = input.ReadLong(); + } + } + isCompoundFile = (sbyte) input.ReadByte(); + preLockless = (isCompoundFile == CHECK_DIR); + if (format <= SegmentInfos.FORMAT_DEL_COUNT) + { + delCount = input.ReadInt(); + System.Diagnostics.Debug.Assert(delCount <= docCount); + } + else + delCount = - 1; + if (format <= SegmentInfos.FORMAT_HAS_PROX) + hasProx = input.ReadByte() == 1; + else + hasProx = true; + + if (format <= SegmentInfos.FORMAT_DIAGNOSTICS) + { + diagnostics = input.ReadStringStringMap(); + } + else + { + diagnostics = new Dictionary(); + } + } + else + { + delGen = CHECK_DIR; + normGen = null; + isCompoundFile = (sbyte) (CHECK_DIR); + preLockless = true; + hasSingleNormFile = false; + docStoreOffset = - 1; + docStoreIsCompoundFile = false; + docStoreSegment = null; + delCount = - 1; + hasProx = true; + diagnostics = new Dictionary(); + } + } + + internal void SetNumFields(int numFields) + { + if (normGen == null) + { + // normGen is null if we loaded a pre-2.1 segment + // file, or, if this segments file hasn't had any + // norms set against it yet: + normGen = new long[numFields]; + + 
if (preLockless) + { + // Do nothing: thus leaving normGen[k]==CHECK_DIR (==0), so that later we know + // we have to check filesystem for norm files, because this is prelockless. + } + else + { + // This is a FORMAT_LOCKLESS segment, which means + // there are no separate norms: + for (int i = 0; i < numFields; i++) + { + normGen[i] = NO; + } + } + } + } + + /// Returns total size in bytes of all of files used by + /// this segment. + /// + public long SizeInBytes() + { + if (sizeInBytes == - 1) + { + IList files = Files(); + int size = files.Count; + sizeInBytes = 0; + for (int i = 0; i < size; i++) + { + System.String fileName = files[i]; + // We don't count bytes used by a shared doc store + // against this segment: + if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName)) + sizeInBytes += dir.FileLength(fileName); + } + } + return sizeInBytes; + } + + public bool HasDeletions() + { + // Cases: + // + // delGen == NO: this means this segment was written + // by the LOCKLESS code and for certain does not have + // deletions yet + // + // delGen == CHECK_DIR: this means this segment was written by + // pre-LOCKLESS code which means we must check + // directory to see if .del file exists + // + // delGen >= YES: this means this segment was written by + // the LOCKLESS code and for certain has + // deletions + // + if (delGen == NO) + { + return false; + } + else if (delGen >= YES) + { + return true; + } + else + { + return dir.FileExists(GetDelFileName()); + } + } + + internal void AdvanceDelGen() + { + // delGen 0 is reserved for pre-LOCKLESS format + if (delGen == NO) + { + delGen = YES; + } + else + { + delGen++; + } + ClearFiles(); + } + + internal void ClearDelGen() + { + delGen = NO; + ClearFiles(); + } + + public System.Object Clone() + { + SegmentInfo si = new SegmentInfo(name, docCount, dir); + si.isCompoundFile = isCompoundFile; + si.delGen = delGen; + si.delCount = delCount; + si.hasProx = hasProx; + si.preLockless = preLockless; + si.hasSingleNormFile = hasSingleNormFile; + si.diagnostics = new HashMap(this.diagnostics); + if (this.diagnostics != null) + { + si.diagnostics = new System.Collections.Generic.Dictionary(); + foreach (string o in diagnostics.Keys) + { + si.diagnostics.Add(o,diagnostics[o]); + } + } + if (normGen != null) + { + si.normGen = new long[normGen.Length]; + normGen.CopyTo(si.normGen, 0); + } + si.docStoreOffset = docStoreOffset; + si.docStoreSegment = docStoreSegment; + si.docStoreIsCompoundFile = docStoreIsCompoundFile; + if (this.files != null) + { + si.files = new System.Collections.Generic.List(); + foreach (string file in files) + { + si.files.Add(file); + } + } + + return si; + } + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public System.String GetDelFileName() + { + if (delGen == NO) + { + // In this case we know there is no deletion filename + // against this segment + return null; + } + else + { + // If delGen is CHECK_DIR, it's the pre-lockless-commit file format + return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); + } + } + + /// Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX). 
+ /// + /// + /// the field index to check + /// + public bool HasSeparateNorms(int fieldNumber) + { + if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) + { + // Must fallback to directory file exists check: + System.String fileName = name + ".s" + fieldNumber; + return dir.FileExists(fileName); + } + else if (normGen == null || normGen[fieldNumber] == NO) + { + return false; + } + else + { + return true; + } + } + + /// Returns true if any fields in this segment have separate norms. + public bool HasSeparateNorms() + { + if (normGen == null) + { + if (!preLockless) + { + // This means we were created w/ LOCKLESS code and no + // norms are written yet: + return false; + } + else + { + // This means this segment was saved with pre-LOCKLESS + // code. So we must fallback to the original + // directory list check: + System.String[] result = dir.ListAll(); + if (result == null) + { + throw new System.IO.IOException("cannot read directory " + dir + ": ListAll() returned null"); + } + + IndexFileNameFilter filter = IndexFileNameFilter.Filter; + System.String pattern; + pattern = name + ".s"; + int patternLength = pattern.Length; + for (int i = 0; i < result.Length; i++) + { + string fileName = result[i]; + if (filter.Accept(null, fileName) && fileName.StartsWith(pattern) && char.IsDigit(fileName[patternLength])) + return true; + } + return false; + } + } + else + { + // This means this segment was saved with LOCKLESS + // code so we first check whether any normGen's are >= 1 + // (meaning they definitely have separate norms): + for (int i = 0; i < normGen.Length; i++) + { + if (normGen[i] >= YES) + { + return true; + } + } + // Next we look for any == 0. These cases were + // pre-LOCKLESS and must be checked in directory: + for (int i = 0; i < normGen.Length; i++) + { + if (normGen[i] == CHECK_DIR) + { + if (HasSeparateNorms(i)) + { + return true; + } + } + } + } + + return false; + } + + /// Increment the generation count for the norms file for + /// this field. + /// + /// + /// field whose norm file will be rewritten + /// + internal void AdvanceNormGen(int fieldIndex) + { + if (normGen[fieldIndex] == NO) + { + normGen[fieldIndex] = YES; + } + else + { + normGen[fieldIndex]++; + } + ClearFiles(); + } + + /// Get the file name for the norms file for this field. + /// + /// + /// field index + /// + public System.String GetNormFileName(int number) + { + System.String prefix; + + long gen; + if (normGen == null) + { + gen = CHECK_DIR; + } + else + { + gen = normGen[number]; + } + + if (HasSeparateNorms(number)) + { + // case 1: separate norm + prefix = ".s"; + return IndexFileNames.FileNameFromGeneration(name, prefix + number, gen); + } + + if (hasSingleNormFile) + { + // case 2: lockless (or nrm file exists) - single file for all norms + prefix = "." + IndexFileNames.NORMS_EXTENSION; + return IndexFileNames.FileNameFromGeneration(name, prefix, WITHOUT_GEN); + } + + // case 3: norm file for each field + prefix = ".f"; + return IndexFileNames.FileNameFromGeneration(name, prefix + number, WITHOUT_GEN); + } + + /// Returns true if this segment is stored as a compound + /// file; else, false. + /// + internal void SetUseCompoundFile(bool value) + { + if (value) + { + this.isCompoundFile = (sbyte) (YES); + } + else + { + this.isCompoundFile = (sbyte) (NO); + } + ClearFiles(); + } + + /// Returns true if this segment is stored as a compound + /// file; else, false. 
+ /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public bool GetUseCompoundFile() + { + if (isCompoundFile == NO) + { + return false; + } + if (isCompoundFile == YES) + { + return true; + } + return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); + } + + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public int GetDelCount() + { + if (delCount == - 1) + { + if (HasDeletions()) + { + System.String delFileName = GetDelFileName(); + delCount = new BitVector(dir, delFileName).Count(); + } + else + delCount = 0; + } + System.Diagnostics.Debug.Assert(delCount <= docCount); + return delCount; + } + + internal void SetDelCount(int delCount) + { + this.delCount = delCount; + System.Diagnostics.Debug.Assert(delCount <= docCount); + } + + public int DocStoreOffset + { + get { return docStoreOffset; } + internal set + { + docStoreOffset = value; + ClearFiles(); + } + } + + public bool DocStoreIsCompoundFile + { + get { return docStoreIsCompoundFile; } + internal set + { + docStoreIsCompoundFile = value; + ClearFiles(); + } + } + + public string DocStoreSegment + { + get { return docStoreSegment; } + } + + internal void SetDocStore(int offset, System.String segment, bool isCompoundFile) + { + docStoreOffset = offset; + docStoreSegment = segment; + docStoreIsCompoundFile = isCompoundFile; + } + + /// Save this segment's info. + internal void Write(IndexOutput output) + { + output.WriteString(name); + output.WriteInt(docCount); + output.WriteLong(delGen); + output.WriteInt(docStoreOffset); + if (docStoreOffset != - 1) + { + output.WriteString(docStoreSegment); + output.WriteByte((byte) (docStoreIsCompoundFile?1:0)); + } + + output.WriteByte((byte) (hasSingleNormFile?1:0)); + if (normGen == null) + { + output.WriteInt(NO); + } + else + { + output.WriteInt(normGen.Length); + for (int j = 0; j < normGen.Length; j++) + { + output.WriteLong(normGen[j]); + } + } + output.WriteByte((byte) isCompoundFile); + output.WriteInt(delCount); + output.WriteByte((byte) (hasProx?1:0)); + output.WriteStringStringMap(diagnostics); + } + + public bool HasProx + { + get { return hasProx; } + internal set + { + this.hasProx = value; + ClearFiles(); + } + } + + private void AddIfExists(IList files, System.String fileName) + { + if (dir.FileExists(fileName)) + files.Add(fileName); + } + + /* + * Return all files referenced by this SegmentInfo. The + * returns List is a locally cached List so you should not + * modify it. + */ + + public IList Files() + { + + if (files != null) + { + // Already cached: + return files; + } + + var fileList = new System.Collections.Generic.List(); + + bool useCompoundFile = GetUseCompoundFile(); + + if (useCompoundFile) + { + fileList.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); + } + else + { + System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; + for (int i = 0; i < exts.Length; i++) + AddIfExists(fileList, name + "." + exts[i]); + } + + if (docStoreOffset != - 1) + { + // We are sharing doc stores (stored fields, term + // vectors) with other segments + System.Diagnostics.Debug.Assert(docStoreSegment != null); + if (docStoreIsCompoundFile) + { + fileList.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION); + } + else + { + System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS; + for (int i = 0; i < exts.Length; i++) + AddIfExists(fileList, docStoreSegment + "." 
+ exts[i]); + } + } + else if (!useCompoundFile) + { + // We are not sharing, and, these files were not + // included in the compound file + System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS; + for (int i = 0; i < exts.Length; i++) + AddIfExists(fileList, name + "." + exts[i]); + } + + System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); + if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName))) + { + fileList.Add(delFileName); + } + + // Careful logic for norms files + if (normGen != null) + { + for (int i = 0; i < normGen.Length; i++) + { + long gen = normGen[i]; + if (gen >= YES) + { + // Definitely a separate norm file, with generation: + fileList.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); + } + else if (NO == gen) + { + // No separate norms but maybe plain norms + // in the non compound file case: + if (!hasSingleNormFile && !useCompoundFile) + { + System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; + if (dir.FileExists(fileName)) + { + fileList.Add(fileName); + } + } + } + else if (CHECK_DIR == gen) + { + // Pre-2.1: we have to check file existence + System.String fileName = null; + if (useCompoundFile) + { + fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; + } + else if (!hasSingleNormFile) + { + fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; + } + if (fileName != null && dir.FileExists(fileName)) + { + fileList.Add(fileName); + } + } + } + } + else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) + { + // Pre-2.1: we have to scan the dir to find all + // matching _X.sN/_X.fN files for our segment: + System.String prefix; + if (useCompoundFile) + prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; + else + prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION; + int prefixLength = prefix.Length; + System.String[] allFiles = dir.ListAll(); + IndexFileNameFilter filter = IndexFileNameFilter.Filter; + for (int i = 0; i < allFiles.Length; i++) + { + System.String fileName = allFiles[i]; + if (filter.Accept(null, fileName) && fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix)) + { + fileList.Add(fileName); + } + } + } + //System.Diagnostics.Debug.Assert(); + files = fileList; + return files; + } + + /* Called whenever any change is made that affects which + * files this segment has. */ + private void ClearFiles() + { + files = null; + sizeInBytes = - 1; + } + + /// Used for debugging + public System.String SegString(Directory dir) + { + System.String cfs; + try + { + if (GetUseCompoundFile()) + cfs = "c"; + else + cfs = "C"; + } + catch (System.IO.IOException) + { + cfs = "?"; + } + + System.String docStore; + + if (docStoreOffset != - 1) + docStore = "->" + docStoreSegment; + else + docStore = ""; + + return name + ":" + cfs + (this.dir == dir?"":"x") + docCount + docStore; + } + + /// We consider another SegmentInfo instance equal if it + /// has the same dir and same name. 
+ /// + public override bool Equals(System.Object obj) + { + if (this == obj) return true; + + if (obj is SegmentInfo) + { + SegmentInfo other = (SegmentInfo) obj; + return other.dir == dir && other.name.Equals(name); + } + return false; + } + + public override int GetHashCode() + { + return dir.GetHashCode() + name.GetHashCode(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentInfos.cs b/external/Lucene.Net.Light/src/core/Index/SegmentInfos.cs new file mode 100644 index 0000000000..ca5297e330 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentInfos.cs @@ -0,0 +1,1074 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.IO; +using Lucene.Net.Support; +using ChecksumIndexInput = Lucene.Net.Store.ChecksumIndexInput; +using ChecksumIndexOutput = Lucene.Net.Store.ChecksumIndexOutput; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using NoSuchDirectoryException = Lucene.Net.Store.NoSuchDirectoryException; + +namespace Lucene.Net.Index +{ + + /// A collection of segmentInfo objects with methods for operating on + /// those segments in relation to the file system. + /// + ///

NOTE: This API is new and still experimental + /// (subject to change suddenly in the next release)

+ ///

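+ // Hedged example, not part of the original source: reading the current commit
+ // point from a directory, using only members declared below. "dir" stands for
+ // an already-open Lucene.Net.Store.Directory.
+ //
+ //   var infos = new SegmentInfos();
+ //   infos.Read(dir);                        // locates and loads the latest segments_N
+ //   System.Console.WriteLine(infos.GetCurrentSegmentFileName());
+ //   System.Console.WriteLine("segments: " + infos.Count + ", version: " + infos.Version);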
+ [Serializable] + public sealed class SegmentInfos : List, ICloneable + { + private class AnonymousClassFindSegmentsFile:FindSegmentsFile + { + private void InitBlock(SegmentInfos enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SegmentInfos enclosingInstance; + public SegmentInfos Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFindSegmentsFile(SegmentInfos enclosingInstance, Lucene.Net.Store.Directory Param1):base(Param1) + { + InitBlock(enclosingInstance); + } + + public /*protected internal*/ override System.Object DoBody(System.String segmentFileName) + { + Enclosing_Instance.Read(directory, segmentFileName); + return null; + } + } + /// The file format version, a negative number. + /* Works since counter, the old 1st entry, is always >= 0 */ + public const int FORMAT = - 1; + + /// This format adds details used for lockless commits. It differs + /// slightly from the previous format in that file names + /// are never re-used (write once). Instead, each file is + /// written to the next generation. For example, + /// segments_1, segments_2, etc. This allows us to not use + /// a commit lock. See file + /// formats for details. + /// + public const int FORMAT_LOCKLESS = - 2; + + /// This format adds a "hasSingleNormFile" flag into each segment info. + /// See LUCENE-756 + /// for details. + /// + public const int FORMAT_SINGLE_NORM_FILE = - 3; + + /// This format allows multiple segments to share a single + /// vectors and stored fields file. + /// + public const int FORMAT_SHARED_DOC_STORE = - 4; + + /// This format adds a checksum at the end of the file to + /// ensure all bytes were successfully written. + /// + public const int FORMAT_CHECKSUM = - 5; + + /// This format adds the deletion count for each segment. + /// This way IndexWriter can efficiently report numDocs(). + /// + public const int FORMAT_DEL_COUNT = - 6; + + /// This format adds the boolean hasProx to record if any + /// fields in the segment store prox information (ie, have + /// omitTermFreqAndPositions==false) + /// + public const int FORMAT_HAS_PROX = - 7; + + /// This format adds optional commit userData (String) storage. + public const int FORMAT_USER_DATA = - 8; + + /// This format adds optional per-segment String + /// dianostics storage, and switches userData to Map + /// + public const int FORMAT_DIAGNOSTICS = - 9; + + /* This must always point to the most recent file format. */ + internal static readonly int CURRENT_FORMAT = FORMAT_DIAGNOSTICS; + + public int counter = 0; // used to name new segments + /// counts how often the index has been changed by adding or deleting docs. + /// starting with the current time in milliseconds forces to create unique version numbers. 
+ /// + private long version = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond); + + private long generation = 0; // generation of the "segments_N" for the next commit + private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read + // or wrote; this is normally the same as generation except if + // there was an IOException that had interrupted a commit + + private IDictionary userData = new HashMap(); // Opaque Map that user can specify during IndexWriter.commit + + /// If non-null, information about loading segments_N files + /// + /// + private static System.IO.StreamWriter infoStream; + + public SegmentInfo Info(int i) + { + return (SegmentInfo) this[i]; + } + + /// Get the generation (N) of the current segments_N file + /// from a list of files. + /// + /// + /// -- array of file names to check + /// + public static long GetCurrentSegmentGeneration(System.String[] files) + { + if (files == null) + { + return - 1; + } + long max = - 1; + for (int i = 0; i < files.Length; i++) + { + System.String file = files[i]; + if (file.StartsWith(IndexFileNames.SEGMENTS) && !file.Equals(IndexFileNames.SEGMENTS_GEN)) + { + long gen = GenerationFromSegmentsFileName(file); + if (gen > max) + { + max = gen; + } + } + } + return max; + } + + /// Get the generation (N) of the current segments_N file + /// in the directory. + /// + /// + /// -- directory to search for the latest segments_N file + /// + public static long GetCurrentSegmentGeneration(Directory directory) + { + try + { + return GetCurrentSegmentGeneration(directory.ListAll()); + } + catch (NoSuchDirectoryException) + { + return - 1; + } + } + + /// Get the filename of the current segments_N file + /// from a list of files. + /// + /// + /// -- array of file names to check + /// + + public static System.String GetCurrentSegmentFileName(System.String[] files) + { + return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", GetCurrentSegmentGeneration(files)); + } + + /// Get the filename of the current segments_N file + /// in the directory. + /// + /// + /// -- directory to search for the latest segments_N file + /// + public static System.String GetCurrentSegmentFileName(Directory directory) + { + return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", GetCurrentSegmentGeneration(directory)); + } + + /// Get the segments_N filename in use by this segment infos. + public System.String GetCurrentSegmentFileName() + { + return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", lastGeneration); + } + + /// Parse the generation off the segments file name and + /// return it. + /// + public static long GenerationFromSegmentsFileName(System.String fileName) + { + if (fileName.Equals(IndexFileNames.SEGMENTS)) + { + return 0; + } + else if (fileName.StartsWith(IndexFileNames.SEGMENTS)) + { + return Number.ToInt64(fileName.Substring(1 + IndexFileNames.SEGMENTS.Length)); + } + else + { + throw new System.ArgumentException("fileName \"" + fileName + "\" is not a segments file"); + } + } + + + /// Get the next segments_N filename that will be written. 
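+ // For illustration only (not in the original source): generations map to file
+ // names as segments_1, segments_2, ... so if the generation last read was 1,
+ // the next file written is segments_2. Parsing works the other way round:
+ //
+ //   long gen = SegmentInfos.GenerationFromSegmentsFileName("segments_1");  // 1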
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public System.String GetNextSegmentFileName() + { + long nextGeneration; + + if (generation == - 1) + { + nextGeneration = 1; + } + else + { + nextGeneration = generation + 1; + } + return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", nextGeneration); + } + + /// Read a particular segmentFileName. Note that this may + /// throw an IOException if a commit is in process. + /// + /// + /// -- directory containing the segments file + /// + /// -- segment file to load + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public void Read(Directory directory, System.String segmentFileName) + { + bool success = false; + + // Clear any previous segments: + Clear(); + + var input = new ChecksumIndexInput(directory.OpenInput(segmentFileName)); + + generation = GenerationFromSegmentsFileName(segmentFileName); + + lastGeneration = generation; + + try + { + int format = input.ReadInt(); + if (format < 0) + { + // file contains explicit format info + // check that it is a format we can understand + if (format < CURRENT_FORMAT) + throw new CorruptIndexException("Unknown format version: " + format); + version = input.ReadLong(); // read version + counter = input.ReadInt(); // read counter + } + else + { + // file is in old format without explicit format info + counter = format; + } + + for (int i = input.ReadInt(); i > 0; i--) + { + // read segmentInfos + Add(new SegmentInfo(directory, format, input)); + } + + if (format >= 0) + { + // in old format the version number may be at the end of the file + if (input.FilePointer >= input.Length()) + version = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond); + // old file format without version number + else + version = input.ReadLong(); // read version + } + + if (format <= FORMAT_USER_DATA) + { + if (format <= FORMAT_DIAGNOSTICS) + { + userData = input.ReadStringStringMap(); + } + else if (0 != input.ReadByte()) + { + // TODO: Should be read-only map + userData = new HashMap {{"userData", input.ReadString()}}; + } + else + { + // TODO: Should be empty read-only map + userData = new HashMap(); + } + } + else + { + // TODO: Should be empty read-only map + userData = new HashMap(); + } + + if (format <= FORMAT_CHECKSUM) + { + long checksumNow = input.Checksum; + long checksumThen = input.ReadLong(); + if (checksumNow != checksumThen) + throw new CorruptIndexException("checksum mismatch in segments file"); + } + success = true; + } + finally + { + input.Close(); + if (!success) + { + // Clear any segment infos we had loaded so we + // have a clean slate on retry: + Clear(); + } + } + } + + /// This version of read uses the retry logic (for lock-less + /// commits) to find the right segments file to load. 
+ /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public void Read(Directory directory) + { + + generation = lastGeneration = - 1; + + new AnonymousClassFindSegmentsFile(this, directory).Run(); + } + + // Only non-null after prepareCommit has been called and + // before finishCommit is called + internal ChecksumIndexOutput pendingSegnOutput; + + private void Write(Directory directory) + { + + System.String segmentFileName = GetNextSegmentFileName(); + + // Always advance the generation on write: + if (generation == - 1) + { + generation = 1; + } + else + { + generation++; + } + + var segnOutput = new ChecksumIndexOutput(directory.CreateOutput(segmentFileName)); + + bool success = false; + + try + { + segnOutput.WriteInt(CURRENT_FORMAT); // write FORMAT + segnOutput.WriteLong(++version); // every write changes + // the index + segnOutput.WriteInt(counter); // write counter + segnOutput.WriteInt(Count); // write infos + for (int i = 0; i < Count; i++) + { + Info(i).Write(segnOutput); + } + segnOutput.WriteStringStringMap(userData); + segnOutput.PrepareCommit(); + success = true; + pendingSegnOutput = segnOutput; + } + finally + { + if (!success) + { + // We hit an exception above; try to close the file + // but suppress any exception: + try + { + segnOutput.Close(); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + try + { + // Try not to leave a truncated segments_N file in + // the index: + directory.DeleteFile(segmentFileName); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + } + } + } + + /// Returns a copy of this instance, also copying each + /// SegmentInfo. + /// + + public System.Object Clone() + { + SegmentInfos sis = new SegmentInfos(); + for (int i = 0; i < this.Count; i++) + { + sis.Add((SegmentInfo)this[i].Clone()); + } + sis.counter = this.counter; + sis.generation = this.generation; + sis.lastGeneration = this.lastGeneration; + // sis.pendingSegnOutput = this.pendingSegnOutput; // {{Aroush-2.9}} needed? + sis.userData = new HashMap(userData); + sis.version = this.version; + return sis; + } + + /// version number when this SegmentInfos was generated. + public long Version + { + get { return version; } + } + + public long Generation + { + get { return generation; } + } + + public long LastGeneration + { + get { return lastGeneration; } + } + + /// Current version number from segments file. 
+ /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public static long ReadCurrentVersion(Directory directory) + { + // Fully read the segments file: this ensures that it's + // completely written so that if + // IndexWriter.prepareCommit has been called (but not + // yet commit), then the reader will still see itself as + // current: + var sis = new SegmentInfos(); + sis.Read(directory); + return sis.version; + //return (long) ((System.Int64) new AnonymousClassFindSegmentsFile1(directory).Run()); + //DIGY: AnonymousClassFindSegmentsFile1 can safely be deleted + } + + /// Returns userData from latest segments file + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public static System.Collections.Generic.IDictionary ReadCurrentUserData(Directory directory) + { + var sis = new SegmentInfos(); + sis.Read(directory); + return sis.UserData; + } + + /// If non-null, information about retries when loading + /// the segments file will be printed to this. + /// + public static void SetInfoStream(System.IO.StreamWriter infoStream) + { + SegmentInfos.infoStream = infoStream; + } + + /* Advanced configuration of retry logic in loading + segments_N file */ + private static int defaultGenFileRetryCount = 10; + private static int defaultGenFileRetryPauseMsec = 50; + private static int defaultGenLookaheadCount = 10; + + /// Advanced: Gets or sets how many times to try loading the + /// segments.gen file contents to determine current segment + /// generation. This file is only referenced when the + /// primary method (listing the directory) fails. + /// + public static int DefaultGenFileRetryCount + { + get { return defaultGenFileRetryCount; } + set { defaultGenFileRetryCount = value; } + } + + public static int DefaultGenFileRetryPauseMsec + { + set { defaultGenFileRetryPauseMsec = value; } + get { return defaultGenFileRetryPauseMsec; } + } + + /// Advanced: set how many times to try incrementing the + /// gen when loading the segments file. This only runs if + /// the primary (listing directory) and secondary (opening + /// segments.gen file) methods fail to find the segments + /// file. + /// + public static int DefaultGenLookaheadCount + { + set { defaultGenLookaheadCount = value; } + get { return defaultGenLookaheadCount; } + } + + /// + /// + public static StreamWriter InfoStream + { + get { return infoStream; } + } + + private static void Message(System.String message) + { + if (infoStream != null) + { + infoStream.WriteLine("SIS [" + ThreadClass.Current().Name + "]: " + message); + } + } + + /// Utility class for executing code that needs to do + /// something with the current segments file. This is + /// necessary with lock-less commits because from the time + /// you locate the current segments file name, until you + /// actually open it, read its contents, or check modified + /// time, etc., it could have been deleted due to a writer + /// commit finishing. 
+ /// + public abstract class FindSegmentsFile + { + + internal Directory directory; + + protected FindSegmentsFile(Directory directory) + { + this.directory = directory; + } + + public System.Object Run() + { + return Run(null); + } + + public System.Object Run(IndexCommit commit) + { + if (commit != null) + { + if (directory != commit.Directory) + throw new System.IO.IOException("the specified commit does not match the specified Directory"); + return DoBody(commit.SegmentsFileName); + } + + System.String segmentFileName = null; + long lastGen = - 1; + long gen = 0; + int genLookaheadCount = 0; + System.IO.IOException exc = null; + bool retry = false; + + int method = 0; + + // Loop until we succeed in calling doBody() without + // hitting an IOException. An IOException most likely + // means a commit was in process and has finished, in + // the time it took us to load the now-old infos files + // (and segments files). It's also possible it's a + // true error (corrupt index). To distinguish these, + // on each retry we must see "forward progress" on + // which generation we are trying to load. If we + // don't, then the original error is real and we throw + // it. + + // We have three methods for determining the current + // generation. We try the first two in parallel, and + // fall back to the third when necessary. + + while (true) + { + + if (0 == method) + { + + // Method 1: list the directory and use the highest + // segments_N file. This method works well as long + // as there is no stale caching on the directory + // contents (NOTE: NFS clients often have such stale + // caching): + System.String[] files = null; + + long genA = - 1; + + files = directory.ListAll(); + + if (files != null) + genA = Lucene.Net.Index.SegmentInfos.GetCurrentSegmentGeneration(files); + + Lucene.Net.Index.SegmentInfos.Message("directory listing genA=" + genA); + + // Method 2: open segments.gen and read its + // contents. Then we take the larger of the two + // gens. This way, if either approach is hitting + // a stale cache (NFS) we have a better chance of + // getting the right generation. + long genB = - 1; + for (int i = 0; i < Lucene.Net.Index.SegmentInfos.defaultGenFileRetryCount; i++) + { + IndexInput genInput = null; + try + { + genInput = directory.OpenInput(IndexFileNames.SEGMENTS_GEN); + } + catch (System.IO.FileNotFoundException e) + { + Lucene.Net.Index.SegmentInfos.Message("segments.gen open: FileNotFoundException " + e); + break; + } + catch (System.IO.IOException e) + { + Lucene.Net.Index.SegmentInfos.Message("segments.gen open: IOException " + e); + } + + if (genInput != null) + { + try + { + int version = genInput.ReadInt(); + if (version == Lucene.Net.Index.SegmentInfos.FORMAT_LOCKLESS) + { + long gen0 = genInput.ReadLong(); + long gen1 = genInput.ReadLong(); + Lucene.Net.Index.SegmentInfos.Message("fallback check: " + gen0 + "; " + gen1); + if (gen0 == gen1) + { + // The file is consistent. 
+ genB = gen0; + break; + } + } + } + catch (System.IO.IOException) + { + // will retry + } + finally + { + genInput.Close(); + } + } + + System.Threading.Thread.Sleep(new TimeSpan((System.Int64) 10000 * Lucene.Net.Index.SegmentInfos.defaultGenFileRetryPauseMsec)); + + + } + + Lucene.Net.Index.SegmentInfos.Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB); + + // Pick the larger of the two gen's: + if (genA > genB) + gen = genA; + else + gen = genB; + + if (gen == - 1) + { + throw new System.IO.FileNotFoundException("no segments* file found in " + directory + ": files:" + string.Join(" ", files)); + } + } + + // Third method (fallback if first & second methods + // are not reliable): since both directory cache and + // file contents cache seem to be stale, just + // advance the generation. + if (1 == method || (0 == method && lastGen == gen && retry)) + { + + method = 1; + + if (genLookaheadCount < Lucene.Net.Index.SegmentInfos.defaultGenLookaheadCount) + { + gen++; + genLookaheadCount++; + Lucene.Net.Index.SegmentInfos.Message("look ahead increment gen to " + gen); + } + } + + if (lastGen == gen) + { + + // This means we're about to try the same + // segments_N last tried. This is allowed, + // exactly once, because writer could have been in + // the process of writing segments_N last time. + + if (retry) + { + // OK, we've tried the same segments_N file + // twice in a row, so this must be a real + // error. We throw the original exception we + // got. + throw exc; + } + + retry = true; + } + else if (0 == method) + { + // Segment file has advanced since our last loop, so + // reset retry: + retry = false; + } + + lastGen = gen; + + segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); + + try + { + System.Object v = DoBody(segmentFileName); + Lucene.Net.Index.SegmentInfos.Message("success on " + segmentFileName); + + return v; + } + catch (System.IO.IOException err) + { + + // Save the original root cause: + if (exc == null) + { + exc = err; + } + + Lucene.Net.Index.SegmentInfos.Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen); + + if (!retry && gen > 1) + { + + // This is our first time trying this segments + // file (because retry is false), and, there is + // possibly a segments_(N-1) (because gen > 1). + // So, check if the segments_(N-1) exists and + // try it if so: + System.String prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1); + + bool prevExists; + prevExists = directory.FileExists(prevSegmentFileName); + + if (prevExists) + { + Lucene.Net.Index.SegmentInfos.Message("fallback to prior segment file '" + prevSegmentFileName + "'"); + try + { + System.Object v = DoBody(prevSegmentFileName); + if (exc != null) + { + Lucene.Net.Index.SegmentInfos.Message("success on fallback " + prevSegmentFileName); + } + return v; + } + catch (System.IO.IOException err2) + { + Lucene.Net.Index.SegmentInfos.Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry"); + } + } + } + } + } + } + + /// Subclass must implement this. The assumption is an + /// IOException will be thrown if something goes wrong + /// during the processing that could have been caused by + /// a writer committing. 
+ /// + public /*internal*/ abstract System.Object DoBody(System.String segmentFileName); + } + + /// Returns a new SegmentInfos containg the SegmentInfo + /// instances in the specified range first (inclusive) to + /// last (exclusive), so total number of segments returned + /// is last-first. + /// + public SegmentInfos Range(int first, int last) + { + SegmentInfos infos = new SegmentInfos(); + infos.AddRange(this.GetRange(first, last - first)); + return infos; + } + + // Carry over generation numbers from another SegmentInfos + internal void UpdateGeneration(SegmentInfos other) + { + lastGeneration = other.lastGeneration; + generation = other.generation; + version = other.version; + } + + internal void RollbackCommit(Directory dir) + { + if (pendingSegnOutput != null) + { + try + { + pendingSegnOutput.Close(); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + // in our caller + } + + // Must carefully compute fileName from "generation" + // since lastGeneration isn't incremented: + try + { + System.String segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); + dir.DeleteFile(segmentFileName); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + // in our caller + } + pendingSegnOutput = null; + } + } + + /// Call this to start a commit. This writes the new + /// segments file, but writes an invalid checksum at the + /// end, so that it is not visible to readers. Once this + /// is called you must call to complete + /// the commit or to abort it. + /// + internal void PrepareCommit(Directory dir) + { + if (pendingSegnOutput != null) + throw new System.SystemException("prepareCommit was already called"); + Write(dir); + } + + /// Returns all file names referenced by SegmentInfo + /// instances matching the provided Directory (ie files + /// associated with any "external" segments are skipped). + /// The returned collection is recomputed on each + /// invocation. + /// + public System.Collections.Generic.ICollection Files(Directory dir, bool includeSegmentsFile) + { + System.Collections.Generic.HashSet files = new System.Collections.Generic.HashSet(); + if (includeSegmentsFile) + { + files.Add(GetCurrentSegmentFileName()); + } + int size = Count; + for (int i = 0; i < size; i++) + { + SegmentInfo info = Info(i); + if (info.dir == dir) + { + files.UnionWith(Info(i).Files()); + } + } + return files; + } + + internal void FinishCommit(Directory dir) + { + if (pendingSegnOutput == null) + throw new System.SystemException("prepareCommit was not called"); + bool success = false; + try + { + pendingSegnOutput.FinishCommit(); + pendingSegnOutput.Close(); + pendingSegnOutput = null; + success = true; + } + finally + { + if (!success) + RollbackCommit(dir); + } + + // NOTE: if we crash here, we have left a segments_N + // file in the directory in a possibly corrupt state (if + // some bytes made it to stable storage and others + // didn't). But, the segments_N file includes checksum + // at the end, which should catch this case. So when a + // reader tries to read it, it will throw a + // CorruptIndexException, which should cause the retry + // logic in SegmentInfos to kick in and load the last + // good (previous) segments_N-1 file. 
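PrepareCommit and FinishCommit form a two-phase commit. A caller in the same assembly (these members are internal) can interleave its own work between the phases roughly as below; Commit(dir) further down is the one-shot form of the same pair. CommitWithExtraWork is a hypothetical helper name, and infos/dir are assumed to exist.

    internal static void CommitWithExtraWork(SegmentInfos infos, Directory dir)
    {
        infos.PrepareCommit(dir);          // writes segments_N with an invalid checksum
        bool committed = false;
        try
        {
            // ... anything that must succeed before the commit becomes visible ...
            infos.FinishCommit(dir);       // fixes the checksum, syncs, writes segments.gen
            committed = true;
        }
        finally
        {
            if (!committed)
                infos.RollbackCommit(dir); // no-op if FinishCommit already cleaned up
        }
    }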
+ + System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation); + success = false; + try + { + dir.Sync(fileName); + success = true; + } + finally + { + if (!success) + { + try + { + dir.DeleteFile(fileName); + } + catch (System.Exception) + { + // Suppress so we keep throwing the original exception + } + } + } + + lastGeneration = generation; + + try + { + IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN); + try + { + genOutput.WriteInt(FORMAT_LOCKLESS); + genOutput.WriteLong(generation); + genOutput.WriteLong(generation); + } + finally + { + genOutput.Close(); + } + } + catch (System.Exception) + { + // It's OK if we fail to write this file since it's + // used only as one of the retry fallbacks. + } + } + + /// Writes & syncs to the Directory dir, taking care to + /// remove the segments file on exception + /// + public /*internal*/ void Commit(Directory dir) + { + PrepareCommit(dir); + FinishCommit(dir); + } + + public System.String SegString(Directory directory) + { + lock (this) + { + var buffer = new System.Text.StringBuilder(); + int count = Count; + for (int i = 0; i < count; i++) + { + if (i > 0) + { + buffer.Append(' '); + } + SegmentInfo info = Info(i); + buffer.Append(info.SegString(directory)); + if (info.dir != directory) + buffer.Append("**"); + } + return buffer.ToString(); + } + } + + public IDictionary UserData + { + get { return userData; } + internal set { + userData = value ?? new HashMap(); + } + } + + /// Replaces all segments in this instance, but keeps + /// generation, version, counter so that future commits + /// remain write once. + /// + internal void Replace(SegmentInfos other) + { + Clear(); + AddRange(other); + lastGeneration = other.lastGeneration; + } + + // Used only for testing + public bool HasExternalSegments(Directory dir) + { + int numSegments = Count; + for (int i = 0; i < numSegments; i++) + if (Info(i).dir != dir) + return true; + return false; + } + + #region Lucene.NET (Equals & GetHashCode ) + /// + /// Simple brute force implementation. + /// If size is equal, compare items one by one. + /// + /// SegmentInfos object to check equality for + /// true if lists are equal, false otherwise + public override bool Equals(object obj) + { + if (obj == null) return false; + + var objToCompare = obj as SegmentInfos; + if (objToCompare == null) return false; + + if (this.Count != objToCompare.Count) return false; + + for (int idx = 0; idx < this.Count; idx++) + { + if (!this[idx].Equals(objToCompare[idx])) return false; + } + + return true; + } + + /// + /// Calculate hash code of SegmentInfos + /// + /// hash code as in java version of ArrayList + public override int GetHashCode() + { + int h = 1; + for (int i = 0; i < this.Count; i++) + { + SegmentInfo si = (this[i] as SegmentInfo); + h = 31 * h + (si == null ? 0 : si.GetHashCode()); + } + + return h; + } + #endregion + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentMergeInfo.cs b/external/Lucene.Net.Light/src/core/Index/SegmentMergeInfo.cs new file mode 100644 index 0000000000..bad0aad131 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentMergeInfo.cs @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class SegmentMergeInfo : IDisposable + { + internal Term term; + internal int base_Renamed; + internal int ord; // the position of the segment in a MultiReader + internal TermEnum termEnum; + internal IndexReader reader; + internal int delCount; + private TermPositions postings; // use getPositions() + private int[] docMap; // use getDocMap() + + private bool isDisposed; + + internal SegmentMergeInfo(int b, TermEnum te, IndexReader r) + { + base_Renamed = b; + reader = r; + termEnum = te; + term = te.Term; + } + + // maps around deleted docs + internal int[] GetDocMap() + { + if (docMap == null) + { + delCount = 0; + // build array which maps document numbers around deletions + if (reader.HasDeletions) + { + int maxDoc = reader.MaxDoc; + docMap = new int[maxDoc]; + int j = 0; + for (int i = 0; i < maxDoc; i++) + { + if (reader.IsDeleted(i)) + { + delCount++; + docMap[i] = - 1; + } + else + docMap[i] = j++; + } + } + } + return docMap; + } + + internal TermPositions GetPositions() + { + if (postings == null) + { + postings = reader.TermPositions(); + } + return postings; + } + + internal bool Next() + { + if (termEnum.Next()) + { + term = termEnum.Term; + return true; + } + else + { + term = null; + return false; + } + } + + public void Dispose() + { + if (isDisposed) return; + + // Move to protected method if class becomes unsealed + termEnum.Close(); + if (postings != null) + { + postings.Close(); + } + + isDisposed = true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentMergeQueue.cs b/external/Lucene.Net.Light/src/core/Index/SegmentMergeQueue.cs new file mode 100644 index 0000000000..1b4858405f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentMergeQueue.cs @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Index +{ + + sealed class SegmentMergeQueue : PriorityQueue, IDisposable + { + internal SegmentMergeQueue(int size) + { + Initialize(size); + } + + public override bool LessThan(SegmentMergeInfo stiA, SegmentMergeInfo stiB) + { + int comparison = stiA.term.CompareTo(stiB.term); + if (comparison == 0) + return stiA.base_Renamed < stiB.base_Renamed; + else + return comparison < 0; + } + + public void Dispose() + { + // Move to protected method if class becomes unsealed + while (Top() != null) + Pop().Dispose(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentMerger.cs b/external/Lucene.Net.Light/src/core/Index/SegmentMerger.cs new file mode 100644 index 0000000000..0ab159da6f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentMerger.cs @@ -0,0 +1,934 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult; +using FieldOption = Lucene.Net.Index.IndexReader.FieldOption; +using MergeAbortedException = Lucene.Net.Index.MergePolicy.MergeAbortedException; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Index +{ + + /// The SegmentMerger class combines two or more Segments, represented by an IndexReader (, + /// into a single Segment. After adding the appropriate readers, call the merge method to combine the + /// segments. + ///
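The LessThan ordering above is what lets MergeTermInfos (further down in SegmentMerger.cs) pop all entries for the same term consecutively: entries are ranked by term first and, for equal terms, by the segment's starting doc id. A small same-assembly, illustrative walk-through with invented terms and bases:

    SegmentMergeQueue queue = new SegmentMergeQueue(readers.Count);
    // After adding one SegmentMergeInfo per reader (as MergeTermInfos does),
    // entries with equal terms come off the queue back to back, smallest base first:
    //   ("apple", base 0) -> ("apple", base 250) -> ("banana", base 0)
    // so both "apple" postings lists are merged into one output term entry
    // before "banana" is started.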

+ /// If the compoundFile flag is set, then the segments will be merged into a compound file. + /// + /// + ///

+ /// + /// + /// + /// + public sealed class SegmentMerger + { + private class AnonymousClassCheckAbort:CheckAbort + { + private void InitBlock(SegmentMerger enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SegmentMerger enclosingInstance; + public SegmentMerger Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassCheckAbort(SegmentMerger enclosingInstance, Lucene.Net.Index.MergePolicy.OneMerge Param1, Lucene.Net.Store.Directory Param2):base(Param1, Param2) + { + InitBlock(enclosingInstance); + } + public override void Work(double units) + { + // do nothing + } + } + private class AnonymousClassCheckAbort1:CheckAbort + { + private void InitBlock(SegmentMerger enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SegmentMerger enclosingInstance; + public SegmentMerger Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassCheckAbort1(SegmentMerger enclosingInstance, Lucene.Net.Index.MergePolicy.OneMerge Param1, Lucene.Net.Store.Directory Param2):base(Param1, Param2) + { + InitBlock(enclosingInstance); + } + public override void Work(double units) + { + // do nothing + } + } + + private void InitBlock() + { + termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL; + } + + /// norms header placeholder + internal static readonly byte[] NORMS_HEADER = new byte[]{(byte) 'N', (byte) 'R', (byte) 'M', unchecked((byte) - 1)}; + + private Directory directory; + private System.String segment; + private int termIndexInterval; + + private IList readers = new List(); + private FieldInfos fieldInfos; + + private int mergedDocs; + + private CheckAbort checkAbort; + + // Whether we should merge doc stores (stored fields and + // vectors files). When all segments we are merging + // already share the same doc store files, we don't need + // to merge the doc stores. + private bool mergeDocStores; + + /// Maximum number of contiguous documents to bulk-copy + /// when merging stored fields + /// + private const int MAX_RAW_MERGE_DOCS = 4192; + + /// This ctor used only by test code. 
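Putting the pieces together, the calling sequence the summary above describes looks roughly like this. It is a sketch only: dir, name, readerA and readerB are placeholders, the ctor used is the test-only one documented below, and CloseReaders is internal, so this assumes code in the same assembly.

    SegmentMerger merger = new SegmentMerger(dir, name);
    merger.Add(readerA);
    merger.Add(readerB);
    int mergedDocCount = merger.Merge();        // fields, terms, norms, and vectors if present
    merger.CreateCompoundFile(name + ".cfs");   // optional: pack the merged files into one .cfs
    merger.CloseReaders();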
+ /// + /// + /// The Directory to merge the other segments into + /// + /// The name of the new segment + /// + public /*internal*/ SegmentMerger(Directory dir, System.String name) + { + InitBlock(); + directory = dir; + segment = name; + checkAbort = new AnonymousClassCheckAbort(this, null, null); + } + + internal SegmentMerger(IndexWriter writer, System.String name, MergePolicy.OneMerge merge) + { + InitBlock(); + directory = writer.Directory; + segment = name; + if (merge != null) + { + checkAbort = new CheckAbort(merge, directory); + } + else + { + checkAbort = new AnonymousClassCheckAbort1(this, null, null); + } + termIndexInterval = writer.TermIndexInterval; + } + + internal bool HasProx() + { + return fieldInfos.HasProx(); + } + + /// Add an IndexReader to the collection of readers that are to be merged + /// + /// + public /*internal*/ void Add(IndexReader reader) + { + readers.Add(reader); + } + + /// + /// The index of the reader to return + /// + /// The ith reader to be merged + /// + internal IndexReader SegmentReader(int i) + { + return readers[i]; + } + + /// Merges the readers specified by the method into the directory passed to the constructor + /// The number of documents that were merged + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public /*internal*/ int Merge() + { + return Merge(true); + } + + /// Merges the readers specified by the method + /// into the directory passed to the constructor. + /// + /// if false, we will not merge the + /// stored fields nor vectors files + /// + /// The number of documents that were merged + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + internal int Merge(bool mergeDocStores) + { + + this.mergeDocStores = mergeDocStores; + + // NOTE: it's important to add calls to + // checkAbort.work(...) if you make any changes to this + // method that will spend alot of time. The frequency + // of this check impacts how long + // IndexWriter.close(false) takes to actually stop the + // threads. + + mergedDocs = MergeFields(); + MergeTerms(); + MergeNorms(); + + if (mergeDocStores && fieldInfos.HasVectors()) + MergeVectors(); + + return mergedDocs; + } + + /// close all IndexReaders that have been added. + /// Should not be called before merge(). + /// + /// IOException + internal void CloseReaders() + { + foreach(IndexReader reader in readers) + { + reader.Dispose(); + } + } + + internal ICollection GetMergedFiles() + { + ISet fileSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); + + // Basic files + for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.Length; i++) + { + System.String ext = IndexFileNames.COMPOUND_EXTENSIONS[i]; + + if (ext.Equals(IndexFileNames.PROX_EXTENSION) && !HasProx()) + continue; + + if (mergeDocStores || (!ext.Equals(IndexFileNames.FIELDS_EXTENSION) && !ext.Equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) + fileSet.Add(segment + "." + ext); + } + + // Fieldable norm files + for (int i = 0; i < fieldInfos.Size(); i++) + { + FieldInfo fi = fieldInfos.FieldInfo(i); + if (fi.isIndexed && !fi.omitNorms) + { + fileSet.Add(segment + "." + IndexFileNames.NORMS_EXTENSION); + break; + } + } + + // Vector files + if (fieldInfos.HasVectors() && mergeDocStores) + { + for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.Length; i++) + { + fileSet.Add(segment + "." 
+ IndexFileNames.VECTOR_EXTENSIONS[i]); + } + } + + return fileSet; + } + + public /*internal*/ ICollection CreateCompoundFile(System.String fileName) + { + ICollection files = GetMergedFiles(); + CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort); + + // Now merge all added files + foreach(var file in files) + { + cfsWriter.AddFile(file); + } + + // Perform the merge + cfsWriter.Close(); + + return files; + } + + private void AddIndexed(IndexReader reader, FieldInfos fInfos, ICollection names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions) + { + foreach (var field in names) + { + fInfos.Add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, + !reader.HasNorms(field), storePayloads, omitTFAndPositions); + } + } + + private SegmentReader[] matchingSegmentReaders; + private int[] rawDocLengths; + private int[] rawDocLengths2; + + private void SetMatchingSegmentReaders() + { + // If the i'th reader is a SegmentReader and has + // identical fieldName -> number mapping, then this + // array will be non-null at position i: + int numReaders = readers.Count; + matchingSegmentReaders = new SegmentReader[numReaders]; + + // If this reader is a SegmentReader, and all of its + // field name -> number mappings match the "merged" + // FieldInfos, then we can do a bulk copy of the + // stored fields: + for (int i = 0; i < numReaders; i++) + { + IndexReader reader = readers[i]; + if (reader is SegmentReader) + { + SegmentReader segmentReader = (SegmentReader) reader; + bool same = true; + FieldInfos segmentFieldInfos = segmentReader.FieldInfos(); + int numFieldInfos = segmentFieldInfos.Size(); + for (int j = 0; same && j < numFieldInfos; j++) + { + same = fieldInfos.FieldName(j).Equals(segmentFieldInfos.FieldName(j)); + } + if (same) + { + matchingSegmentReaders[i] = segmentReader; + } + } + } + + // Used for bulk-reading raw bytes for stored fields + rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; + rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS]; + } + + /// + /// The number of documents in all of the readers + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + private int MergeFields() + { + + if (!mergeDocStores) + { + // When we are not merging by doc stores, their field + // name -> number mapping are the same. So, we start + // with the fieldInfos of the last segment in this + // case, to keep that numbering. 
+ SegmentReader sr = (SegmentReader) readers[readers.Count - 1]; + fieldInfos = (FieldInfos) sr.core.fieldInfos.Clone(); + } + else + { + fieldInfos = new FieldInfos(); // merge field names + } + + foreach(IndexReader reader in readers) + { + if (reader is SegmentReader) + { + SegmentReader segmentReader = (SegmentReader) reader; + FieldInfos readerFieldInfos = segmentReader.FieldInfos(); + int numReaderFieldInfos = readerFieldInfos.Size(); + for (int j = 0; j < numReaderFieldInfos; j++) + { + FieldInfo fi = readerFieldInfos.FieldInfo(j); + fieldInfos.Add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.HasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); + } + } + else + { + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); + AddIndexed(reader, fieldInfos, reader.GetFieldNames(FieldOption.INDEXED), false, false, false, false, false); + fieldInfos.Add(reader.GetFieldNames(FieldOption.UNINDEXED), false); + } + } + fieldInfos.Write(directory, segment + ".fnm"); + + int docCount = 0; + + SetMatchingSegmentReaders(); + + if (mergeDocStores) + { + // merge field values + FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); + + try + { + int idx = 0; + foreach(IndexReader reader in readers) + { + SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; + FieldsReader matchingFieldsReader = null; + if (matchingSegmentReader != null) + { + FieldsReader fieldsReader = matchingSegmentReader.GetFieldsReader(); + if (fieldsReader != null && fieldsReader.CanReadRawDocs()) + { + matchingFieldsReader = fieldsReader; + } + } + if (reader.HasDeletions) + { + docCount += CopyFieldsWithDeletions(fieldsWriter, reader, matchingFieldsReader); + } + else + { + docCount += CopyFieldsNoDeletions(fieldsWriter, reader, matchingFieldsReader); + } + } + } + finally + { + fieldsWriter.Dispose(); + } + + System.String fileName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; + long fdxFileLength = directory.FileLength(fileName); + + if (4 + ((long) docCount) * 8 != fdxFileLength) + // This is most likely a bug in Sun JRE 1.6.0_04/_05; + // we detect that the bug has struck, here, and + // throw an exception to prevent the corruption from + // entering the index. See LUCENE-1282 for + // details. 
+ throw new System.SystemException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); + } + // If we are skipping the doc stores, that means there + // are no deletions in any of these segments, so we + // just sum numDocs() of each segment to get total docCount + else + { + foreach(IndexReader reader in readers) + { + docCount += reader.NumDocs(); + } + } + + return docCount; + } + + private int CopyFieldsWithDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) + { + int docCount = 0; + int maxDoc = reader.MaxDoc; + if (matchingFieldsReader != null) + { + // We can bulk-copy because the fieldInfos are "congruent" + for (int j = 0; j < maxDoc; ) + { + if (reader.IsDeleted(j)) + { + // skip deleted docs + ++j; + continue; + } + // We can optimize this case (doing a bulk byte copy) since the field + // numbers are identical + int start = j, numDocs = 0; + do + { + j++; + numDocs++; + if (j >= maxDoc) + break; + if (reader.IsDeleted(j)) + { + j++; + break; + } + } + while (numDocs < MAX_RAW_MERGE_DOCS); + + IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); + fieldsWriter.AddRawDocuments(stream, rawDocLengths, numDocs); + docCount += numDocs; + checkAbort.Work(300 * numDocs); + } + } + else + { + for (int j = 0; j < maxDoc; j++) + { + if (reader.IsDeleted(j)) + { + // skip deleted docs + continue; + } + // NOTE: it's very important to first assign to doc then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + Document doc = reader.Document(j); + fieldsWriter.AddDocument(doc); + docCount++; + checkAbort.Work(300); + } + } + return docCount; + } + + private int CopyFieldsNoDeletions(FieldsWriter fieldsWriter, IndexReader reader, FieldsReader matchingFieldsReader) + { + int maxDoc = reader.MaxDoc; + int docCount = 0; + if (matchingFieldsReader != null) + { + // We can bulk-copy because the fieldInfos are "congruent" + while (docCount < maxDoc) + { + int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); + IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, docCount, len); + fieldsWriter.AddRawDocuments(stream, rawDocLengths, len); + docCount += len; + checkAbort.Work(300 * len); + } + } + else + { + for (; docCount < maxDoc; docCount++) + { + // NOTE: it's very important to first assign to doc then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + Document doc = reader.Document(docCount); + fieldsWriter.AddDocument(doc); + checkAbort.Work(300); + } + } + return docCount; + } + + /// Merge the TermVectors from each of the segments into the new one. 
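The length check above (and the matching one in MergeVectors below) just verifies a fixed-width record layout: a 4-byte header plus 8 bytes per document in the .fdx stored-fields index, and 4 plus 16 bytes per document in the .tvx term-vectors index. For example, 1,000 merged documents must give a .fdx of exactly 4 + 1000 * 8 = 8,004 bytes. A hypothetical helper spelling out the same arithmetic:

    static long ExpectedFdxLength(long docCount) { return 4 + docCount * 8; }   // stored-fields index
    static long ExpectedTvxLength(long docCount) { return 4 + docCount * 16; }  // term-vectors index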
+ /// IOException + private void MergeVectors() + { + TermVectorsWriter termVectorsWriter = new TermVectorsWriter(directory, segment, fieldInfos); + + try + { + int idx = 0; + foreach(IndexReader reader in readers) + { + SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; + TermVectorsReader matchingVectorsReader = null; + if (matchingSegmentReader != null) + { + TermVectorsReader vectorsReader = matchingSegmentReader.GetTermVectorsReaderOrig(); + + // If the TV* files are an older format then they cannot read raw docs: + if (vectorsReader != null && vectorsReader.CanReadRawDocs()) + { + matchingVectorsReader = vectorsReader; + } + } + if (reader.HasDeletions) + { + CopyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader); + } + else + { + CopyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader); + } + } + } + finally + { + termVectorsWriter.Dispose(); + } + + System.String fileName = segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION; + long tvxSize = directory.FileLength(fileName); + + if (4 + ((long) mergedDocs) * 16 != tvxSize) + // This is most likely a bug in Sun JRE 1.6.0_04/_05; + // we detect that the bug has struck, here, and + // throw an exception to prevent the corruption from + // entering the index. See LUCENE-1282 for + // details. + throw new System.SystemException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + " file=" + fileName + " file exists?=" + directory.FileExists(fileName) + "; now aborting this merge to prevent index corruption"); + } + + private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) + { + int maxDoc = reader.MaxDoc; + if (matchingVectorsReader != null) + { + // We can bulk-copy because the fieldInfos are "congruent" + for (int docNum = 0; docNum < maxDoc; ) + { + if (reader.IsDeleted(docNum)) + { + // skip deleted docs + ++docNum; + continue; + } + // We can optimize this case (doing a bulk byte copy) since the field + // numbers are identical + int start = docNum, numDocs = 0; + do + { + docNum++; + numDocs++; + if (docNum >= maxDoc) + break; + if (reader.IsDeleted(docNum)) + { + docNum++; + break; + } + } + while (numDocs < MAX_RAW_MERGE_DOCS); + + matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs); + termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); + checkAbort.Work(300 * numDocs); + } + } + else + { + for (int docNum = 0; docNum < maxDoc; docNum++) + { + if (reader.IsDeleted(docNum)) + { + // skip deleted docs + continue; + } + + // NOTE: it's very important to first assign to vectors then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum); + termVectorsWriter.AddAllDocVectors(vectors); + checkAbort.Work(300); + } + } + } + + private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader) + { + int maxDoc = reader.MaxDoc; + if (matchingVectorsReader != null) + { + // We can bulk-copy because the fieldInfos are "congruent" + int docCount = 0; + while (docCount < maxDoc) + { + int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); + matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len); + termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); + docCount += len; + 
checkAbort.Work(300 * len); + } + } + else + { + for (int docNum = 0; docNum < maxDoc; docNum++) + { + // NOTE: it's very important to first assign to vectors then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum); + termVectorsWriter.AddAllDocVectors(vectors); + checkAbort.Work(300); + } + } + } + + private SegmentMergeQueue queue = null; + + private void MergeTerms() + { + + SegmentWriteState state = new SegmentWriteState(null, directory, segment, null, mergedDocs, 0, termIndexInterval); + + FormatPostingsFieldsConsumer consumer = new FormatPostingsFieldsWriter(state, fieldInfos); + + try + { + queue = new SegmentMergeQueue(readers.Count); + + MergeTermInfos(consumer); + } + finally + { + consumer.Finish(); + if (queue != null) + queue.Dispose(); + } + } + + internal bool omitTermFreqAndPositions; + + private void MergeTermInfos(FormatPostingsFieldsConsumer consumer) + { + int base_Renamed = 0; + int readerCount = readers.Count; + for (int i = 0; i < readerCount; i++) + { + IndexReader reader = readers[i]; + TermEnum termEnum = reader.Terms(); + SegmentMergeInfo smi = new SegmentMergeInfo(base_Renamed, termEnum, reader); + int[] docMap = smi.GetDocMap(); + if (docMap != null) + { + if (docMaps == null) + { + docMaps = new int[readerCount][]; + delCounts = new int[readerCount]; + } + docMaps[i] = docMap; + delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs(); + } + + base_Renamed += reader.NumDocs(); + + System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount); + + if (smi.Next()) + queue.Add(smi); + // initialize queue + else + smi.Dispose(); + } + + SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count]; + + System.String currentField = null; + FormatPostingsTermsConsumer termsConsumer = null; + + while (queue.Size() > 0) + { + int matchSize = 0; // pop matching terms + match[matchSize++] = queue.Pop(); + Term term = match[0].term; + SegmentMergeInfo top = queue.Top(); + + while (top != null && term.CompareTo(top.term) == 0) + { + match[matchSize++] = queue.Pop(); + top = queue.Top(); + } + + if ((System.Object) currentField != (System.Object) term.Field) + { + currentField = term.Field; + if (termsConsumer != null) + termsConsumer.Finish(); + FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField); + termsConsumer = consumer.AddField(fieldInfo); + omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions; + } + + int df = AppendPostings(termsConsumer, match, matchSize); // add new TermInfo + + checkAbort.Work(df / 3.0); + + while (matchSize > 0) + { + SegmentMergeInfo smi = match[--matchSize]; + if (smi.Next()) + queue.Add(smi); + // restore queue + else + smi.Dispose(); // done with a segment + } + } + } + + private byte[] payloadBuffer; + private int[][] docMaps; + internal int[][] GetDocMaps() + { + return docMaps; + } + private int[] delCounts; + internal int[] GetDelCounts() + { + return delCounts; + } + + /// Process postings from multiple segments all positioned on the + /// same term. Writes out merged entries into freqOutput and + /// the proxOutput streams. 
+ /// + /// + /// array of segments + /// + /// number of cells in the array actually occupied + /// + /// number of documents across all segments where this term was found + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + private int AppendPostings(FormatPostingsTermsConsumer termsConsumer, SegmentMergeInfo[] smis, int n) + { + + FormatPostingsDocsConsumer docConsumer = termsConsumer.AddTerm(smis[0].term.Text); + int df = 0; + for (int i = 0; i < n; i++) + { + SegmentMergeInfo smi = smis[i]; + TermPositions postings = smi.GetPositions(); + System.Diagnostics.Debug.Assert(postings != null); + int base_Renamed = smi.base_Renamed; + int[] docMap = smi.GetDocMap(); + postings.Seek(smi.termEnum); + + while (postings.Next()) + { + df++; + int doc = postings.Doc; + if (docMap != null) + doc = docMap[doc]; // map around deletions + doc += base_Renamed; // convert to merged space + + int freq = postings.Freq; + FormatPostingsPositionsConsumer posConsumer = docConsumer.AddDoc(doc, freq); + + if (!omitTermFreqAndPositions) + { + for (int j = 0; j < freq; j++) + { + int position = postings.NextPosition(); + int payloadLength = postings.PayloadLength; + if (payloadLength > 0) + { + if (payloadBuffer == null || payloadBuffer.Length < payloadLength) + payloadBuffer = new byte[payloadLength]; + postings.GetPayload(payloadBuffer, 0); + } + posConsumer.AddPosition(position, payloadBuffer, 0, payloadLength); + } + posConsumer.Finish(); + } + } + } + docConsumer.Finish(); + + return df; + } + + private void MergeNorms() + { + byte[] normBuffer = null; + IndexOutput output = null; + try + { + int numFieldInfos = fieldInfos.Size(); + for (int i = 0; i < numFieldInfos; i++) + { + FieldInfo fi = fieldInfos.FieldInfo(i); + if (fi.isIndexed && !fi.omitNorms) + { + if (output == null) + { + output = directory.CreateOutput(segment + "." + IndexFileNames.NORMS_EXTENSION); + output.WriteBytes(NORMS_HEADER, NORMS_HEADER.Length); + } + foreach(IndexReader reader in readers) + { + int maxDoc = reader.MaxDoc; + if (normBuffer == null || normBuffer.Length < maxDoc) + { + // the buffer is too small for the current segment + normBuffer = new byte[maxDoc]; + } + reader.Norms(fi.name, normBuffer, 0); + if (!reader.HasDeletions) + { + //optimized case for segments without deleted docs + output.WriteBytes(normBuffer, maxDoc); + } + else + { + // this segment has deleted docs, so we have to + // check for every doc if it is deleted or not + for (int k = 0; k < maxDoc; k++) + { + if (!reader.IsDeleted(k)) + { + output.WriteByte(normBuffer[k]); + } + } + } + checkAbort.Work(maxDoc); + } + } + } + } + finally + { + if (output != null) + { + output.Close(); + } + } + } + + internal class CheckAbort + { + private double workCount; + private MergePolicy.OneMerge merge; + private Directory dir; + public CheckAbort(MergePolicy.OneMerge merge, Directory dir) + { + this.merge = merge; + this.dir = dir; + } + + /// Records the fact that roughly units amount of work + /// have been done since this method was last called. + /// When adding time-consuming code into SegmentMerger, + /// you should test different values for units to ensure + /// that the time in between calls to merge.checkAborted + /// is up to ~ 1 second. 
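Concretely, Work() below only forwards to Merge.CheckAborted() once 10,000 units have accumulated, and the copy loops earlier in this file report 300 units per document, so an abort is noticed after at most roughly 34 copied documents. A tiny illustrative calculation (both constants are the ones used in this file):

    int docsBetweenAbortChecks = (int) System.Math.Ceiling(10000.0 / 300.0);   // = 34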
+ /// + public virtual void Work(double units) + { + workCount += units; + if (workCount >= 10000.0) + { + merge.CheckAborted(dir); + workCount = 0; + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentReader.cs b/external/Lucene.Net.Light/src/core/Index/SegmentReader.cs new file mode 100644 index 0000000000..8cc5d3b51c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentReader.cs @@ -0,0 +1,1692 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using BitVector = Lucene.Net.Util.BitVector; +using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity; + +namespace Lucene.Net.Index +{ + + /// $Id + /// + ///

+ /// NOTE: This API is new and still experimental + /// (subject to change suddenly in the next release)

+ ///

+ public class SegmentReader : IndexReader + { + public SegmentReader() + { + InitBlock(); + } + private void InitBlock() + { + fieldsReaderLocal = new FieldsReaderLocal(this); + } + protected internal bool readOnly; + + private SegmentInfo si; + private int readBufferSize; + + internal CloseableThreadLocal fieldsReaderLocal; + internal CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); + + internal BitVector deletedDocs = null; + internal Ref deletedDocsRef = null; + private bool deletedDocsDirty = false; + private bool normsDirty = false; + private int pendingDeleteCount; + + private bool rollbackHasChanges = false; + private bool rollbackDeletedDocsDirty = false; + private bool rollbackNormsDirty = false; + private SegmentInfo rollbackSegmentInfo; + private int rollbackPendingDeleteCount; + + // optionally used for the .nrm file shared by multiple norms + private IndexInput singleNormStream; + private Ref singleNormRef; + + internal CoreReaders core; + + // Holds core readers that are shared (unchanged) when + // SegmentReader is cloned or reopened + public /*internal*/ sealed class CoreReaders + { + + // Counts how many other reader share the core objects + // (freqStream, proxStream, tis, etc.) of this reader; + // when coreRef drops to 0, these core objects may be + // closed. A given insance of SegmentReader may be + // closed, even those it shares core objects with other + // SegmentReaders: + private readonly Ref ref_Renamed = new Ref(); + + internal System.String segment; + internal FieldInfos fieldInfos; + internal IndexInput freqStream; + internal IndexInput proxStream; + internal TermInfosReader tisNoIndex; + + internal Directory dir; + internal Directory cfsDir; + internal int readBufferSize; + internal int termsIndexDivisor; + + internal SegmentReader origInstance; + + internal TermInfosReader tis; + internal FieldsReader fieldsReaderOrig; + internal TermVectorsReader termVectorsReaderOrig; + internal CompoundFileReader cfsReader; + internal CompoundFileReader storeCFSReader; + + internal CoreReaders(SegmentReader origInstance, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) + { + segment = si.name; + this.readBufferSize = readBufferSize; + this.dir = dir; + + bool success = false; + + try + { + Directory dir0 = dir; + if (si.GetUseCompoundFile()) + { + cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); + dir0 = cfsReader; + } + cfsDir = dir0; + + fieldInfos = new FieldInfos(cfsDir, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION); + + this.termsIndexDivisor = termsIndexDivisor; + var reader = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor); + if (termsIndexDivisor == - 1) + { + tisNoIndex = reader; + } + else + { + tis = reader; + tisNoIndex = null; + } + + // make sure that all index files have been read or are kept open + // so that if an index update removes them we'll still have them + freqStream = cfsDir.OpenInput(segment + "." + IndexFileNames.FREQ_EXTENSION, readBufferSize); + + proxStream = fieldInfos.HasProx() ? cfsDir.OpenInput(segment + "." + IndexFileNames.PROX_EXTENSION, readBufferSize) : null; + success = true; + } + finally + { + if (!success) + { + DecRef(); + } + } + + + // Must assign this at the end -- if we hit an + // exception above core, we don't want to attempt to + // purge the FieldCache (will hit NPE because core is + // not assigned yet). 
+ this.origInstance = origInstance; + } + + internal TermVectorsReader GetTermVectorsReaderOrig() + { + lock (this) + { + return termVectorsReaderOrig; + } + } + + internal FieldsReader GetFieldsReaderOrig() + { + lock (this) + { + return fieldsReaderOrig; + } + } + + internal void IncRef() + { + lock (this) + { + ref_Renamed.IncRef(); + } + } + + internal Directory GetCFSReader() + { + lock (this) + { + return cfsReader; + } + } + + internal TermInfosReader GetTermsReader() + { + lock (this) + { + if (tis != null) + { + return tis; + } + else + { + return tisNoIndex; + } + } + } + + internal bool TermsIndexIsLoaded() + { + lock (this) + { + return tis != null; + } + } + + // NOTE: only called from IndexWriter when a near + // real-time reader is opened, or applyDeletes is run, + // sharing a segment that's still being merged. This + // method is not fully thread safe, and relies on the + // synchronization in IndexWriter + internal void LoadTermsIndex(SegmentInfo si, int termsIndexDivisor) + { + lock (this) + { + if (tis == null) + { + Directory dir0; + if (si.GetUseCompoundFile()) + { + // In some cases, we were originally opened when CFS + // was not used, but then we are asked to open the + // terms reader with index, the segment has switched + // to CFS + if (cfsReader == null) + { + cfsReader = new CompoundFileReader(dir, segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); + } + dir0 = cfsReader; + } + else + { + dir0 = dir; + } + + tis = new TermInfosReader(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor); + } + } + } + + internal void DecRef() + { + lock (this) + { + + if (ref_Renamed.DecRef() == 0) + { + + // close everything, nothing is shared anymore with other readers + if (tis != null) + { + tis.Dispose(); + // null so if an app hangs on to us we still free most ram + tis = null; + } + + if (tisNoIndex != null) + { + tisNoIndex.Dispose(); + } + + if (freqStream != null) + { + freqStream.Close(); + } + + if (proxStream != null) + { + proxStream.Close(); + } + + if (termVectorsReaderOrig != null) + { + termVectorsReaderOrig.Dispose(); + } + + if (fieldsReaderOrig != null) + { + fieldsReaderOrig.Dispose(); + } + + if (cfsReader != null) + { + cfsReader.Close(); + } + + if (storeCFSReader != null) + { + storeCFSReader.Close(); + } + + // Force FieldCache to evict our entries at this point + if (origInstance != null) + { + Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(origInstance); + } + } + } + } + + internal void OpenDocStores(SegmentInfo si) + { + lock (this) + { + + System.Diagnostics.Debug.Assert(si.name.Equals(segment)); + + if (fieldsReaderOrig == null) + { + Directory storeDir; + if (si.DocStoreOffset != - 1) + { + if (si.DocStoreIsCompoundFile) + { + System.Diagnostics.Debug.Assert(storeCFSReader == null); + storeCFSReader = new CompoundFileReader(dir, si.DocStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize); + storeDir = storeCFSReader; + System.Diagnostics.Debug.Assert(storeDir != null); + } + else + { + storeDir = dir; + System.Diagnostics.Debug.Assert(storeDir != null); + } + } + else if (si.GetUseCompoundFile()) + { + // In some cases, we were originally opened when CFS + // was not used, but then we are asked to open doc + // stores after the segment has switched to CFS + if (cfsReader == null) + { + cfsReader = new CompoundFileReader(dir, segment + "." 
+ IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); + } + storeDir = cfsReader; + System.Diagnostics.Debug.Assert(storeDir != null); + } + else + { + storeDir = dir; + System.Diagnostics.Debug.Assert(storeDir != null); + } + + string storesSegment = si.DocStoreOffset != - 1 ? si.DocStoreSegment : segment; + + fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.DocStoreOffset, si.docCount); + + // Verify two sources of "maxDoc" agree: + if (si.DocStoreOffset == - 1 && fieldsReaderOrig.Size() != si.docCount) + { + throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + fieldsReaderOrig.Size() + " but segmentInfo shows " + si.docCount); + } + + if (fieldInfos.HasVectors()) + { + // open term vector files only as needed + termVectorsReaderOrig = new TermVectorsReader(storeDir, storesSegment, fieldInfos, readBufferSize, si.DocStoreOffset, si.docCount); + } + } + } + } + + public FieldInfos fieldInfos_ForNUnit + { + get { return fieldInfos; } + } + } + + /// Sets the initial value + private class FieldsReaderLocal : CloseableThreadLocal + { + public FieldsReaderLocal(SegmentReader enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(SegmentReader enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SegmentReader enclosingInstance; + public SegmentReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public /*protected internal*/ override FieldsReader InitialValue() + { + return (FieldsReader) Enclosing_Instance.core.GetFieldsReaderOrig().Clone(); + } + } + + public /*internal*/ class Ref + { + private int refCount = 1; + + public override System.String ToString() + { + return "refcount: " + refCount; + } + + public virtual int RefCount() + { + lock (this) + { + return refCount; + } + } + + public virtual int IncRef() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0); + refCount++; + return refCount; + } + } + + public virtual int DecRef() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0); + refCount--; + return refCount; + } + } + } + + /// Byte[] referencing is used because a new norm object needs + /// to be created for each clone, and the byte array is all + /// that is needed for sharing between cloned readers. The + /// current norm referencing is for sharing between readers + /// whereas the byte[] referencing is for copy on write which + /// is independent of reader references (i.e. incRef, decRef). 
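In other words, clones of a reader share a single norm byte[] until one of them wants to change it. The sketch below uses hypothetical variable names (norm, docId, newNormByte) but only calls members of the Norm class that follows:

    byte[] shared = norm.Bytes();          // loads (or reuses) the cached array; clones share it
    byte[] writable = norm.CopyOnWrite();  // clones the array first if its ref count is > 1
    writable[docId] = newNormByte;         // other readers still see the old, shared bytes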
+ /// + + public /*internal*/ sealed class Norm : System.ICloneable + { + private void InitBlock(SegmentReader enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SegmentReader enclosingInstance; + public SegmentReader Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal /*private*/ int refCount = 1; + + // If this instance is a clone, the originalNorm + // references the Norm that has a real open IndexInput: + private Norm origNorm; + + private IndexInput in_Renamed; + private readonly long normSeek; + + // null until bytes is set + private Ref bytesRef; + internal /*private*/ byte[] bytes; + internal /*private*/ bool dirty; + internal /*private*/ int number; + internal /*private*/ bool rollbackDirty; + + public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number, long normSeek) + { + InitBlock(enclosingInstance); + this.in_Renamed = in_Renamed; + this.number = number; + this.normSeek = normSeek; + } + + public void IncRef() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0)); + refCount++; + } + } + + private void CloseInput() + { + if (in_Renamed != null) + { + if (in_Renamed != Enclosing_Instance.singleNormStream) + { + // It's private to us -- just close it + in_Renamed.Dispose(); + } + else + { + // We are sharing this with others -- decRef and + // maybe close the shared norm stream + if (Enclosing_Instance.singleNormRef.DecRef() == 0) + { + Enclosing_Instance.singleNormStream.Dispose(); + Enclosing_Instance.singleNormStream = null; + } + } + + in_Renamed = null; + } + } + + public void DecRef() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0)); + + if (--refCount == 0) + { + if (origNorm != null) + { + origNorm.DecRef(); + origNorm = null; + } + else + { + CloseInput(); + } + + if (bytes != null) + { + System.Diagnostics.Debug.Assert(bytesRef != null); + bytesRef.DecRef(); + bytes = null; + bytesRef = null; + } + else + { + System.Diagnostics.Debug.Assert(bytesRef == null); + } + } + } + } + + // Load bytes but do not cache them if they were not + // already cached + public void Bytes(byte[] bytesOut, int offset, int len) + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0)); + if (bytes != null) + { + // Already cached -- copy from cache: + System.Diagnostics.Debug.Assert(len <= Enclosing_Instance.MaxDoc); + Array.Copy(bytes, 0, bytesOut, offset, len); + } + else + { + // Not cached + if (origNorm != null) + { + // Ask origNorm to load + origNorm.Bytes(bytesOut, offset, len); + } + else + { + // We are orig -- read ourselves from disk: + lock (in_Renamed) + { + in_Renamed.Seek(normSeek); + in_Renamed.ReadBytes(bytesOut, offset, len, false); + } + } + } + } + } + + // Load & cache full bytes array. Returns bytes. 
+ public byte[] Bytes() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0)); + if (bytes == null) + { + // value not yet read + System.Diagnostics.Debug.Assert(bytesRef == null); + if (origNorm != null) + { + // Ask origNorm to load so that for a series of + // reopened readers we share a single read-only + // byte[] + bytes = origNorm.Bytes(); + bytesRef = origNorm.bytesRef; + bytesRef.IncRef(); + + // Once we've loaded the bytes we no longer need + // origNorm: + origNorm.DecRef(); + origNorm = null; + } + else + { + // We are the origNorm, so load the bytes for real + // ourself: + int count = Enclosing_Instance.MaxDoc; + bytes = new byte[count]; + + // Since we are orig, in must not be null + System.Diagnostics.Debug.Assert(in_Renamed != null); + + // Read from disk. + lock (in_Renamed) + { + in_Renamed.Seek(normSeek); + in_Renamed.ReadBytes(bytes, 0, count, false); + } + + bytesRef = new Ref(); + CloseInput(); + } + } + + return bytes; + } + } + + // Only for testing + public /*internal*/ Ref BytesRef() + { + return bytesRef; + } + + // Called if we intend to change a norm value. We make a + // private copy of bytes if it's shared with others: + public byte[] CopyOnWrite() + { + lock (this) + { + System.Diagnostics.Debug.Assert(refCount > 0 &&(origNorm == null || origNorm.refCount > 0)); + Bytes(); + System.Diagnostics.Debug.Assert(bytes != null); + System.Diagnostics.Debug.Assert(bytesRef != null); + if (bytesRef.RefCount() > 1) + { + // I cannot be the origNorm for another norm + // instance if I'm being changed. Ie, only the + // "head Norm" can be changed: + System.Diagnostics.Debug.Assert(refCount == 1); + Ref oldRef = bytesRef; + bytes = Enclosing_Instance.CloneNormBytes(bytes); + bytesRef = new Ref(); + oldRef.DecRef(); + } + dirty = true; + return bytes; + } + } + + // Returns a copy of this Norm instance that shares + // IndexInput & bytes with the original one + public System.Object Clone() + { + lock (this) //LUCENENET-375 + { + System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0)); + + Norm clone; + try + { + clone = (Norm)base.MemberwiseClone(); + } + catch (System.Exception cnse) + { + // Cannot happen + throw new System.SystemException("unexpected CloneNotSupportedException", cnse); + } + clone.refCount = 1; + + if (bytes != null) + { + System.Diagnostics.Debug.Assert(bytesRef != null); + System.Diagnostics.Debug.Assert(origNorm == null); + + // Clone holds a reference to my bytes: + clone.bytesRef.IncRef(); + } + else + { + System.Diagnostics.Debug.Assert(bytesRef == null); + if (origNorm == null) + { + // I become the origNorm for the clone: + clone.origNorm = this; + } + clone.origNorm.IncRef(); + } + + // Only the origNorm will actually readBytes from in: + clone.in_Renamed = null; + + return clone; + } + } + + // Flush all pending changes to the next generation + // separate norms file. 
+ public void ReWrite(SegmentInfo si) + { + System.Diagnostics.Debug.Assert(refCount > 0 && (origNorm == null || origNorm.refCount > 0), "refCount=" + refCount + " origNorm=" + origNorm); + + // NOTE: norms are re-written in regular directory, not cfs + si.AdvanceNormGen(this.number); + string normFileName = si.GetNormFileName(this.number); + IndexOutput @out = enclosingInstance.Directory().CreateOutput(normFileName); + bool success = false; + try + { + try { + @out.WriteBytes(bytes, enclosingInstance.MaxDoc); + } finally { + @out.Close(); + } + success = true; + } + finally + { + if (!success) + { + try + { + enclosingInstance.Directory().DeleteFile(normFileName); + } + catch (Exception) + { + // suppress this so we keep throwing the + // original exception + } + } + } + this.dirty = false; + } + } + + internal System.Collections.Generic.IDictionary norms = new HashMap(); + + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public static SegmentReader Get(bool readOnly, SegmentInfo si, int termInfosIndexDivisor) + { + return Get(readOnly, si.dir, si, BufferedIndexInput.BUFFER_SIZE, true, termInfosIndexDivisor); + } + + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public static SegmentReader Get(bool readOnly, Directory dir, SegmentInfo si, int readBufferSize, bool doOpenStores, int termInfosIndexDivisor) + { + SegmentReader instance = readOnly ? new ReadOnlySegmentReader() : new SegmentReader(); + instance.readOnly = readOnly; + instance.si = si; + instance.readBufferSize = readBufferSize; + + bool success = false; + + try + { + instance.core = new CoreReaders(instance, dir, si, readBufferSize, termInfosIndexDivisor); + if (doOpenStores) + { + instance.core.OpenDocStores(si); + } + instance.LoadDeletedDocs(); + instance.OpenNorms(instance.core.cfsDir, readBufferSize); + success = true; + } + finally + { + + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. + if (!success) + { + instance.DoClose(); + } + } + return instance; + } + + internal virtual void OpenDocStores() + { + core.OpenDocStores(si); + } + + private bool CheckDeletedCounts() + { + int recomputedCount = deletedDocs.GetRecomputedCount(); + + System.Diagnostics.Debug.Assert(deletedDocs.Count() == recomputedCount, "deleted count=" + deletedDocs.Count() + " vs recomputed count=" + recomputedCount); + + System.Diagnostics.Debug.Assert(si.GetDelCount() == recomputedCount, "delete count mismatch: info=" + si.GetDelCount() + " vs BitVector=" + recomputedCount); + + // Verify # deletes does not exceed maxDoc for this + // segment: + System.Diagnostics.Debug.Assert(si.GetDelCount() <= MaxDoc, "delete count mismatch: " + recomputedCount + ") exceeds max doc (" + MaxDoc + ") for segment " + si.name); + + return true; + } + + private void LoadDeletedDocs() + { + // NOTE: the bitvector is stored using the regular directory, not cfs + //if(HasDeletions(si)) + if (si.HasDeletions()) + { + deletedDocs = new BitVector(Directory(), si.GetDelFileName()); + deletedDocsRef = new Ref(); + + System.Diagnostics.Debug.Assert(CheckDeletedCounts()); + } + else + System.Diagnostics.Debug.Assert(si.GetDelCount() == 0); + } + + /// Clones the norm bytes. May be overridden by subclasses. New and experimental. 
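For reference, the static factory above is how a single segment is opened as a reader. The sketch assumes a SegmentInfo named segmentInfo is at hand; 1 is an illustrative terms-index divisor that loads the full terms index.

    // read-only reader over one segment
    SegmentReader reader = SegmentReader.Get(true, segmentInfo, 1);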
+ /// Byte array to clone + /// + /// New BitVector + /// + protected internal virtual byte[] CloneNormBytes(byte[] bytes) + { + var cloneBytes = new byte[bytes.Length]; + Array.Copy(bytes, 0, cloneBytes, 0, bytes.Length); + return cloneBytes; + } + + /// Clones the deleteDocs BitVector. May be overridden by subclasses. New and experimental. + /// BitVector to clone + /// + /// New BitVector + /// + protected internal virtual BitVector CloneDeletedDocs(BitVector bv) + { + return (BitVector) bv.Clone(); + } + + public override System.Object Clone() + { + lock (this) + { + try + { + return Clone(readOnly); // Preserve current readOnly + } + catch (System.Exception ex) + { + throw new System.SystemException(ex.Message, ex); + } + } + } + + public override IndexReader Clone(bool openReadOnly) + { + lock (this) + { + return ReopenSegment(si, true, openReadOnly); + } + } + + internal virtual SegmentReader ReopenSegment(SegmentInfo si, bool doClone, bool openReadOnly) + { + lock (this) + { + bool deletionsUpToDate = (this.si.HasDeletions() == si.HasDeletions()) && (!si.HasDeletions() || this.si.GetDelFileName().Equals(si.GetDelFileName())); + bool normsUpToDate = true; + + bool[] fieldNormsChanged = new bool[core.fieldInfos.Size()]; + int fieldCount = core.fieldInfos.Size(); + for (int i = 0; i < fieldCount; i++) + { + if (!this.si.GetNormFileName(i).Equals(si.GetNormFileName(i))) + { + normsUpToDate = false; + fieldNormsChanged[i] = true; + } + } + + // if we're cloning we need to run through the reopenSegment logic + // also if both old and new readers aren't readonly, we clone to avoid sharing modifications + if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) + { + return this; + } + + // When cloning, the incoming SegmentInfos should not + // have any changes in it: + System.Diagnostics.Debug.Assert(!doClone ||(normsUpToDate && deletionsUpToDate)); + + // clone reader + SegmentReader clone = openReadOnly ? 
new ReadOnlySegmentReader() : new SegmentReader(); + + bool success = false; + try + { + core.IncRef(); + clone.core = core; + clone.readOnly = openReadOnly; + clone.si = si; + clone.readBufferSize = readBufferSize; + + if (!openReadOnly && hasChanges) + { + // My pending changes transfer to the new reader + clone.pendingDeleteCount = pendingDeleteCount; + clone.deletedDocsDirty = deletedDocsDirty; + clone.normsDirty = normsDirty; + clone.hasChanges = hasChanges; + hasChanges = false; + } + + if (doClone) + { + if (deletedDocs != null) + { + deletedDocsRef.IncRef(); + clone.deletedDocs = deletedDocs; + clone.deletedDocsRef = deletedDocsRef; + } + } + else + { + if (!deletionsUpToDate) + { + // load deleted docs + System.Diagnostics.Debug.Assert(clone.deletedDocs == null); + clone.LoadDeletedDocs(); + } + else if (deletedDocs != null) + { + deletedDocsRef.IncRef(); + clone.deletedDocs = deletedDocs; + clone.deletedDocsRef = deletedDocsRef; + } + } + + clone.norms = new HashMap(); + + // Clone norms + for (int i = 0; i < fieldNormsChanged.Length; i++) + { + + // Clone unchanged norms to the cloned reader + if (doClone || !fieldNormsChanged[i]) + { + System.String curField = core.fieldInfos.FieldInfo(i).name; + Norm norm = this.norms[curField]; + if (norm != null) + clone.norms[curField] = (Norm)norm.Clone(); + } + } + + // If we are not cloning, then this will open anew + // any norms that have changed: + clone.OpenNorms(si.GetUseCompoundFile()?core.GetCFSReader():Directory(), readBufferSize); + + success = true; + } + finally + { + if (!success) + { + // An exception occured during reopen, we have to decRef the norms + // that we incRef'ed already and close singleNormsStream and FieldsReader + clone.DecRef(); + } + } + + return clone; + } + } + + protected internal override void DoCommit(System.Collections.Generic.IDictionary commitUserData) + { + if (hasChanges) + { + StartCommit(); + bool success = false; + try + { + CommitChanges(commitUserData); + success = true; + } + finally + { + if (!success) + { + RollbackCommit(); + } + } + } + } + + private void CommitChanges(System.Collections.Generic.IDictionary commitUserData) + { + if (deletedDocsDirty) + { // re-write deleted + si.AdvanceDelGen(); + + // We can write directly to the actual name (vs to a + // .tmp & renaming it) because the file is not live + // until segments file is written: + string delFileName = si.GetDelFileName(); + bool success = false; + try + { + deletedDocs.Write(Directory(), delFileName); + success = true; + } + finally + { + if (!success) + { + try + { + Directory().DeleteFile(delFileName); + } + catch (Exception) + { + // suppress this so we keep throwing the + // original exception + } + } + } + + si.SetDelCount(si.GetDelCount() + pendingDeleteCount); + pendingDeleteCount = 0; + System.Diagnostics.Debug.Assert(deletedDocs.Count() == si.GetDelCount(), "delete count mismatch during commit: info=" + si.GetDelCount() + " vs BitVector=" + deletedDocs.Count()); + } + else + { + System.Diagnostics.Debug.Assert(pendingDeleteCount == 0); + } + + if (normsDirty) + { // re-write norms + si.SetNumFields(core.fieldInfos.Size()); + foreach (Norm norm in norms.Values) + { + if (norm.dirty) + { + norm.ReWrite(si); + } + } + } + deletedDocsDirty = false; + normsDirty = false; + hasChanges = false; + } + + internal virtual FieldsReader GetFieldsReader() + { + return fieldsReaderLocal.Get(); + } + + protected internal override void DoClose() + { + termVectorsLocal.Close(); + fieldsReaderLocal.Close(); + + if (deletedDocs != null) 
+ { + deletedDocsRef.DecRef(); + // null so if an app hangs on to us we still free most ram + deletedDocs = null; + } + + foreach(Norm norm in norms.Values) + { + norm.DecRef(); + } + if (core != null) + { + core.DecRef(); + } + } + + //internal static bool HasDeletions(SegmentInfo si) + //{ + // // Don't call ensureOpen() here (it could affect performance) + // return si.HasDeletions(); + //} + + public override bool HasDeletions + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return deletedDocs != null; + } + } + + internal static bool UsesCompoundFile(SegmentInfo si) + { + return si.GetUseCompoundFile(); + } + + internal static bool HasSeparateNorms(SegmentInfo si) + { + return si.HasSeparateNorms(); + } + + protected internal override void DoDelete(int docNum) + { + if (deletedDocs == null) + { + deletedDocs = new BitVector(MaxDoc); + deletedDocsRef = new Ref(); + } + // there is more than 1 SegmentReader with a reference to this + // deletedDocs BitVector so decRef the current deletedDocsRef, + // clone the BitVector, create a new deletedDocsRef + if (deletedDocsRef.RefCount() > 1) + { + Ref oldRef = deletedDocsRef; + deletedDocs = CloneDeletedDocs(deletedDocs); + deletedDocsRef = new Ref(); + oldRef.DecRef(); + } + deletedDocsDirty = true; + if (!deletedDocs.GetAndSet(docNum)) + pendingDeleteCount++; + } + + protected internal override void DoUndeleteAll() + { + deletedDocsDirty = false; + if (deletedDocs != null) + { + System.Diagnostics.Debug.Assert(deletedDocsRef != null); + deletedDocsRef.DecRef(); + deletedDocs = null; + deletedDocsRef = null; + pendingDeleteCount = 0; + si.ClearDelGen(); + si.SetDelCount(0); + } + else + { + System.Diagnostics.Debug.Assert(deletedDocsRef == null); + System.Diagnostics.Debug.Assert(pendingDeleteCount == 0); + } + } + + internal virtual System.Collections.Generic.IList Files() + { + return si.Files(); + } + + public override TermEnum Terms() + { + EnsureOpen(); + return core.GetTermsReader().Terms(); + } + + public override TermEnum Terms(Term t) + { + EnsureOpen(); + return core.GetTermsReader().Terms(t); + } + + public /*internal*/ virtual FieldInfos FieldInfos() + { + return core.fieldInfos; + } + + public override Document Document(int n, FieldSelector fieldSelector) + { + EnsureOpen(); + return GetFieldsReader().Doc(n, fieldSelector); + } + + public override bool IsDeleted(int n) + { + lock (this) + { + return (deletedDocs != null && deletedDocs.Get(n)); + } + } + + public override TermDocs TermDocs(Term term) + { + if (term == null) + { + return new AllTermDocs(this); + } + else + { + return base.TermDocs(term); + } + } + + public override TermDocs TermDocs() + { + EnsureOpen(); + return new SegmentTermDocs(this); + } + + public override TermPositions TermPositions() + { + EnsureOpen(); + return new SegmentTermPositions(this); + } + + public override int DocFreq(Term t) + { + EnsureOpen(); + TermInfo ti = core.GetTermsReader().Get(t); + if (ti != null) + return ti.docFreq; + else + return 0; + } + + public override int NumDocs() + { + // Don't call ensureOpen() here (it could affect performance) + int n = MaxDoc; + if (deletedDocs != null) + n -= deletedDocs.Count(); + return n; + } + + public override int MaxDoc + { + get + { + // Don't call ensureOpen() here (it could affect performance) + return si.docCount; + } + } + + /// + /// + public override System.Collections.Generic.ICollection GetFieldNames(IndexReader.FieldOption fieldOption) + { + EnsureOpen(); + + System.Collections.Generic.ISet fieldSet = 
Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); + for (int i = 0; i < core.fieldInfos.Size(); i++) + { + FieldInfo fi = core.fieldInfos.FieldInfo(i); + if (fieldOption == IndexReader.FieldOption.ALL) + { + fieldSet.Add(fi.name); + } + else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) + { + fieldSet.Add(fi.name); + } + else if (fi.omitTermFreqAndPositions && fieldOption == IndexReader.FieldOption.OMIT_TERM_FREQ_AND_POSITIONS) + { + fieldSet.Add(fi.name); + } + else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) + { + fieldSet.Add(fi.name); + } + else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) + { + fieldSet.Add(fi.name); + } + else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) + { + fieldSet.Add(fi.name); + } + else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR) + { + fieldSet.Add(fi.name); + } + else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) + { + fieldSet.Add(fi.name); + } + else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) + { + fieldSet.Add(fi.name); + } + else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) + { + fieldSet.Add(fi.name); + } + else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) + { + fieldSet.Add(fi.name); + } + } + return fieldSet; + } + + + public override bool HasNorms(System.String field) + { + lock (this) + { + EnsureOpen(); + return norms.ContainsKey(field); + } + } + + // can return null if norms aren't stored + protected internal virtual byte[] GetNorms(System.String field) + { + lock (this) + { + Norm norm = norms[field]; + if (norm == null) + return null; // not indexed, or norms not stored + return norm.Bytes(); + } + } + + // returns fake norms if norms aren't available + public override byte[] Norms(System.String field) + { + lock (this) + { + EnsureOpen(); + byte[] bytes = GetNorms(field); + return bytes; + } + } + + protected internal override void DoSetNorm(int doc, System.String field, byte value_Renamed) + { + Norm norm = norms[field]; + if (norm == null) + // not an indexed field + return ; + + normsDirty = true; + norm.CopyOnWrite()[doc] = value_Renamed; // set the value + } + + /// Read norms into a pre-allocated array. 
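The accessors above return the raw norm bytes for a field; the bulk, pre-allocated-array variant follows below. A rough usage sketch, not part of the imported file (assumptions: reader is an open SegmentReader, "body" is a field indexed with norms, and Similarity.DecodeNorm comes from Lucene.Net.Search in the full Lucene.Net 3.0.3 API):

    byte[] norms = reader.Norms("body");                   // one byte per document, or null if norms are absent
    if (norms != null)
    {
        float boost = Similarity.DecodeNorm(norms[0]);     // decode the norm for doc 0 back to a float
        System.Console.WriteLine("doc 0 norm = " + boost);
    }
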
+ public override void Norms(System.String field, byte[] bytes, int offset) + { + lock (this) + { + + EnsureOpen(); + Norm norm = norms[field]; + if (norm == null) + { + for (int i = offset; i < bytes.Length; i++) + { + bytes[i] = (byte) DefaultSimilarity.EncodeNorm(1.0f); + } + return ; + } + + norm.Bytes(bytes, offset, MaxDoc); + } + } + + + private void OpenNorms(Directory cfsDir, int readBufferSize) + { + long nextNormSeek = SegmentMerger.NORMS_HEADER.Length; //skip header (header unused for now) + int maxDoc = MaxDoc; + for (int i = 0; i < core.fieldInfos.Size(); i++) + { + FieldInfo fi = core.fieldInfos.FieldInfo(i); + if (norms.ContainsKey(fi.name)) + { + // in case this SegmentReader is being re-opened, we might be able to + // reuse some norm instances and skip loading them here + continue; + } + if (fi.isIndexed && !fi.omitNorms) + { + Directory d = Directory(); + System.String fileName = si.GetNormFileName(fi.number); + if (!si.HasSeparateNorms(fi.number)) + { + d = cfsDir; + } + + // singleNormFile means multiple norms share this file + bool singleNormFile = fileName.EndsWith("." + IndexFileNames.NORMS_EXTENSION); + IndexInput normInput = null; + long normSeek; + + if (singleNormFile) + { + normSeek = nextNormSeek; + if (singleNormStream == null) + { + singleNormStream = d.OpenInput(fileName, readBufferSize); + singleNormRef = new Ref(); + } + else + { + singleNormRef.IncRef(); + } + // All norms in the .nrm file can share a single IndexInput since + // they are only used in a synchronized context. + // If this were to change in the future, a clone could be done here. + normInput = singleNormStream; + } + else + { + normSeek = 0; + normInput = d.OpenInput(fileName); + } + + norms[fi.name] = new Norm(this, normInput, fi.number, normSeek); + nextNormSeek += maxDoc; // increment also if some norms are separate + } + } + } + + public /*internal*/ virtual bool TermsIndexLoaded() + { + return core.TermsIndexIsLoaded(); + } + + // NOTE: only called from IndexWriter when a near + // real-time reader is opened, or applyDeletes is run, + // sharing a segment that's still being merged. This + // method is not thread safe, and relies on the + // synchronization in IndexWriter + internal virtual void LoadTermsIndex(int termsIndexDivisor) + { + core.LoadTermsIndex(si, termsIndexDivisor); + } + + // for testing only + public /*internal*/ virtual bool NormsClosed() + { + if (singleNormStream != null) + { + return false; + } + return norms.Values.All(norm => norm.refCount <= 0); + } + + // for testing only + public /*internal*/ virtual bool NormsClosed(System.String field) + { + return norms[field].refCount == 0; + } + + /// Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. + /// TermVectorsReader + /// + internal virtual TermVectorsReader GetTermVectorsReader() + { + TermVectorsReader tvReader = termVectorsLocal.Get(); + if (tvReader == null) + { + TermVectorsReader orig = core.GetTermVectorsReaderOrig(); + if (orig == null) + { + return null; + } + else + { + try + { + tvReader = (TermVectorsReader) orig.Clone(); + } + catch (System.Exception) + { + return null; + } + } + termVectorsLocal.Set(tvReader); + } + return tvReader; + } + + internal virtual TermVectorsReader GetTermVectorsReaderOrig() + { + return core.GetTermVectorsReaderOrig(); + } + + /// Return a term frequency vector for the specified document and field. 
The + /// vector returned contains term numbers and frequencies for all terms in + /// the specified field of this document, if the field had storeTermVector + /// flag set. If the flag was not set, the method returns null. + /// + /// IOException + public override ITermFreqVector GetTermFreqVector(int docNumber, System.String field) + { + // Check if this field is invalid or has no stored term vector + EnsureOpen(); + FieldInfo fi = core.fieldInfos.FieldInfo(field); + if (fi == null || !fi.storeTermVector) + return null; + + TermVectorsReader termVectorsReader = GetTermVectorsReader(); + if (termVectorsReader == null) + return null; + + return termVectorsReader.Get(docNumber, field); + } + + + public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper) + { + EnsureOpen(); + FieldInfo fi = core.fieldInfos.FieldInfo(field); + if (fi == null || !fi.storeTermVector) + return; + + TermVectorsReader termVectorsReader = GetTermVectorsReader(); + if (termVectorsReader == null) + { + return; + } + termVectorsReader.Get(docNumber, field, mapper); + } + + + public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper) + { + EnsureOpen(); + + TermVectorsReader termVectorsReader = GetTermVectorsReader(); + if (termVectorsReader == null) + return ; + + termVectorsReader.Get(docNumber, mapper); + } + + /// Return an array of term frequency vectors for the specified document. + /// The array contains a vector for each vectorized field in the document. + /// Each vector vector contains term numbers and frequencies for all terms + /// in a given vectorized field. + /// If no such fields existed, the method returns null. + /// + /// IOException + public override ITermFreqVector[] GetTermFreqVectors(int docNumber) + { + EnsureOpen(); + + TermVectorsReader termVectorsReader = GetTermVectorsReader(); + if (termVectorsReader == null) + return null; + + return termVectorsReader.Get(docNumber); + } + + /// Return the name of the segment this reader is reading. + public virtual string SegmentName + { + get { return core.segment; } + } + + /// Return the SegmentInfo of the segment this reader is reading. + internal virtual SegmentInfo SegmentInfo + { + get { return si; } + set { si = value; } + } + + internal virtual void StartCommit() + { + rollbackSegmentInfo = (SegmentInfo)si.Clone(); + rollbackHasChanges = hasChanges; + rollbackDeletedDocsDirty = deletedDocsDirty; + rollbackNormsDirty = normsDirty; + rollbackPendingDeleteCount = pendingDeleteCount; + foreach(Norm norm in norms.Values) + { + norm.rollbackDirty = norm.dirty; + } + } + + internal virtual void RollbackCommit() + { + si.Reset(rollbackSegmentInfo); + hasChanges = rollbackHasChanges; + deletedDocsDirty = rollbackDeletedDocsDirty; + normsDirty = rollbackNormsDirty; + pendingDeleteCount = rollbackPendingDeleteCount; + foreach(Norm norm in norms.Values) + { + norm.dirty = norm.rollbackDirty; + } + } + + /// Returns the directory this index resides in. + public override Directory Directory() + { + // Don't ensureOpen here -- in certain cases, when a + // cloned/reopened reader needs to commit, it may call + // this method on the closed original reader + return core.dir; + } + + // This is necessary so that cloned SegmentReaders (which + // share the underlying postings data) will map to the + // same entry in the FieldCache. See LUCENE-1579. 
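To make the LUCENE-1579 note concrete: a clone shares its CoreReaders with the reader it came from, so both expose the same FieldCacheKey object (the property defined just below) and FieldCache entries built for one are reused by the other. A minimal sketch, not part of the imported file, assuming dir is a Directory that already holds a single-segment index:

    SegmentReader original = SegmentReader.GetOnlySegmentReader(IndexReader.Open(dir, true));
    var clone = (SegmentReader) original.Clone(true);       // read-only clone that shares the core
    bool sameEntry = object.ReferenceEquals(original.FieldCacheKey, clone.FieldCacheKey); // true
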
+ + public override object FieldCacheKey + { + get { return core.freqStream; } + } + + public override object DeletesCacheKey + { + get { return deletedDocs; } + } + + + public override long UniqueTermCount + { + get { return core.GetTermsReader().Size(); } + } + + /// Lotsa tests did hacks like:
+ /// SegmentReader reader = (SegmentReader) IndexReader.open(dir);
+ /// They broke. This method serves as a hack to keep hacks working. + /// We do it with R/W access for the tests (BW compatibility). + ///
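With the helper declared just below, the equivalent of the old one-liner becomes a single call. Illustrative only, assuming dir is a Directory holding a single-segment index:

    // Opens the index (read/write, as the tests expect) and unwraps the single SegmentReader.
    SegmentReader reader = SegmentReader.GetOnlySegmentReader(dir);
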
+ [Obsolete("Remove this when tests are fixed!")] + public /*internal*/ static SegmentReader GetOnlySegmentReader(Directory dir) + { + return GetOnlySegmentReader(IndexReader.Open(dir,false)); + } + + public /*internal*/ static SegmentReader GetOnlySegmentReader(IndexReader reader) + { + var onlySegmentReader = reader as SegmentReader; + if (onlySegmentReader != null) + return onlySegmentReader; + + if (reader is DirectoryReader) + { + IndexReader[] subReaders = reader.GetSequentialSubReaders(); + if (subReaders.Length != 1) + { + throw new System.ArgumentException(reader + " has " + subReaders.Length + " segments instead of exactly one"); + } + + return (SegmentReader) subReaders[0]; + } + + throw new System.ArgumentException(reader + " is not a SegmentReader or a single-segment DirectoryReader"); + } + + public override int TermInfosIndexDivisor + { + get { return core.termsIndexDivisor; } + } + + public System.Collections.Generic.IDictionary norms_ForNUnit + { + get { return norms; } + } + + public BitVector deletedDocs_ForNUnit + { + get { return deletedDocs; } + } + + public CoreReaders core_ForNUnit + { + get { return core; } + } + + public Ref deletedDocsRef_ForNUnit + { + get { return deletedDocsRef; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentTermDocs.cs b/external/Lucene.Net.Light/src/core/Index/SegmentTermDocs.cs new file mode 100644 index 0000000000..f7efde648b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentTermDocs.cs @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Support; +using IndexInput = Lucene.Net.Store.IndexInput; +using BitVector = Lucene.Net.Util.BitVector; + +namespace Lucene.Net.Index +{ + + internal class SegmentTermDocs : TermDocs + { + protected internal SegmentReader parent; + protected internal IndexInput freqStream; + protected internal int count; + protected internal int df; + protected internal BitVector deletedDocs; + internal int doc = 0; + internal int freq; + + private readonly int skipInterval; + private readonly int maxSkipLevels; + private DefaultSkipListReader skipListReader; + + private long freqBasePointer; + private long proxBasePointer; + + private long skipPointer; + private bool haveSkipped; + + protected internal bool currentFieldStoresPayloads; + protected internal bool currentFieldOmitTermFreqAndPositions; + + private bool isDisposed; + + public /*protected internal*/ SegmentTermDocs(SegmentReader parent) + { + this.parent = parent; + this.freqStream = (IndexInput) parent.core.freqStream.Clone(); + lock (parent) + { + this.deletedDocs = parent.deletedDocs; + } + this.skipInterval = parent.core.GetTermsReader().SkipInterval; + this.maxSkipLevels = parent.core.GetTermsReader().MaxSkipLevels; + } + + public virtual void Seek(Term term) + { + TermInfo ti = parent.core.GetTermsReader().Get(term); + Seek(ti, term); + } + + public virtual void Seek(TermEnum termEnum) + { + TermInfo ti; + Term term; + + // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs + if (termEnum is SegmentTermEnum && ((SegmentTermEnum) termEnum).fieldInfos == parent.core.fieldInfos) + { + // optimized case + var segmentTermEnum = ((SegmentTermEnum) termEnum); + term = segmentTermEnum.Term; + ti = segmentTermEnum.TermInfo(); + } + else + { + // punt case + term = termEnum.Term; + ti = parent.core.GetTermsReader().Get(term); + } + + Seek(ti, term); + } + + internal virtual void Seek(TermInfo ti, Term term) + { + count = 0; + FieldInfo fi = parent.core.fieldInfos.FieldInfo(term.Field); + currentFieldOmitTermFreqAndPositions = (fi != null) && fi.omitTermFreqAndPositions; + currentFieldStoresPayloads = (fi != null) && fi.storePayloads; + if (ti == null) + { + df = 0; + } + else + { + df = ti.docFreq; + doc = 0; + freqBasePointer = ti.freqPointer; + proxBasePointer = ti.proxPointer; + skipPointer = freqBasePointer + ti.skipOffset; + freqStream.Seek(freqBasePointer); + haveSkipped = false; + } + } + + public void Dispose() + { + Dispose(true); + } + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + freqStream.Dispose(); + if (skipListReader != null) + skipListReader.Dispose(); + + isDisposed = true; + } + + public int Doc + { + get { return doc; } + } + + public int Freq + { + get { return freq; } + } + + protected internal virtual void SkippingDoc() + { + } + + public virtual bool Next() + { + while (true) + { + if (count == df) + return false; + int docCode = freqStream.ReadVInt(); + + if (currentFieldOmitTermFreqAndPositions) + { + doc += docCode; + freq = 1; + } + else + { + doc += Number.URShift(docCode, 1); // shift off low bit + if ((docCode & 1) != 0) + // if low bit is set + freq = 1; + // freq is one + else + freq = freqStream.ReadVInt(); // else read freq + } + + count++; + + if (deletedDocs == null || !deletedDocs.Get(doc)) + break; + SkippingDoc(); + } + return true; + } + + /// Optimized implementation. 
+ public virtual int Read(int[] docs, int[] freqs) + { + int length = docs.Length; + if (currentFieldOmitTermFreqAndPositions) + { + return ReadNoTf(docs, freqs, length); + } + else + { + int i = 0; + while (i < length && count < df) + { + // manually inlined call to next() for speed + int docCode = freqStream.ReadVInt(); + doc += Number.URShift(docCode, 1); // shift off low bit + if ((docCode & 1) != 0) + // if low bit is set + freq = 1; + // freq is one + else + freq = freqStream.ReadVInt(); // else read freq + count++; + + if (deletedDocs == null || !deletedDocs.Get(doc)) + { + docs[i] = doc; + freqs[i] = freq; + ++i; + } + } + return i; + } + } + + private int ReadNoTf(int[] docs, int[] freqs, int length) + { + int i = 0; + while (i < length && count < df) + { + // manually inlined call to next() for speed + doc += freqStream.ReadVInt(); + count++; + + if (deletedDocs == null || !deletedDocs.Get(doc)) + { + docs[i] = doc; + // Hardware freq to 1 when term freqs were not + // stored in the index + freqs[i] = 1; + ++i; + } + } + return i; + } + + + /// Overridden by SegmentTermPositions to skip in prox stream. + protected internal virtual void SkipProx(long proxPointer, int payloadLength) + { + } + + /// Optimized implementation. + public virtual bool SkipTo(int target) + { + if (df >= skipInterval) + { + // optimized case + if (skipListReader == null) + skipListReader = new DefaultSkipListReader((IndexInput) freqStream.Clone(), maxSkipLevels, skipInterval); // lazily clone + + if (!haveSkipped) + { + // lazily initialize skip stream + skipListReader.Init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); + haveSkipped = true; + } + + int newCount = skipListReader.SkipTo(target); + if (newCount > count) + { + freqStream.Seek(skipListReader.GetFreqPointer()); + SkipProx(skipListReader.GetProxPointer(), skipListReader.GetPayloadLength()); + + doc = skipListReader.GetDoc(); + count = newCount; + } + } + + // done skipping, now just scan + do + { + if (!Next()) + return false; + } + while (target > doc); + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentTermEnum.cs b/external/Lucene.Net.Light/src/core/Index/SegmentTermEnum.cs new file mode 100644 index 0000000000..77224df77d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentTermEnum.cs @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + internal sealed class SegmentTermEnum : TermEnum, System.ICloneable + { + private IndexInput input; + internal FieldInfos fieldInfos; + internal long size; + internal long position = - 1; + + private TermBuffer termBuffer = new TermBuffer(); + private TermBuffer prevBuffer = new TermBuffer(); + private TermBuffer scanBuffer = new TermBuffer(); // used for scanning + + private TermInfo termInfo = new TermInfo(); + + private readonly int format; + private readonly bool isIndex = false; + internal long indexPointer = 0; + internal int indexInterval; + internal int skipInterval; + internal int maxSkipLevels; + private readonly int formatM1SkipInterval; + + internal SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) + { + input = i; + fieldInfos = fis; + isIndex = isi; + maxSkipLevels = 1; // use single-level skip lists for formats > -3 + + int firstInt = input.ReadInt(); + if (firstInt >= 0) + { + // original-format file, without explicit format version number + format = 0; + size = firstInt; + + // back-compatible settings + indexInterval = 128; + skipInterval = System.Int32.MaxValue; // switch off skipTo optimization + } + else + { + // we have a format version number + format = firstInt; + + // check that it is a format we can understand + if (format < TermInfosWriter.FORMAT_CURRENT) + throw new CorruptIndexException("Unknown format version:" + format + " expected " + TermInfosWriter.FORMAT_CURRENT + " or higher"); + + size = input.ReadLong(); // read the size + + if (format == - 1) + { + if (!isIndex) + { + indexInterval = input.ReadInt(); + formatM1SkipInterval = input.ReadInt(); + } + // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in + // skipTo implementation of these versions + skipInterval = System.Int32.MaxValue; + } + else + { + indexInterval = input.ReadInt(); + skipInterval = input.ReadInt(); + if (format <= TermInfosWriter.FORMAT) + { + // this new format introduces multi-level skipping + maxSkipLevels = input.ReadInt(); + } + } + System.Diagnostics.Debug.Assert(indexInterval > 0, "indexInterval=" + indexInterval + " is negative; must be > 0"); + System.Diagnostics.Debug.Assert(skipInterval > 0, "skipInterval=" + skipInterval + " is negative; must be > 0"); + } + if (format > TermInfosWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) + { + termBuffer.SetPreUTF8Strings(); + scanBuffer.SetPreUTF8Strings(); + prevBuffer.SetPreUTF8Strings(); + } + } + + public System.Object Clone() + { + SegmentTermEnum clone = null; + try + { + clone = (SegmentTermEnum) base.MemberwiseClone(); + } + catch (System.Exception) + { + } + + clone.input = (IndexInput) input.Clone(); + clone.termInfo = new TermInfo(termInfo); + + clone.termBuffer = (TermBuffer) termBuffer.Clone(); + clone.prevBuffer = (TermBuffer) prevBuffer.Clone(); + clone.scanBuffer = new TermBuffer(); + + return clone; + } + + internal void Seek(long pointer, long p, Term t, TermInfo ti) + { + input.Seek(pointer); + position = p; + termBuffer.Set(t); + prevBuffer.Reset(); + termInfo.Set(ti); + } + + /// Increments the enumeration to the next element. True if one exists. 
+ public override bool Next() + { + if (position++ >= size - 1) + { + prevBuffer.Set(termBuffer); + termBuffer.Reset(); + return false; + } + + prevBuffer.Set(termBuffer); + termBuffer.Read(input, fieldInfos); + + termInfo.docFreq = input.ReadVInt(); // read doc freq + termInfo.freqPointer += input.ReadVLong(); // read freq pointer + termInfo.proxPointer += input.ReadVLong(); // read prox pointer + + if (format == - 1) + { + // just read skipOffset in order to increment file pointer; + // value is never used since skipTo is switched off + if (!isIndex) + { + if (termInfo.docFreq > formatM1SkipInterval) + { + termInfo.skipOffset = input.ReadVInt(); + } + } + } + else + { + if (termInfo.docFreq >= skipInterval) + termInfo.skipOffset = input.ReadVInt(); + } + + if (isIndex) + indexPointer += input.ReadVLong(); // read index pointer + + return true; + } + + /// Optimized scan, without allocating new terms. + /// Return number of invocations to next(). + /// + internal int ScanTo(Term term) + { + scanBuffer.Set(term); + int count = 0; + while (scanBuffer.CompareTo(termBuffer) > 0 && Next()) + { + count++; + } + return count; + } + + /// Returns the current Term in the enumeration. + /// Initially invalid, valid after next() called for the first time. + /// + public override Term Term + { + get { return termBuffer.ToTerm(); } + } + + /// Returns the previous Term enumerated. Initially null. + public /*internal*/ Term Prev() + { + return prevBuffer.ToTerm(); + } + + /// Returns the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + /// + internal TermInfo TermInfo() + { + return new TermInfo(termInfo); + } + + /// Sets the argument to the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + /// + internal void TermInfo(TermInfo ti) + { + ti.Set(termInfo); + } + + /// Returns the docFreq from the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + /// + public override int DocFreq() + { + return termInfo.docFreq; + } + + /* Returns the freqPointer from the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time.*/ + internal long FreqPointer() + { + return termInfo.freqPointer; + } + + /* Returns the proxPointer from the current TermInfo in the enumeration. + Initially invalid, valid after next() called for the first time.*/ + internal long ProxPointer() + { + return termInfo.proxPointer; + } + + /// Closes the enumeration to further activity, freeing resources. + protected override void Dispose(bool disposing) + { + input.Dispose(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentTermPositionVector.cs b/external/Lucene.Net.Light/src/core/Index/SegmentTermPositionVector.cs new file mode 100644 index 0000000000..b4304196c1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentTermPositionVector.cs @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + class SegmentTermPositionVector:SegmentTermVector, TermPositionVector + { + protected internal int[][] positions; + protected internal TermVectorOffsetInfo[][] offsets; + public static readonly int[] EMPTY_TERM_POS = new int[0]; + + public SegmentTermPositionVector(System.String field, System.String[] terms, int[] termFreqs, int[][] positions, TermVectorOffsetInfo[][] offsets):base(field, terms, termFreqs) + { + this.offsets = offsets; + this.positions = positions; + } + + /// Returns an array of TermVectorOffsetInfo in which the term is found. + /// + /// + /// The position in the array to get the offsets from + /// + /// An array of TermVectorOffsetInfo objects or the empty list + /// + /// + /// + public virtual TermVectorOffsetInfo[] GetOffsets(int index) + { + TermVectorOffsetInfo[] result = TermVectorOffsetInfo.EMPTY_OFFSET_INFO; + if (offsets == null) + return null; + if (index >= 0 && index < offsets.Length) + { + result = offsets[index]; + } + return result; + } + + /// Returns an array of positions in which the term is found. + /// Terms are identified by the index at which its number appears in the + /// term String array obtained from the indexOf method. + /// + public virtual int[] GetTermPositions(int index) + { + int[] result = EMPTY_TERM_POS; + if (positions == null) + return null; + if (index >= 0 && index < positions.Length) + { + result = positions[index]; + } + + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentTermPositions.cs b/external/Lucene.Net.Light/src/core/Index/SegmentTermPositions.cs new file mode 100644 index 0000000000..7c5856c40f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentTermPositions.cs @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Support; +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + internal sealed class SegmentTermPositions : SegmentTermDocs, TermPositions + { + private IndexInput proxStream; + private int proxCount; + private int position; + + // the current payload length + private int payloadLength; + // indicates whether the payload of the currend position has + // been read from the proxStream yet + private bool needToLoadPayload; + + // these variables are being used to remember information + // for a lazy skip + private long lazySkipPointer = - 1; + private int lazySkipProxCount = 0; + + internal SegmentTermPositions(SegmentReader p):base(p) + { + this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time + } + + internal override void Seek(TermInfo ti, Term term) + { + base.Seek(ti, term); + if (ti != null) + lazySkipPointer = ti.proxPointer; + + lazySkipProxCount = 0; + proxCount = 0; + payloadLength = 0; + needToLoadPayload = false; + } + + protected override void Dispose(bool disposing) + { + base.Dispose(disposing); + if (proxStream != null) + proxStream.Dispose(); + } + + public int NextPosition() + { + if (currentFieldOmitTermFreqAndPositions) + // This field does not store term freq, positions, payloads + return 0; + // perform lazy skips if neccessary + LazySkip(); + proxCount--; + return position += ReadDeltaPosition(); + } + + private int ReadDeltaPosition() + { + int delta = proxStream.ReadVInt(); + if (currentFieldStoresPayloads) + { + // if the current field stores payloads then + // the position delta is shifted one bit to the left. + // if the LSB is set, then we have to read the current + // payload length + if ((delta & 1) != 0) + { + payloadLength = proxStream.ReadVInt(); + } + delta = Number.URShift(delta, 1); + needToLoadPayload = true; + } + return delta; + } + + protected internal override void SkippingDoc() + { + // we remember to skip a document lazily + lazySkipProxCount += freq; + } + + public override bool Next() + { + // we remember to skip the remaining positions of the current + // document lazily + lazySkipProxCount += proxCount; + + if (base.Next()) + { + // run super + proxCount = freq; // note frequency + position = 0; // reset position + return true; + } + return false; + } + + public override int Read(int[] docs, int[] freqs) + { + throw new System.NotSupportedException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead."); + } + + + /// Called by super.skipTo(). + protected internal override void SkipProx(long proxPointer, int payloadLength) + { + // we save the pointer, we might have to skip there lazily + lazySkipPointer = proxPointer; + lazySkipProxCount = 0; + proxCount = 0; + this.payloadLength = payloadLength; + needToLoadPayload = false; + } + + private void SkipPositions(int n) + { + System.Diagnostics.Debug.Assert(!currentFieldOmitTermFreqAndPositions); + for (int f = n; f > 0; f--) + { + // skip unread positions + ReadDeltaPosition(); + SkipPayload(); + } + } + + private void SkipPayload() + { + if (needToLoadPayload && payloadLength > 0) + { + proxStream.Seek(proxStream.FilePointer + payloadLength); + } + needToLoadPayload = false; + } + + // It is not always neccessary to move the prox pointer + // to a new document after the freq pointer has been moved. 
+ // Consider for example a phrase query with two terms: + // the freq pointer for term 1 has to move to document x + // to answer the question if the term occurs in that document. But + // only if term 2 also matches document x, the positions have to be + // read to figure out if term 1 and term 2 appear next + // to each other in document x and thus satisfy the query. + // So we move the prox pointer lazily to the document + // as soon as positions are requested. + private void LazySkip() + { + if (proxStream == null) + { + // clone lazily + proxStream = (IndexInput) parent.core.proxStream.Clone(); + } + + // we might have to skip the current payload + // if it was not read yet + SkipPayload(); + + if (lazySkipPointer != - 1) + { + proxStream.Seek(lazySkipPointer); + lazySkipPointer = - 1; + } + + if (lazySkipProxCount != 0) + { + SkipPositions(lazySkipProxCount); + lazySkipProxCount = 0; + } + } + + public int PayloadLength + { + get { return payloadLength; } + } + + public byte[] GetPayload(byte[] data, int offset) + { + if (!needToLoadPayload) + { + throw new System.IO.IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); + } + + // read payloads lazily + byte[] retArray; + int retOffset; + if (data == null || data.Length - offset < payloadLength) + { + // the array is too small to store the payload data, + // so we allocate a new one + retArray = new byte[payloadLength]; + retOffset = 0; + } + else + { + retArray = data; + retOffset = offset; + } + proxStream.ReadBytes(retArray, retOffset, payloadLength); + needToLoadPayload = false; + return retArray; + } + + public bool IsPayloadAvailable + { + get { return needToLoadPayload && payloadLength > 0; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentTermVector.cs b/external/Lucene.Net.Light/src/core/Index/SegmentTermVector.cs new file mode 100644 index 0000000000..35bcc9510a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentTermVector.cs @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Index +{ + + + class SegmentTermVector : ITermFreqVector + { + private System.String field; + private System.String[] terms; + private int[] termFreqs; + + internal SegmentTermVector(System.String field, System.String[] terms, int[] termFreqs) + { + this.field = field; + this.terms = terms; + this.termFreqs = termFreqs; + } + + /// + /// The number of the field this vector is associated with + public virtual string Field + { + get { return field; } + } + + public override System.String ToString() + { + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + sb.Append('{'); + sb.Append(field).Append(": "); + if (terms != null) + { + for (int i = 0; i < terms.Length; i++) + { + if (i > 0) + sb.Append(", "); + sb.Append(terms[i]).Append('/').Append(termFreqs[i]); + } + } + sb.Append('}'); + + return sb.ToString(); + } + + public virtual int Size + { + get { return terms == null ? 0 : terms.Length; } + } + + public virtual System.String[] GetTerms() + { + return terms; + } + + public virtual int[] GetTermFrequencies() + { + return termFreqs; + } + + public virtual int IndexOf(System.String termText) + { + if (terms == null) + return - 1; + int res = System.Array.BinarySearch(terms, termText, System.StringComparer.Ordinal); + return res >= 0?res:- 1; + } + + public virtual int[] IndexesOf(System.String[] termNumbers, int start, int len) + { + // TODO: there must be a more efficient way of doing this. + // At least, we could advance the lower bound of the terms array + // as we find valid indexes. Also, it might be possible to leverage + // this even more by starting in the middle of the termNumbers array + // and thus dividing the terms array maybe in half with each found index. + int[] res = new int[len]; + + for (int i = 0; i < len; i++) + { + res[i] = IndexOf(termNumbers[start + i]); + } + return res; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SegmentWriteState.cs b/external/Lucene.Net.Light/src/core/Index/SegmentWriteState.cs new file mode 100644 index 0000000000..09db9e147c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SegmentWriteState.cs @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + class SegmentWriteState + { + internal DocumentsWriter docWriter; + internal Directory directory; + internal System.String segmentName; + internal System.String docStoreSegmentName; + internal int numDocs; + internal int termIndexInterval; + internal int numDocsInStore; + internal System.Collections.Generic.ICollection flushedFiles; + + public SegmentWriteState(DocumentsWriter docWriter, Directory directory, System.String segmentName, System.String docStoreSegmentName, int numDocs, int numDocsInStore, int termIndexInterval) + { + this.docWriter = docWriter; + this.directory = directory; + this.segmentName = segmentName; + this.docStoreSegmentName = docStoreSegmentName; + this.numDocs = numDocs; + this.numDocsInStore = numDocsInStore; + this.termIndexInterval = termIndexInterval; + flushedFiles = new System.Collections.Generic.HashSet(); + } + + public virtual System.String SegmentFileName(System.String ext) + { + return segmentName + "." + ext; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SerialMergeScheduler.cs b/external/Lucene.Net.Light/src/core/Index/SerialMergeScheduler.cs new file mode 100644 index 0000000000..867ee8f1ac --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SerialMergeScheduler.cs @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Index +{ + + /// A that simply does each merge + /// sequentially, using the current thread. + /// + public class SerialMergeScheduler:MergeScheduler + { + + /// Just do the merges in sequence. We do this + /// "synchronized" so that even if the application is using + /// multiple threads, only one merge may run at a time. + /// + public override void Merge(IndexWriter writer) + { + lock (this) + { + while (true) + { + MergePolicy.OneMerge merge = writer.GetNextMerge(); + if (merge == null) + break; + writer.Merge(merge); + } + } + } + + protected override void Dispose(bool disposing) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SnapshotDeletionPolicy.cs b/external/Lucene.Net.Light/src/core/Index/SnapshotDeletionPolicy.cs new file mode 100644 index 0000000000..030b6b06da --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SnapshotDeletionPolicy.cs @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + /// A that wraps around any other + /// and adds the ability to hold and + /// later release a single "snapshot" of an index. While + /// the snapshot is held, the will not + /// remove any files associated with it even if the index is + /// otherwise being actively, arbitrarily changed. Because + /// we wrap another arbitrary , this + /// gives you the freedom to continue using whatever + /// you would normally want to use with your + /// index. Note that you can re-use a single instance of + /// SnapshotDeletionPolicy across multiple writers as long + /// as they are against the same index Directory. Any + /// snapshot held when a writer is closed will "survive" + /// when the next writer is opened. + /// + ///

WARNING: This API is a new and experimental and + /// may suddenly change.

+ ///

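The class that follows is typically used to hold one commit steady while its files are copied elsewhere. A sketch of that backup pattern, not part of the imported file (assumptions: IndexWriter, KeepOnlyLastCommitDeletionPolicy, StandardAnalyzer and FSDirectory come from the full Lucene.Net 3.0.3 API, which this trimmed subset may not carry in its entirety):

    // Wrap the default policy so files referenced by a pinned commit survive until Release().
    var dir = FSDirectory.Open(new System.IO.DirectoryInfo("index"));
    var policy = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    var writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30), policy,
                                 IndexWriter.MaxFieldLength.UNLIMITED);
    writer.Commit();                                   // make sure at least one commit exists
    IndexCommit snapshot = policy.Snapshot();          // pin the current commit
    try
    {
        foreach (string fileName in snapshot.FileNames)
        {
            // copy fileName out of the index directory here; the policy keeps
            // the file from being deleted while the snapshot is held
        }
    }
    finally
    {
        policy.Release();                              // future commits may reclaim the files
        writer.Dispose();
    }
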
+ + public class SnapshotDeletionPolicy : IndexDeletionPolicy + { + + private IndexCommit lastCommit; + private IndexDeletionPolicy primary; + private System.String snapshot; + + public SnapshotDeletionPolicy(IndexDeletionPolicy primary) + { + this.primary = primary; + } + + public virtual void OnInit(IList commits) where T : IndexCommit + { + lock (this) + { + primary.OnInit(WrapCommits(commits)); + lastCommit = commits[commits.Count - 1]; + } + } + + public virtual void OnCommit(IList commits) where T : IndexCommit + { + lock (this) + { + primary.OnCommit(WrapCommits(commits)); + lastCommit = commits[commits.Count - 1]; + } + } + + /// Take a snapshot of the most recent commit to the + /// index. You must call release() to free this snapshot. + /// Note that while the snapshot is held, the files it + /// references will not be deleted, which will consume + /// additional disk space in your index. If you take a + /// snapshot at a particularly bad time (say just before + /// you call optimize()) then in the worst case this could + /// consume an extra 1X of your total index size, until + /// you release the snapshot. + /// + public virtual IndexCommit Snapshot() + { + lock (this) + { + if (lastCommit == null) + { + throw new System.SystemException("no index commits to snapshot !"); + } + + if (snapshot == null) + snapshot = lastCommit.SegmentsFileName; + else + throw new System.SystemException("snapshot is already set; please call release() first"); + return lastCommit; + } + } + + /// Release the currently held snapshot. + public virtual void Release() + { + lock (this) + { + if (snapshot != null) + snapshot = null; + else + throw new System.SystemException("snapshot was not set; please call snapshot() first"); + } + } + + private class MyCommitPoint : IndexCommit + { + private void InitBlock(SnapshotDeletionPolicy enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SnapshotDeletionPolicy enclosingInstance; + public SnapshotDeletionPolicy Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal IndexCommit cp; + internal MyCommitPoint(SnapshotDeletionPolicy enclosingInstance, IndexCommit cp) + { + InitBlock(enclosingInstance); + this.cp = cp; + } + + public override string ToString() + { + return "SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp + ")"; + } + + public override string SegmentsFileName + { + get { return cp.SegmentsFileName; } + } + + public override ICollection FileNames + { + get { return cp.FileNames; } + } + + public override Directory Directory + { + get { return cp.Directory; } + } + + public override void Delete() + { + lock (Enclosing_Instance) + { + // Suppress the delete request if this commit point is + // our current snapshot. 
+ if (Enclosing_Instance.snapshot == null || !Enclosing_Instance.snapshot.Equals(SegmentsFileName)) + cp.Delete(); + } + } + + public override bool IsDeleted + { + get { return cp.IsDeleted; } + } + + public override long Version + { + get { return cp.Version; } + } + + public override long Generation + { + get { return cp.Generation; } + } + + public override IDictionary UserData + { + get { return cp.UserData; } + } + + public override bool IsOptimized + { + get { return cp.IsOptimized; } + } + } + + private IList WrapCommits(IList commits) where T : IndexCommit + { + int count = commits.Count; + var myCommits = new List(count); + for (int i = 0; i < count; i++) + { + myCommits.Add(new MyCommitPoint(this, commits[i])); + } + return myCommits; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/SortedTermVectorMapper.cs b/external/Lucene.Net.Light/src/core/Index/SortedTermVectorMapper.cs new file mode 100644 index 0000000000..3d00b377c7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/SortedTermVectorMapper.cs @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Index +{ + + /// Store a sorted collection of s. Collects all term information + /// into a single, SortedSet. + ///
+ /// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/positions you will not + /// know what Fields they correlate with. + ///
+ /// This is not thread-safe + ///
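A brief illustrative sketch of how this mapper is typically fed (not part of this patch): it assumes an open IndexReader named reader, a document id docId, and an IndexReader.GetTermFreqVector overload that accepts a TermVectorMapper; it uses the TermVectorEntryFreqSortedComparator added elsewhere in this change set.

    var mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator());
    reader.GetTermFreqVector(docId, mapper);         // assumed IndexReader overload taking a mapper
    foreach (TermVectorEntry entry in mapper.TermVectorEntrySet)
        System.Console.WriteLine("{0} occurred {1} time(s)", entry.Term, entry.Frequency);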
+ public class SortedTermVectorMapper:TermVectorMapper + { + private SortedSet currentSet; + private IDictionary termToTVE = new HashMap(); + private bool storeOffsets; + private bool storePositions; + /// Stand-in name for the field in . + public const System.String ALL = "_ALL_"; + + /// + /// A Comparator for sorting s + /// + public SortedTermVectorMapper(IComparer comparator) + : this(false, false, comparator) + { + } + + + public SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, IComparer comparator) + : base(ignoringPositions, ignoringOffsets) + { + currentSet = new SortedSet(comparator); + } + + /// + /// The term to map + /// + /// The frequency of the term + /// + /// Offset information, may be null + /// + /// Position information, may be null + /// + //We need to combine any previous mentions of the term + public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) + { + TermVectorEntry entry = termToTVE[term]; + if (entry == null) + { + entry = new TermVectorEntry(ALL, term, frequency, storeOffsets == true?offsets:null, storePositions == true?positions:null); + termToTVE[term] = entry; + currentSet.Add(entry); + } + else + { + entry.Frequency = entry.Frequency + frequency; + if (storeOffsets) + { + TermVectorOffsetInfo[] existingOffsets = entry.GetOffsets(); + //A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions + if (existingOffsets != null && offsets != null && offsets.Length > 0) + { + //copy over the existing offsets + TermVectorOffsetInfo[] newOffsets = new TermVectorOffsetInfo[existingOffsets.Length + offsets.Length]; + Array.Copy(existingOffsets, 0, newOffsets, 0, existingOffsets.Length); + Array.Copy(offsets, 0, newOffsets, existingOffsets.Length, offsets.Length); + entry.SetOffsets(newOffsets); + } + else if (existingOffsets == null && offsets != null && offsets.Length > 0) + { + entry.SetOffsets(offsets); + } + //else leave it alone + } + if (storePositions) + { + int[] existingPositions = entry.GetPositions(); + if (existingPositions != null && positions != null && positions.Length > 0) + { + int[] newPositions = new int[existingPositions.Length + positions.Length]; + Array.Copy(existingPositions, 0, newPositions, 0, existingPositions.Length); + Array.Copy(positions, 0, newPositions, existingPositions.Length, positions.Length); + entry.SetPositions(newPositions); + } + else if (existingPositions == null && positions != null && positions.Length > 0) + { + entry.SetPositions(positions); + } + } + } + } + + public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) + { + + this.storeOffsets = storeOffsets; + this.storePositions = storePositions; + } + + /// The TermVectorEntrySet. A SortedSet of objects. Sort is by the comparator passed into the constructor. + ///
+ /// This set will be empty until after the mapping process takes place. + /// + ///
+ /// The SortedSet of <see cref="TermVectorEntry" />. + public virtual SortedSet TermVectorEntrySet + { + get { return currentSet; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/StaleReaderException.cs b/external/Lucene.Net.Light/src/core/Index/StaleReaderException.cs new file mode 100644 index 0000000000..271070ffb5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/StaleReaderException.cs @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// This exception is thrown when an + /// tries to make changes to the index (via + ///, + /// or ) + /// but changes have already been committed to the index + /// since this reader was instantiated. When this happens + /// you must open a new reader on the current index to make + /// the changes. + /// + [Serializable] + public class StaleReaderException:System.IO.IOException + { + public StaleReaderException(System.String message):base(message) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/StoredFieldsWriter.cs b/external/Lucene.Net.Light/src/core/Index/StoredFieldsWriter.cs new file mode 100644 index 0000000000..c4548b413a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/StoredFieldsWriter.cs @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using RAMOutputStream = Lucene.Net.Store.RAMOutputStream; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + + /// This is a DocFieldConsumer that writes stored fields. 
+ sealed class StoredFieldsWriter + { + private void InitBlock() + { + docFreeList = new PerDoc[1]; + } + + internal FieldsWriter fieldsWriter; + internal DocumentsWriter docWriter; + internal FieldInfos fieldInfos; + internal int lastDocID; + + internal PerDoc[] docFreeList; + internal int freeCount; + + public StoredFieldsWriter(DocumentsWriter docWriter, FieldInfos fieldInfos) + { + InitBlock(); + this.docWriter = docWriter; + this.fieldInfos = fieldInfos; + } + + public StoredFieldsWriterPerThread AddThread(DocumentsWriter.DocState docState) + { + return new StoredFieldsWriterPerThread(docState, this); + } + + public void Flush(SegmentWriteState state) + { + lock (this) + { + + if (state.numDocsInStore > 0) + { + // It's possible that all documents seen in this segment + // hit non-aborting exceptions, in which case we will + // not have yet init'd the FieldsWriter: + InitFieldsWriter(); + + // Fill fdx file to include any final docs that we + // skipped because they hit non-aborting exceptions + Fill(state.numDocsInStore - docWriter.DocStoreOffset); + } + + if (fieldsWriter != null) + fieldsWriter.Flush(); + } + } + + private void InitFieldsWriter() + { + if (fieldsWriter == null) + { + System.String docStoreSegment = docWriter.DocStoreSegment; + if (docStoreSegment != null) + { + System.Diagnostics.Debug.Assert(docStoreSegment != null); + fieldsWriter = new FieldsWriter(docWriter.directory, docStoreSegment, fieldInfos); + docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_EXTENSION); + docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); + lastDocID = 0; + } + } + } + + public void CloseDocStore(SegmentWriteState state) + { + lock (this) + { + int inc = state.numDocsInStore - lastDocID; + if (inc > 0) + { + InitFieldsWriter(); + Fill(state.numDocsInStore - docWriter.DocStoreOffset); + } + + if (fieldsWriter != null) + { + fieldsWriter.Dispose(); + fieldsWriter = null; + lastDocID = 0; + System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); + state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); + state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); + + state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_EXTENSION); + state.docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); + + System.String fileName = state.docStoreSegmentName + "." 
+ IndexFileNames.FIELDS_INDEX_EXTENSION; + + if (4 + ((long) state.numDocsInStore) * 8 != state.directory.FileLength(fileName)) + throw new System.SystemException("after flush: fdx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName)); + } + } + } + + internal int allocCount; + + internal PerDoc GetPerDoc() + { + lock (this) + { + if (freeCount == 0) + { + allocCount++; + if (allocCount > docFreeList.Length) + { + // Grow our free list up front to make sure we have + // enough space to recycle all outstanding PerDoc + // instances + System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length); + docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)]; + } + return new PerDoc(this); + } + else + return docFreeList[--freeCount]; + } + } + + internal void Abort() + { + lock (this) + { + if (fieldsWriter != null) + { + try + { + fieldsWriter.Dispose(); + } + catch (System.Exception) + { + } + fieldsWriter = null; + lastDocID = 0; + } + } + } + + /// Fills in any hole in the docIDs + internal void Fill(int docID) + { + int docStoreOffset = docWriter.DocStoreOffset; + + // We must "catch up" for all docs before us + // that had no stored fields: + int end = docID + docStoreOffset; + while (lastDocID < end) + { + fieldsWriter.SkipDocument(); + lastDocID++; + } + } + + internal void FinishDocument(PerDoc perDoc) + { + lock (this) + { + System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument start")); + InitFieldsWriter(); + + Fill(perDoc.docID); + + // Append stored fields to the real FieldsWriter: + fieldsWriter.FlushDocument(perDoc.numStoredFields, perDoc.fdt); + lastDocID++; + perDoc.Reset(); + Free(perDoc); + System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("StoredFieldsWriter.finishDocument end")); + } + } + + public bool FreeRAM() + { + return false; + } + + internal void Free(PerDoc perDoc) + { + lock (this) + { + System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length); + System.Diagnostics.Debug.Assert(0 == perDoc.numStoredFields); + System.Diagnostics.Debug.Assert(0 == perDoc.fdt.Length); + System.Diagnostics.Debug.Assert(0 == perDoc.fdt.FilePointer); + docFreeList[freeCount++] = perDoc; + } + } + + internal class PerDoc:DocumentsWriter.DocWriter + { + public PerDoc(StoredFieldsWriter enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(StoredFieldsWriter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + buffer = enclosingInstance.docWriter.NewPerDocBuffer(); + fdt = new RAMOutputStream(buffer); + } + private StoredFieldsWriter enclosingInstance; + public StoredFieldsWriter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + internal DocumentsWriter.PerDocBuffer buffer ; + internal RAMOutputStream fdt; + internal int numStoredFields; + + internal void Reset() + { + fdt.Reset(); + buffer.Recycle(); + numStoredFields = 0; + } + + public override void Abort() + { + Reset(); + Enclosing_Instance.Free(this); + } + + public override long SizeInBytes() + { + return buffer.SizeInBytes; + } + + public override void Finish() + { + Enclosing_Instance.FinishDocument(this); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/StoredFieldsWriterPerThread.cs b/external/Lucene.Net.Light/src/core/Index/StoredFieldsWriterPerThread.cs new file mode 100644 index 0000000000..17841255e4 --- 
/dev/null +++ b/external/Lucene.Net.Light/src/core/Index/StoredFieldsWriterPerThread.cs @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Index +{ + + sealed class StoredFieldsWriterPerThread + { + + internal FieldsWriter localFieldsWriter; + internal StoredFieldsWriter storedFieldsWriter; + internal DocumentsWriter.DocState docState; + + internal StoredFieldsWriter.PerDoc doc; + + public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) + { + this.storedFieldsWriter = storedFieldsWriter; + this.docState = docState; + localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null, storedFieldsWriter.fieldInfos); + } + + public void StartDocument() + { + if (doc != null) + { + // Only happens if previous document hit non-aborting + // exception while writing stored fields into + // localFieldsWriter: + doc.Reset(); + doc.docID = docState.docID; + } + } + + public void AddField(IFieldable field, FieldInfo fieldInfo) + { + if (doc == null) + { + doc = storedFieldsWriter.GetPerDoc(); + doc.docID = docState.docID; + localFieldsWriter.SetFieldsStream(doc.fdt); + System.Diagnostics.Debug.Assert(doc.numStoredFields == 0, "doc.numStoredFields=" + doc.numStoredFields); + System.Diagnostics.Debug.Assert(0 == doc.fdt.Length); + System.Diagnostics.Debug.Assert(0 == doc.fdt.FilePointer); + } + + localFieldsWriter.WriteField(fieldInfo, field); + System.Diagnostics.Debug.Assert(docState.TestPoint("StoredFieldsWriterPerThread.processFields.writeField")); + doc.numStoredFields++; + } + + public DocumentsWriter.DocWriter FinishDocument() + { + // If there were any stored fields in this doc, doc will + // be non-null; else it's null. + try + { + return doc; + } + finally + { + doc = null; + } + } + + public void Abort() + { + if (doc != null) + { + doc.Abort(); + doc = null; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/Term.cs b/external/Lucene.Net.Light/src/core/Index/Term.cs new file mode 100644 index 0000000000..cac6b15c6f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/Term.cs @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using StringHelper = Lucene.Net.Util.StringHelper; + +namespace Lucene.Net.Index +{ + + /// A Term represents a word from text. This is the unit of search. It is + /// composed of two elements, the text of the word, as a string, and the name of + /// the field that the text occured in, an interned string. + /// Note that terms may represent more than words from text fields, but also + /// things like dates, email addresses, urls, etc. + /// + [Serializable] + public sealed class Term : System.IComparable + { + internal System.String field; + internal System.String text; + + /// Constructs a Term with the given field and text. + ///

Note that a null field or null text value results in undefined + /// behavior for most Lucene APIs that accept a Term parameter. + ///

+ public Term(System.String fld, System.String txt) + { + field = StringHelper.Intern(fld); + text = txt; + } + + /// Constructs a Term with the given field and empty text. + /// This serves two purposes: 1) reuse of a Term with the same field. + /// 2) pattern for a query. + /// + /// + /// + /// + public Term(System.String fld):this(fld, "", true) + { + } + + internal Term(System.String fld, System.String txt, bool intern) + { + field = intern?StringHelper.Intern(fld):fld; // field names are interned + text = txt; // unless already known to be + } + + /// Returns the field of this term, an interned string. The field indicates + /// the part of a document which this term came from. + /// + public string Field + { + get { return field; } + } + + /// Returns the text of this term. In the case of words, this is simply the + /// text of the word. In the case of dates and other types, this is an + /// encoding of the object as a string. + /// + public string Text + { + get { return text; } + } + + /// Optimized construction of new Terms by reusing same field as this Term + /// - avoids field.intern() overhead + /// + /// The text of the new term (field is implicitly same as this Term instance) + /// + /// A new Term + /// + public Term CreateTerm(System.String text) + { + return new Term(field, text, false); + } + + //@Override + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + Term other = (Term) obj; + if (field == null) + { + if (other.field != null) + return false; + } + else if (!field.Equals(other.field)) + return false; + if (text == null) + { + if (other.text != null) + return false; + } + else if (!text.Equals(other.text)) + return false; + return true; + } + + //@Override + public override int GetHashCode() + { + int prime = 31; + int result = 1; + result = prime*result + ((field == null) ? 0 : field.GetHashCode()); + result = prime*result + ((text == null) ? 0 : text.GetHashCode()); + return result; + } + + /// Compares two terms, returning a negative integer if this + /// term belongs before the argument, zero if this term is equal to the + /// argument, and a positive integer if this term belongs after the argument. + /// The ordering of terms is first by field, then by text. + /// + public int CompareTo(Term other) + { + if ((System.Object) field == (System.Object) other.field) + // fields are interned + return String.CompareOrdinal(text, other.text); + else + return String.CompareOrdinal(field, other.field); + } + + ///// Resets the field and text of a Term. 
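A small illustrative sketch of the interning and ordering rules described above (not part of this patch): field names are interned, so reuse via CreateTerm allows reference comparison, and CompareTo orders by field first, then by text.

    Term a = new Term("author", "knuth");
    Term b = a.CreateTerm("lamport");                        // reuses the already-interned field
    bool sameField = (object) a.Field == (object) b.Field;   // true: interned fields allow reference equality
    int byText = a.CompareTo(b);                             // negative: same field, "knuth" < "lamport"
    int byField = a.CompareTo(new Term("title", "aaa"));     // negative: "author" < "title" is decided first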
+ //internal void Set(System.String fld, System.String txt) + //{ + // field = fld; + // text = txt; + //} + + public override System.String ToString() + { + return field + ":" + text; + } + +// private void ReadObject(System.IO.BinaryReader in_Renamed) +// { +// in_Renamed.defaultReadObject(); +// field = StringHelper.Intern(field); +// } + + [System.Runtime.Serialization.OnDeserialized] + internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context) + { + field = StringHelper.Intern(field); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermBuffer.cs b/external/Lucene.Net.Light/src/core/Index/TermBuffer.cs new file mode 100644 index 0000000000..d97969ce20 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermBuffer.cs @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using IndexInput = Lucene.Net.Store.IndexInput; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + + sealed class TermBuffer : System.ICloneable + { + + private System.String field; + private Term term; // cached + private bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding (LUCENE-510) + private bool dirty; // true if text was set externally (ie not read via UTF8 bytes) + + private UnicodeUtil.UTF16Result text = new UnicodeUtil.UTF16Result(); + private UnicodeUtil.UTF8Result bytes = new UnicodeUtil.UTF8Result(); + + public int CompareTo(TermBuffer other) + { + if ((System.Object) field == (System.Object) other.field) + // fields are interned + return CompareChars(text.result, text.length, other.text.result, other.text.length); + else + return String.CompareOrdinal(field, other.field); + } + + private static int CompareChars(char[] chars1, int len1, char[] chars2, int len2) + { + int end = len1 < len2?len1:len2; + for (int k = 0; k < end; k++) + { + char c1 = chars1[k]; + char c2 = chars2[k]; + if (c1 != c2) + { + return c1 - c2; + } + } + return len1 - len2; + } + + /// Call this if the IndexInput passed to + /// stores terms in the "modified UTF8" (pre LUCENE-510) + /// format. 
+ /// + internal void SetPreUTF8Strings() + { + preUTF8Strings = true; + } + + public void Read(IndexInput input, FieldInfos fieldInfos) + { + this.term = null; // invalidate cache + int start = input.ReadVInt(); + int length = input.ReadVInt(); + int totalLength = start + length; + if (preUTF8Strings) + { + text.SetLength(totalLength); + input.ReadChars(text.result, start, length); + } + else + { + + if (dirty) + { + // Fully convert all bytes since bytes is dirty + UnicodeUtil.UTF16toUTF8(text.result, 0, text.length, bytes); + bytes.SetLength(totalLength); + input.ReadBytes(bytes.result, start, length); + UnicodeUtil.UTF8toUTF16(bytes.result, 0, totalLength, text); + dirty = false; + } + else + { + // Incrementally convert only the UTF8 bytes that are new: + bytes.SetLength(totalLength); + input.ReadBytes(bytes.result, start, length); + UnicodeUtil.UTF8toUTF16(bytes.result, start, length, text); + } + } + this.field = fieldInfos.FieldName(input.ReadVInt()); + } + + public void Set(Term term) + { + if (term == null) + { + Reset(); + return ; + } + System.String termText = term.Text; + int termLen = termText.Length; + text.SetLength(termLen); + TextSupport.GetCharsFromString(termText, 0, termLen, text.result, 0); + dirty = true; + field = term.Field; + this.term = term; + } + + public void Set(TermBuffer other) + { + text.CopyText(other.text); + dirty = true; + field = other.field; + term = other.term; + } + + public void Reset() + { + field = null; + text.SetLength(0); + term = null; + dirty = true; + } + + public Term ToTerm() + { + if (field == null) + // unset + return null; + + if (term == null) + term = new Term(field, new System.String(text.result, 0, text.length), false); + + return term; + } + + public System.Object Clone() + { + TermBuffer clone = null; + try + { + clone = (TermBuffer) base.MemberwiseClone(); + } + catch (System.Exception) + { + } + + clone.dirty = true; + clone.bytes = new UnicodeUtil.UTF8Result(); + clone.text = new UnicodeUtil.UTF16Result(); + clone.text.CopyText(text); + return clone; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermDocs.cs b/external/Lucene.Net.Light/src/core/Index/TermDocs.cs new file mode 100644 index 0000000000..0ffdc285a2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermDocs.cs @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + /// TermDocs provides an interface for enumerating <document, frequency> + /// pairs for a term.

The document portion names each document containing + /// the term. Documents are indicated by number. The frequency portion gives + /// the number of times the term occurred in each document.

The pairs are + /// ordered by document number. + ///

+ /// + public interface TermDocs : IDisposable + { + /// Sets this to the data for a term. + /// The enumeration is reset to the start of the data for this term. + /// + void Seek(Term term); + + /// Sets this to the data for the current term in a . + /// This may be optimized in some implementations. + /// + void Seek(TermEnum termEnum); + + /// Returns the current document number.

This is invalid until Next() + /// is called for the first time. + ///

+ int Doc { get; } + + /// Returns the frequency of the term within the current document.

This + /// is invalid until Next() is called for the first time. + ///

+ int Freq { get; } + + /// Moves to the next pair in the enumeration.

Returns true iff there is + /// such a next pair in the enumeration. + ///

+ bool Next(); + + /// Attempts to read multiple entries from the enumeration, up to length of + /// docs. Document numbers are stored in docs, and term + /// frequencies are stored in freqs. The freqs array must be as + /// long as the docs array. + /// + ///

Returns the number of entries read. Zero is only returned when the + /// stream has been exhausted. + ///

+ int Read(int[] docs, int[] freqs); + + /// Skips entries to the first beyond the current whose document number is + /// greater than or equal to target.

Returns true iff there is such + /// an entry.

Behaves as if written: + /// boolean skipTo(int target) { + /// do { + /// if (!next()) + /// return false; + /// } while (target > doc()); + /// return true; + /// } + /// + /// Some implementations are considerably more efficient than that. + ///
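A minimal illustrative sketch of the Seek/Next/Doc/Freq contract described above (not part of this patch). It assumes an open IndexReader named reader and its TermDocs() factory method, which is not defined in this file.

    using (TermDocs td = reader.TermDocs())          // IndexReader.TermDocs() is assumed here
    {
        td.Seek(new Term("body", "lucene"));
        while (td.Next())
            System.Console.WriteLine("doc={0} freq={1}", td.Doc, td.Freq);
        // SkipTo(target) could replace Next() to jump straight to documents >= target, as described above.
    }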

+ bool SkipTo(int target); + + // TODO: Determine which release this will be removed from + /// Frees associated resources. + [Obsolete("Use Dispose() instead")] + void Close(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermEnum.cs b/external/Lucene.Net.Light/src/core/Index/TermEnum.cs new file mode 100644 index 0000000000..e663bd1b97 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermEnum.cs @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// Abstract class for enumerating terms. + ///

Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + ///
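A minimal illustrative sketch of walking this ordered enumeration (not part of this patch). It assumes an open IndexReader named reader and its Terms(Term) method, which positions the enumerator at the first term at or after the given one; because terms are ordered by field and then text, the loop stops once it leaves the requested field.

    using (TermEnum te = reader.Terms(new Term("title", "")))   // IndexReader.Terms(Term) is assumed here
    {
        do
        {
            Term t = te.Term;
            if (t == null || t.Field != "title")
                break;                                           // walked past the "title" field
            System.Console.WriteLine("{0} (docFreq={1})", t.Text, te.DocFreq());
        }
        while (te.Next());
    }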

+ public abstract class TermEnum : IDisposable + { + /// Increments the enumeration to the next element. True if one exists. + public abstract bool Next(); + + /// Returns the current Term in the enumeration. + public abstract Term Term { get; } + + /// Returns the docFreq of the current Term in the enumeration. + public abstract int DocFreq(); + + /// Closes the enumeration to further activity, freeing resources. + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + /// Closes the enumeration to further activity, freeing resources. + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermFreqVector.cs b/external/Lucene.Net.Light/src/core/Index/TermFreqVector.cs new file mode 100644 index 0000000000..ea0eb4347b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermFreqVector.cs @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + + /// Provides access to stored term vector of + /// a document field. The vector consists of the name of the field, an array of the terms tha occur in the field of the + /// and a parallel array of frequencies. Thus, getTermFrequencies()[5] corresponds with the + /// frequency of getTerms()[5], assuming there are at least 5 terms in the Document. + /// + public interface ITermFreqVector + { + /// The name. + /// The name of the field this vector is associated with. + string Field { get; } + + /// The number of terms in the term vector. + int Size { get; } + + /// An Array of term texts in ascending order. + /// + System.String[] GetTerms(); + + + /// Array of term frequencies. Locations of the array correspond one to one + /// to the terms in the array obtained from getTerms + /// method. Each location in the array contains the number of times this + /// term occurs in the document or the document field. + /// + int[] GetTermFrequencies(); + + + /// Return an index in the term numbers array returned from + /// getTerms at which the term with the specified + /// term appears. If this term does not appear in the array, + /// return -1. + /// + int IndexOf(System.String term); + + + /// Just like indexOf(int) but searches for a number of terms + /// at the same time. Returns an array that has the same size as the number + /// of terms searched for, each slot containing the result of searching for + /// that term number. 
+ /// + /// + /// array containing terms to look for + /// + /// index in the array where the list of terms starts + /// + /// the number of terms in the list + /// + int[] IndexesOf(System.String[] terms, int start, int len); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermInfo.cs b/external/Lucene.Net.Light/src/core/Index/TermInfo.cs new file mode 100644 index 0000000000..5869f6f465 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermInfo.cs @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// A TermInfo is the record of information stored for a term. + + sealed class TermInfo + { + /// The number of documents which contain the term. + internal int docFreq = 0; + + internal long freqPointer = 0; + internal long proxPointer = 0; + internal int skipOffset; + + internal TermInfo() + { + } + + internal TermInfo(int df, long fp, long pp) + { + docFreq = df; + freqPointer = fp; + proxPointer = pp; + } + + internal TermInfo(TermInfo ti) + { + docFreq = ti.docFreq; + freqPointer = ti.freqPointer; + proxPointer = ti.proxPointer; + skipOffset = ti.skipOffset; + } + + internal void Set(int docFreq, long freqPointer, long proxPointer, int skipOffset) + { + this.docFreq = docFreq; + this.freqPointer = freqPointer; + this.proxPointer = proxPointer; + this.skipOffset = skipOffset; + } + + internal void Set(TermInfo ti) + { + docFreq = ti.docFreq; + freqPointer = ti.freqPointer; + proxPointer = ti.proxPointer; + skipOffset = ti.skipOffset; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermInfosReader.cs b/external/Lucene.Net.Light/src/core/Index/TermInfosReader.cs new file mode 100644 index 0000000000..044a7c3be5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermInfosReader.cs @@ -0,0 +1,325 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Lucene.Net.Util.Cache; +using Directory = Lucene.Net.Store.Directory; + +namespace Lucene.Net.Index +{ + + /// This stores a monotonically increasing set of <Term, TermInfo> pairs in a + /// Directory. Pairs are accessed either by Term or by ordinal position the + /// set. + /// + + sealed class TermInfosReader : IDisposable + { + private readonly Directory directory; + private readonly String segment; + private readonly FieldInfos fieldInfos; + + private bool isDisposed; + + private readonly CloseableThreadLocal threadResources = new CloseableThreadLocal(); + private readonly SegmentTermEnum origEnum; + private readonly long size; + + private readonly Term[] indexTerms; + private readonly TermInfo[] indexInfos; + private readonly long[] indexPointers; + + private readonly int totalIndexInterval; + + private const int DEFAULT_CACHE_SIZE = 1024; + + /// Per-thread resources managed by ThreadLocal + private sealed class ThreadResources + { + internal SegmentTermEnum termEnum; + + // Used for caching the least recently looked-up Terms + internal Cache termInfoCache; + } + + internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize, int indexDivisor) + { + bool success = false; + + if (indexDivisor < 1 && indexDivisor != - 1) + { + throw new System.ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor); + } + + try + { + directory = dir; + segment = seg; + fieldInfos = fis; + + origEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_EXTENSION, readBufferSize), fieldInfos, false); + size = origEnum.size; + + + if (indexDivisor != - 1) + { + // Load terms index + totalIndexInterval = origEnum.indexInterval * indexDivisor; + var indexEnum = new SegmentTermEnum(directory.OpenInput(segment + "." + IndexFileNames.TERMS_INDEX_EXTENSION, readBufferSize), fieldInfos, true); + + try + { + int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index + + indexTerms = new Term[indexSize]; + indexInfos = new TermInfo[indexSize]; + indexPointers = new long[indexSize]; + + for (int i = 0; indexEnum.Next(); i++) + { + indexTerms[i] = indexEnum.Term; + indexInfos[i] = indexEnum.TermInfo(); + indexPointers[i] = indexEnum.indexPointer; + + for (int j = 1; j < indexDivisor; j++) + if (!indexEnum.Next()) + break; + } + } + finally + { + indexEnum.Close(); + } + } + else + { + // Do not load terms index: + totalIndexInterval = - 1; + indexTerms = null; + indexInfos = null; + indexPointers = null; + } + success = true; + } + finally + { + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. + if (!success) + { + Dispose(); + } + } + } + + public int SkipInterval + { + get { return origEnum.skipInterval; } + } + + public int MaxSkipLevels + { + get { return origEnum.maxSkipLevels; } + } + + public void Dispose() + { + if (isDisposed) return; + + // Move to protected method if class becomes unsealed + if (origEnum != null) + origEnum.Dispose(); + threadResources.Dispose(); + + isDisposed = true; + } + + /// Returns the number of term/value pairs in the set. 
+ internal long Size() + { + return size; + } + + private ThreadResources GetThreadResources() + { + ThreadResources resources = threadResources.Get(); + if (resources == null) + { + resources = new ThreadResources + {termEnum = Terms(), termInfoCache = new SimpleLRUCache(DEFAULT_CACHE_SIZE)}; + // Cache does not have to be thread-safe, it is only used by one thread at the same time + threadResources.Set(resources); + } + return resources; + } + + + /// Returns the offset of the greatest index entry which is less than or equal to term. + private int GetIndexOffset(Term term) + { + int lo = 0; // binary search indexTerms[] + int hi = indexTerms.Length - 1; + + while (hi >= lo) + { + int mid = Number.URShift((lo + hi), 1); + int delta = term.CompareTo(indexTerms[mid]); + if (delta < 0) + hi = mid - 1; + else if (delta > 0) + lo = mid + 1; + else + return mid; + } + return hi; + } + + private void SeekEnum(SegmentTermEnum enumerator, int indexOffset) + { + enumerator.Seek(indexPointers[indexOffset], ((long)indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]); + } + + /// Returns the TermInfo for a Term in the set, or null. + internal TermInfo Get(Term term) + { + return Get(term, true); + } + + /// Returns the TermInfo for a Term in the set, or null. + private TermInfo Get(Term term, bool useCache) + { + if (size == 0) + return null; + + EnsureIndexIsRead(); + + TermInfo ti; + ThreadResources resources = GetThreadResources(); + Cache cache = null; + + if (useCache) + { + cache = resources.termInfoCache; + // check the cache first if the term was recently looked up + ti = cache.Get(term); + if (ti != null) + { + return ti; + } + } + + // optimize sequential access: first try scanning cached enum w/o seeking + SegmentTermEnum enumerator = resources.termEnum; + if (enumerator.Term != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term) >= 0)) + { + int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1; + if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0) + { + // no need to seek + + int numScans = enumerator.ScanTo(term); + if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0) + { + ti = enumerator.TermInfo(); + if (cache != null && numScans > 1) + { + // we only want to put this TermInfo into the cache if + // scanEnum skipped more than one dictionary entry. + // This prevents RangeQueries or WildcardQueries to + // wipe out the cache when they iterate over a large numbers + // of terms in order + cache.Put(term, ti); + } + } + else + { + ti = null; + } + + return ti; + } + } + + // random-access: must seek + SeekEnum(enumerator, GetIndexOffset(term)); + enumerator.ScanTo(term); + if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0) + { + ti = enumerator.TermInfo(); + if (cache != null) + { + cache.Put(term, ti); + } + } + else + { + ti = null; + } + return ti; + } + + private void EnsureIndexIsRead() + { + if (indexTerms == null) + { + throw new SystemException("terms index was not loaded when this reader was created"); + } + } + + /// Returns the position of a Term in the set or -1. 
+ internal long GetPosition(Term term) + { + if (size == 0) + return - 1; + + EnsureIndexIsRead(); + int indexOffset = GetIndexOffset(term); + + SegmentTermEnum enumerator = GetThreadResources().termEnum; + SeekEnum(enumerator, indexOffset); + + while (term.CompareTo(enumerator.Term) > 0 && enumerator.Next()) + { + } + + if (term.CompareTo(enumerator.Term) == 0) + return enumerator.position; + else + return - 1; + } + + /// Returns an enumeration of all the Terms and TermInfos in the set. + public SegmentTermEnum Terms() + { + return (SegmentTermEnum) origEnum.Clone(); + } + + /// Returns an enumeration of terms starting at or after the named term. + public SegmentTermEnum Terms(Term term) + { + // don't use the cache in this call because we want to reposition the + // enumeration + Get(term, false); + return (SegmentTermEnum) GetThreadResources().termEnum.Clone(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermInfosWriter.cs b/external/Lucene.Net.Light/src/core/Index/TermInfosWriter.cs new file mode 100644 index 0000000000..c2512c35de --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermInfosWriter.cs @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Directory = Lucene.Net.Store.Directory; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + + /// This stores a monotonically increasing set of <Term, TermInfo> pairs in a + /// Directory. A TermInfos can be written once, in order. + /// + + sealed class TermInfosWriter : IDisposable + { + /// The file format version, a negative number. + public const int FORMAT = - 3; + + // Changed strings to true utf8 with length-in-bytes not + // length-in-chars + public const int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = - 4; + + // NOTE: always change this if you switch to a new format! + public static readonly int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; + + private bool isDisposed; + + private FieldInfos fieldInfos; + private IndexOutput output; + private TermInfo lastTi = new TermInfo(); + private long size; + + // TODO: the default values for these two parameters should be settable from + // IndexWriter. However, once that's done, folks will start setting them to + // ridiculous values and complaining that things don't work well, as with + // mergeFactor. So, let's wait until a number of folks find that alternate + // values work better. Note that both of these values are stored in the + // segment, so that it's safe to change these w/o rebuilding all indexes. + + /// Expert: The fraction of terms in the "dictionary" which should be stored + /// in RAM. 
Smaller values use more memory, but make searching slightly + /// faster, while larger values use less memory and make searching slightly + /// slower. Searching is typically not dominated by dictionary lookup, so + /// tweaking this is rarely useful. + /// + internal int indexInterval = 128; + + /// Expert: The fraction of entries stored in skip tables, + /// used to accellerate . Larger values result in + /// smaller indexes, greater acceleration, but fewer accelerable cases, while + /// smaller values result in bigger indexes, less acceleration and more + /// accelerable cases. More detailed experiments would be useful here. + /// + internal int skipInterval = 16; + + /// Expert: The maximum number of skip levels. Smaller values result in + /// slightly smaller indexes, but slower skipping in big posting lists. + /// + internal int maxSkipLevels = 10; + + private long lastIndexPointer; + private bool isIndex; + private byte[] lastTermBytes = new byte[10]; + private int lastTermBytesLength = 0; + private int lastFieldNumber = - 1; + + private TermInfosWriter other; + private UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result(); + + internal TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval) + { + Initialize(directory, segment, fis, interval, false); + other = new TermInfosWriter(directory, segment, fis, interval, true); + other.other = this; + } + + private TermInfosWriter(Directory directory, System.String segment, FieldInfos fis, int interval, bool isIndex) + { + Initialize(directory, segment, fis, interval, isIndex); + } + + private void Initialize(Directory directory, System.String segment, FieldInfos fis, int interval, bool isi) + { + indexInterval = interval; + fieldInfos = fis; + isIndex = isi; + output = directory.CreateOutput(segment + (isIndex?".tii":".tis")); + output.WriteInt(FORMAT_CURRENT); // write format + output.WriteLong(0); // leave space for size + output.WriteInt(indexInterval); // write indexInterval + output.WriteInt(skipInterval); // write skipInterval + output.WriteInt(maxSkipLevels); // write maxSkipLevels + System.Diagnostics.Debug.Assert(InitUTF16Results()); + } + + internal void Add(Term term, TermInfo ti) + { + UnicodeUtil.UTF16toUTF8(term.Text, 0, term.Text.Length, utf8Result); + Add(fieldInfos.FieldNumber(term.Field), utf8Result.result, utf8Result.length, ti); + } + + // Currently used only by assert statements + internal UnicodeUtil.UTF16Result utf16Result1; + internal UnicodeUtil.UTF16Result utf16Result2; + + // Currently used only by assert statements + private bool InitUTF16Results() + { + utf16Result1 = new UnicodeUtil.UTF16Result(); + utf16Result2 = new UnicodeUtil.UTF16Result(); + return true; + } + + // Currently used only by assert statement + private int CompareToLastTerm(int fieldNumber, byte[] termBytes, int termBytesLength) + { + + if (lastFieldNumber != fieldNumber) + { + int cmp = String.CompareOrdinal(fieldInfos.FieldName(lastFieldNumber), fieldInfos.FieldName(fieldNumber)); + // If there is a field named "" (empty string) then we + // will get 0 on this comparison, yet, it's "OK". But + // it's not OK if two different field numbers map to + // the same name. 
+ if (cmp != 0 || lastFieldNumber != - 1) + return cmp; + } + + UnicodeUtil.UTF8toUTF16(lastTermBytes, 0, lastTermBytesLength, utf16Result1); + UnicodeUtil.UTF8toUTF16(termBytes, 0, termBytesLength, utf16Result2); + int len; + if (utf16Result1.length < utf16Result2.length) + len = utf16Result1.length; + else + len = utf16Result2.length; + + for (int i = 0; i < len; i++) + { + char ch1 = utf16Result1.result[i]; + char ch2 = utf16Result2.result[i]; + if (ch1 != ch2) + return ch1 - ch2; + } + return utf16Result1.length - utf16Result2.length; + } + + /// Adds a new <fieldNumber, termBytes>, TermInfo> pair to the set. + /// Term must be lexicographically greater than all previous Terms added. + /// TermInfo pointers must be positive and greater than all previous. + /// + internal void Add(int fieldNumber, byte[] termBytes, int termBytesLength, TermInfo ti) + { + + System.Diagnostics.Debug.Assert(CompareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || + (isIndex && termBytesLength == 0 && lastTermBytesLength == 0), + "Terms are out of order: field=" + fieldInfos.FieldName(fieldNumber) + " (number " + fieldNumber + ")" + + " lastField=" + fieldInfos.FieldName(lastFieldNumber) + " (number " + lastFieldNumber + ")" + + " text=" + System.Text.Encoding.UTF8.GetString(termBytes, 0, termBytesLength) + " lastText=" + System.Text.Encoding.UTF8.GetString(lastTermBytes, 0, lastTermBytesLength)); + + System.Diagnostics.Debug.Assert(ti.freqPointer >= lastTi.freqPointer, "freqPointer out of order (" + ti.freqPointer + " < " + lastTi.freqPointer + ")"); + System.Diagnostics.Debug.Assert(ti.proxPointer >= lastTi.proxPointer, "proxPointer out of order (" + ti.proxPointer + " < " + lastTi.proxPointer + ")"); + + if (!isIndex && size % indexInterval == 0) + other.Add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term + + WriteTerm(fieldNumber, termBytes, termBytesLength); // write term + + output.WriteVInt(ti.docFreq); // write doc freq + output.WriteVLong(ti.freqPointer - lastTi.freqPointer); // write pointers + output.WriteVLong(ti.proxPointer - lastTi.proxPointer); + + if (ti.docFreq >= skipInterval) + { + output.WriteVInt(ti.skipOffset); + } + + if (isIndex) + { + output.WriteVLong(other.output.FilePointer - lastIndexPointer); + lastIndexPointer = other.output.FilePointer; // write pointer + } + + lastFieldNumber = fieldNumber; + lastTi.Set(ti); + size++; + } + + private void WriteTerm(int fieldNumber, byte[] termBytes, int termBytesLength) + { + + // TODO: UTF16toUTF8 could tell us this prefix + // Compute prefix in common with last term: + int start = 0; + int limit = termBytesLength < lastTermBytesLength?termBytesLength:lastTermBytesLength; + while (start < limit) + { + if (termBytes[start] != lastTermBytes[start]) + break; + start++; + } + + int length = termBytesLength - start; + output.WriteVInt(start); // write shared prefix length + output.WriteVInt(length); // write delta length + output.WriteBytes(termBytes, start, length); // write delta bytes + output.WriteVInt(fieldNumber); // write field num + if (lastTermBytes.Length < termBytesLength) + { + byte[] newArray = new byte[(int) (termBytesLength * 1.5)]; + Array.Copy(lastTermBytes, 0, newArray, 0, start); + lastTermBytes = newArray; + } + Array.Copy(termBytes, start, lastTermBytes, start, length); + lastTermBytesLength = termBytesLength; + } + + /// Called to complete TermInfos creation. 
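WriteTerm above stores each term as a shared-prefix length, a delta length, the delta bytes, and the field number. A small worked sketch of that computation (illustrative only, not part of this patch):

    byte[] last = System.Text.Encoding.UTF8.GetBytes("apple");
    byte[] next = System.Text.Encoding.UTF8.GetBytes("apply");
    int start = 0;
    int limit = next.Length < last.Length ? next.Length : last.Length;
    while (start < limit && next[start] == last[start])
        start++;                                     // start == 4: the prefix "appl" is shared
    int deltaLength = next.Length - start;           // 1 new suffix byte: 'y'
    // On disk: VInt(4) shared prefix, VInt(1) delta length, the byte 'y', and VInt(field number),
    // instead of writing all five bytes of "apply" again.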
+ public void Dispose() + { + // Move to protected method if class becomes unsealed + if (isDisposed) return; + + output.Seek(4); // write size after format + output.WriteLong(size); + output.Dispose(); + + if (!isIndex) + other.Dispose(); + + isDisposed = true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermPositionVector.cs b/external/Lucene.Net.Light/src/core/Index/TermPositionVector.cs new file mode 100644 index 0000000000..fe57719ec6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermPositionVector.cs @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// Extends TermFreqVector to provide additional information about + /// positions in which each of the terms is found. A TermPositionVector not necessarily + /// contains both positions and offsets, but at least one of these arrays exists. + /// + public interface TermPositionVector:ITermFreqVector + { + + /// Returns an array of positions in which the term is found. + /// Terms are identified by the index at which its number appears in the + /// term String array obtained from the indexOf method. + /// May return null if positions have not been stored. + /// + int[] GetTermPositions(int index); + + /// Returns an array of TermVectorOffsetInfo in which the term is found. + /// May return null if offsets have not been stored. + /// + /// + /// + /// + /// + /// The position in the array to get the offsets from + /// + /// An array of TermVectorOffsetInfo objects or the empty list + /// + TermVectorOffsetInfo[] GetOffsets(int index); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermPositions.cs b/external/Lucene.Net.Light/src/core/Index/TermPositions.cs new file mode 100644 index 0000000000..ff58a5c938 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermPositions.cs @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Index +{ + + /// TermPositions provides an interface for enumerating the <document, + /// frequency, <position>* > tuples for a term.

The document and + /// frequency are the same as for a TermDocs. The positions portion lists the ordinal + /// positions of each occurrence of a term in a document. + /// + ///

+ /// + /// + + public interface TermPositions : TermDocs + { + /// Returns next position in the current document. It is an error to call + /// this more than Freq times + /// without calling Next().

This is + /// invalid until is called for + /// the first time. + ///

+ int NextPosition(); + + /// Returns the length of the payload at the current term position. + /// This is invalid until NextPosition() is called for + /// the first time.
+ ///
+ /// length of the current payload in number of bytes + int PayloadLength { get; } + + /// Returns the payload data at the current term position. + /// This is invalid until NextPosition() is called for + /// the first time. + /// This method must not be called more than once after each call + /// of NextPosition(). However, payloads are loaded lazily, + /// so if the payload data for the current position is not needed, + /// this method may not be called at all for performance reasons.
+ /// + ///
+ /// data: the array into which the data of this payload is to be + /// stored, if it is big enough; otherwise, a new byte[] array + /// is allocated for this purpose. + /// + /// offset: the offset in the array into which the data of this payload + /// is to be stored. + /// + /// Returns: a byte[] array containing the data of this payload + /// + /// Throws: IOException + byte[] GetPayload(byte[] data, int offset); + + /// Checks if a payload can be loaded at this position. + ///

+ /// Payloads can only be loaded once per call to + /// NextPosition(). + /// + ///

+ /// true if there is a payload available at this position that can be loaded + bool IsPayloadAvailable { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorEntry.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorEntry.cs new file mode 100644 index 0000000000..cfdc57dd9c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorEntry.cs @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// Convenience class for holding TermVector information. + public class TermVectorEntry + { + private System.String field; + private System.String term; + private int frequency; + private TermVectorOffsetInfo[] offsets; + private int[] positions; + + + public TermVectorEntry() + { + } + + public TermVectorEntry(System.String field, System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) + { + this.field = field; + this.term = term; + this.frequency = frequency; + this.offsets = offsets; + this.positions = positions; + } + + + public virtual string Field + { + get { return field; } + } + + public virtual int Frequency + { + get { return frequency; } + internal set { this.frequency = value; } + } + + internal virtual void SetOffsets(TermVectorOffsetInfo[] value) + { + offsets = value; + } + + public virtual TermVectorOffsetInfo[] GetOffsets() + { + return offsets; + } + + internal virtual void SetPositions(int[] value) + { + positions = value; + } + + public virtual int[] GetPositions() + { + return positions; + } + + public virtual string Term + { + get { return term; } + } + + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (o == null || GetType() != o.GetType()) + return false; + + TermVectorEntry that = (TermVectorEntry) o; + + if (term != null?!term.Equals(that.term):that.term != null) + return false; + + return true; + } + + public override int GetHashCode() + { + return (term != null?term.GetHashCode():0); + } + + public override System.String ToString() + { + return "TermVectorEntry{" + "field='" + field + '\'' + ", term='" + term + '\'' + ", frequency=" + frequency + '}'; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorEntryFreqSortedComparator.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorEntryFreqSortedComparator.cs new file mode 100644 index 0000000000..e5de0754f2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorEntryFreqSortedComparator.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// Compares s first by frequency and then by + /// the term (case-sensitive) + /// + /// + /// + public class TermVectorEntryFreqSortedComparator : System.Collections.Generic.IComparer + { + public virtual int Compare(TermVectorEntry entry, TermVectorEntry entry1) + { + int result = 0; + result = entry1.Frequency - entry.Frequency; + if (result == 0) + { + result = String.CompareOrdinal(entry.Term, entry1.Term); + if (result == 0) + { + result = String.CompareOrdinal(entry.Field, entry1.Field); + } + } + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorMapper.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorMapper.cs new file mode 100644 index 0000000000..5ff66037a8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorMapper.cs @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + /// The TermVectorMapper can be used to map Term Vectors into your own + /// structure instead of the parallel array structure used by + /// . + ///

+ /// It is up to the implementation to make sure it is thread-safe. + /// + /// + /// + ///
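[Editor's illustrative sketch, not part of this patch: the abstract class declared just below is meant to be subclassed so term vectors can be mapped into a caller-defined structure instead of the parallel arrays. The minimal hypothetical mapper here collects term frequencies into a dictionary and tells Lucene it may skip position/offset decoding. Only SetExpectations, Map and the two Ignoring properties come from this file; handing the mapper to an accessor such as IndexReader.GetTermFreqVector(docNumber, field, mapper) is an assumption about the surrounding API.]

using System.Collections.Generic;

public sealed class FrequencyCollectingMapper : Lucene.Net.Index.TermVectorMapper
{
    // term -> frequency for the field this mapper was applied to
    public IDictionary<string, int> Frequencies = new Dictionary<string, int>();

    public override void SetExpectations(string field, int numTerms, bool storeOffsets, bool storePositions)
    {
        // Nothing to pre-size for a simple dictionary-backed mapper.
    }

    public override void Map(string term, int frequency, Lucene.Net.Index.TermVectorOffsetInfo[] offsets, int[] positions)
    {
        Frequencies[term] = frequency;   // offsets/positions are ignored by this mapper
    }

    // Tell the reader it may skip decoding positions and offsets entirely.
    public override bool IsIgnoringPositions { get { return true; } }
    public override bool IsIgnoringOffsets { get { return true; } }
}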

+ public abstract class TermVectorMapper + { + + private bool ignoringPositions; + private bool ignoringOffsets; + + + protected internal TermVectorMapper() + { + } + + /// + /// true if this mapper should tell Lucene to ignore positions even if they are stored + /// + /// similar to ignoringPositions + /// + protected internal TermVectorMapper(bool ignoringPositions, bool ignoringOffsets) + { + this.ignoringPositions = ignoringPositions; + this.ignoringOffsets = ignoringOffsets; + } + + /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + /// This method will be called once before retrieving the vector for a field. + /// + /// This method will be called before . + /// + /// The field the vector is for + /// + /// The number of terms that need to be mapped + /// + /// true if the mapper should expect offset information + /// + /// true if the mapper should expect positions info + /// + public abstract void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions); + /// Map the Term Vector information into your own structure + /// The term to add to the vector + /// + /// The frequency of the term in the document + /// + /// null if the offset is not specified, otherwise the offset into the field of the term + /// + /// null if the position is not specified, otherwise the position in the field of the term + /// + public abstract void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions); + + /// Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they + /// can be skipped over. Derived classes should set this to true if they want to ignore positions. The default + /// is false, meaning positions will be loaded if they are stored. + /// + /// false + public virtual bool IsIgnoringPositions + { + get { return ignoringPositions; } + } + + /// + /// Same principal as , but applied to offsets. false by default. + /// + /// false + public virtual bool IsIgnoringOffsets + { + get { return ignoringOffsets; } + } + + /// Passes down the index of the document whose term vector is currently being mapped, + /// once for each top level call to a term vector reader. + ///

+ /// Default implementation IGNORES the document number. Override if your implementation needs the document number. + ///

+ /// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. + /// + ///

+ /// index of document currently being mapped + /// + public virtual void SetDocumentNumber(int documentNumber) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorOffsetInfo.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorOffsetInfo.cs new file mode 100644 index 0000000000..3e7f885927 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorOffsetInfo.cs @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using System.Runtime.InteropServices; + +namespace Lucene.Net.Index +{ + + /// The TermVectorOffsetInfo class holds information pertaining to a Term in a 's + /// offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the + /// original content). + /// + [Serializable] + public struct TermVectorOffsetInfo : IEquatable + { + /// Convenience declaration when creating a that stores only position information. + [NonSerialized] + public static readonly TermVectorOffsetInfo[] EMPTY_OFFSET_INFO = new TermVectorOffsetInfo[0]; + + [NonSerialized] + public static readonly TermVectorOffsetInfo Null = new TermVectorOffsetInfo(int.MinValue, int.MinValue); + + private int startOffset; + private int endOffset; + + //public TermVectorOffsetInfo() + //{ + //} + + public TermVectorOffsetInfo(int startOffset, int endOffset) + { + this.endOffset = endOffset; + this.startOffset = startOffset; + } + + /// The accessor for the ending offset for the term + /// The offset + public int EndOffset + { + get { return endOffset; } + set { this.endOffset = value; } + } + + /// The accessor for the starting offset of the term. + /// + /// + /// The offset + public int StartOffset + { + get { return startOffset; } + set { this.startOffset = value; } + } + + ///// Two TermVectorOffsetInfos are equals if both the start and end offsets are the same + ///// The comparison Object + ///// + ///// true if both and are the same for both objects. 
+ ///// + //public override bool Equals(System.Object o) + //{ + // if (this == o) + // return true; + // if (!(o is TermVectorOffsetInfo)) + // return false; + + // TermVectorOffsetInfo termVectorOffsetInfo = (TermVectorOffsetInfo) o; + + // if (endOffset != termVectorOffsetInfo.endOffset) + // return false; + // if (startOffset != termVectorOffsetInfo.startOffset) + // return false; + + // return true; + //} + + //public override int GetHashCode() + //{ + // int result; + // result = startOffset; + // result = 29 * result + endOffset; + // return result; + //} + + + public bool Equals(TermVectorOffsetInfo other) + { + return startOffset == other.startOffset && endOffset == other.endOffset; + } + + public override bool Equals(object obj) + { + if (ReferenceEquals(null, obj)) + { + return EndOffset == int.MinValue && StartOffset == int.MinValue; + } + if (obj.GetType() != typeof (TermVectorOffsetInfo)) return false; + return Equals((TermVectorOffsetInfo) obj); + } + + public override int GetHashCode() + { + unchecked + { + return (startOffset*397) ^ endOffset; + } + } + + public static bool operator ==(TermVectorOffsetInfo left, object right) + { + return left.Equals(right); + } + + public static bool operator !=(TermVectorOffsetInfo left, object right) + { + return !left.Equals(right); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorsReader.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorsReader.cs new file mode 100644 index 0000000000..56cf76427a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorsReader.cs @@ -0,0 +1,731 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; + +namespace Lucene.Net.Index +{ + class TermVectorsReader : System.ICloneable, IDisposable + { + + // NOTE: if you make a new format, it must be larger than + // the current format + internal const int FORMAT_VERSION = 2; + + // Changes to speed up bulk merging of term vectors: + internal const int FORMAT_VERSION2 = 3; + + // Changed strings to UTF8 with length-in-bytes not length-in-chars + internal const int FORMAT_UTF8_LENGTH_IN_BYTES = 4; + + // NOTE: always change this if you switch to a new format! 
+ internal static readonly int FORMAT_CURRENT = FORMAT_UTF8_LENGTH_IN_BYTES; + + //The size in bytes that the FORMAT_VERSION will take up at the beginning of each file + internal const int FORMAT_SIZE = 4; + + internal const byte STORE_POSITIONS_WITH_TERMVECTOR = (byte) (0x1); + internal const byte STORE_OFFSET_WITH_TERMVECTOR = (byte) (0x2); + + private FieldInfos fieldInfos; + + private IndexInput tvx; + private IndexInput tvd; + private IndexInput tvf; + private int size; + private int numTotalDocs; + + // The docID offset where our docs begin in the index + // file. This will be 0 if we have our own private file. + private int docStoreOffset; + + private int format; + private bool isDisposed; + + internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos):this(d, segment, fieldInfos, BufferedIndexInput.BUFFER_SIZE) + { + } + + internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize):this(d, segment, fieldInfos, readBufferSize, - 1, 0) + { + } + + internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size) + { + bool success = false; + + try + { + if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION)) + { + tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize); + format = CheckValidFormat(tvx); + tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize); + int tvdFormat = CheckValidFormat(tvd); + tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize); + int tvfFormat = CheckValidFormat(tvf); + + System.Diagnostics.Debug.Assert(format == tvdFormat); + System.Diagnostics.Debug.Assert(format == tvfFormat); + + if (format >= FORMAT_VERSION2) + { + System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0); + numTotalDocs = (int)(tvx.Length() >> 4); + } + else + { + System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0); + numTotalDocs = (int)(tvx.Length() >> 3); + } + + if (-1 == docStoreOffset) + { + this.docStoreOffset = 0; + this.size = numTotalDocs; + System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size); + } + else + { + this.docStoreOffset = docStoreOffset; + this.size = size; + // Verify the file is long enough to hold all of our + // docs + System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset); + } + } + else + { + // If all documents flushed in a segment had hit + // non-aborting exceptions, it's possible that + // FieldInfos.hasVectors returns true yet the term + // vector files don't exist. + format = 0; + } + + + this.fieldInfos = fieldInfos; + success = true; + } + finally + { + // With lock-less commits, it's entirely possible (and + // fine) to hit a FileNotFound exception above. In + // this case, we want to explicitly close any subset + // of things that were opened so that we don't have to + // wait for a GC to do so. 
+ if (!success) + { + Dispose(); + } + } + } + + // Used for bulk copy when merging + internal virtual IndexInput GetTvdStream() + { + return tvd; + } + + // Used for bulk copy when merging + internal virtual IndexInput GetTvfStream() + { + return tvf; + } + + private void SeekTvx(int docNum) + { + if (format < FORMAT_VERSION2) + tvx.Seek((docNum + docStoreOffset) * 8L + FORMAT_SIZE); + else + tvx.Seek((docNum + docStoreOffset) * 16L + FORMAT_SIZE); + } + + internal virtual bool CanReadRawDocs() + { + return format >= FORMAT_UTF8_LENGTH_IN_BYTES; + } + + /// Retrieve the length (in bytes) of the tvd and tvf + /// entries for the next numDocs starting with + /// startDocID. This is used for bulk copying when + /// merging segments, if the field numbers are + /// congruent. Once this returns, the tvf & tvd streams + /// are seeked to the startDocID. + /// + internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs) + { + + if (tvx == null) + { + for (int i = 0; i < tvdLengths.Length; i++) + { + tvdLengths[i] = 0; + } + for (int i = 0; i < tvfLengths.Length; i++) + { + tvfLengths[i] = 0; + } + return ; + } + + // SegmentMerger calls canReadRawDocs() first and should + // not call us if that returns false. + if (format < FORMAT_VERSION2) + throw new System.SystemException("cannot read raw docs with older term vector formats"); + + SeekTvx(startDocID); + + long tvdPosition = tvx.ReadLong(); + tvd.Seek(tvdPosition); + + long tvfPosition = tvx.ReadLong(); + tvf.Seek(tvfPosition); + + long lastTvdPosition = tvdPosition; + long lastTvfPosition = tvfPosition; + + int count = 0; + while (count < numDocs) + { + int docID = docStoreOffset + startDocID + count + 1; + System.Diagnostics.Debug.Assert(docID <= numTotalDocs); + if (docID < numTotalDocs) + { + tvdPosition = tvx.ReadLong(); + tvfPosition = tvx.ReadLong(); + } + else + { + tvdPosition = tvd.Length(); + tvfPosition = tvf.Length(); + System.Diagnostics.Debug.Assert(count == numDocs - 1); + } + tvdLengths[count] = (int) (tvdPosition - lastTvdPosition); + tvfLengths[count] = (int) (tvfPosition - lastTvfPosition); + count++; + lastTvdPosition = tvdPosition; + lastTvfPosition = tvfPosition; + } + } + + private int CheckValidFormat(IndexInput in_Renamed) + { + int format = in_Renamed.ReadInt(); + if (format > FORMAT_CURRENT) + { + throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FORMAT_CURRENT + " or less"); + } + return format; + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + // make all effort to close up. Keep the first exception + // and throw it as a new one. 
+ System.IO.IOException keep = null; + if (tvx != null) + try + { + tvx.Close(); + } + catch (System.IO.IOException e) + { + if (keep == null) + keep = e; + } + if (tvd != null) + try + { + tvd.Close(); + } + catch (System.IO.IOException e) + { + if (keep == null) + keep = e; + } + if (tvf != null) + try + { + tvf.Close(); + } + catch (System.IO.IOException e) + { + if (keep == null) + keep = e; + } + if (keep != null) + { + throw new System.IO.IOException(keep.StackTrace); + } + } + + isDisposed = true; + } + + /// + /// The number of documents in the reader + /// + internal virtual int Size() + { + return size; + } + + public virtual void Get(int docNum, System.String field, TermVectorMapper mapper) + { + if (tvx != null) + { + int fieldNumber = fieldInfos.FieldNumber(field); + //We need to account for the FORMAT_SIZE at when seeking in the tvx + //We don't need to do this in other seeks because we already have the + // file pointer + //that was written in another file + SeekTvx(docNum); + //System.out.println("TVX Pointer: " + tvx.getFilePointer()); + long tvdPosition = tvx.ReadLong(); + + tvd.Seek(tvdPosition); + int fieldCount = tvd.ReadVInt(); + //System.out.println("Num Fields: " + fieldCount); + // There are only a few fields per document. We opt for a full scan + // rather then requiring that they be ordered. We need to read through + // all of the fields anyway to get to the tvf pointers. + int number = 0; + int found = - 1; + for (int i = 0; i < fieldCount; i++) + { + if (format >= FORMAT_VERSION) + number = tvd.ReadVInt(); + else + number += tvd.ReadVInt(); + + if (number == fieldNumber) + found = i; + } + + // This field, although valid in the segment, was not found in this + // document + if (found != - 1) + { + // Compute position in the tvf file + long position; + if (format >= FORMAT_VERSION2) + position = tvx.ReadLong(); + else + position = tvd.ReadVLong(); + for (int i = 1; i <= found; i++) + position += tvd.ReadVLong(); + + mapper.SetDocumentNumber(docNum); + ReadTermVector(field, position, mapper); + } + else + { + //System.out.println("Fieldable not found"); + } + } + else + { + //System.out.println("No tvx file"); + } + } + + + + /// Retrieve the term vector for the given document and field + /// The document number to retrieve the vector for + /// + /// The field within the document to retrieve + /// + /// The TermFreqVector for the document and field or null if there is no termVector for this field. 
+ /// + /// IOException if there is an error reading the term vector files + public /*internal*/ virtual ITermFreqVector Get(int docNum, System.String field) + { + // Check if no term vectors are available for this segment at all + ParallelArrayTermVectorMapper mapper = new ParallelArrayTermVectorMapper(); + Get(docNum, field, mapper); + + return mapper.MaterializeVector(); + } + + // Reads the String[] fields; you have to pre-seek tvd to + // the right point + private System.String[] ReadFields(int fieldCount) + { + int number = 0; + System.String[] fields = new System.String[fieldCount]; + + for (int i = 0; i < fieldCount; i++) + { + if (format >= FORMAT_VERSION) + number = tvd.ReadVInt(); + else + number += tvd.ReadVInt(); + + fields[i] = fieldInfos.FieldName(number); + } + + return fields; + } + + // Reads the long[] offsets into TVF; you have to pre-seek + // tvx/tvd to the right point + private long[] ReadTvfPointers(int fieldCount) + { + // Compute position in the tvf file + long position; + if (format >= FORMAT_VERSION2) + position = tvx.ReadLong(); + else + position = tvd.ReadVLong(); + + long[] tvfPointers = new long[fieldCount]; + tvfPointers[0] = position; + + for (int i = 1; i < fieldCount; i++) + { + position += tvd.ReadVLong(); + tvfPointers[i] = position; + } + + return tvfPointers; + } + + /// Return all term vectors stored for this document or null if the could not be read in. + /// + /// + /// The document number to retrieve the vector for + /// + /// All term frequency vectors + /// + /// IOException if there is an error reading the term vector files + public /*internal*/ virtual ITermFreqVector[] Get(int docNum) + { + ITermFreqVector[] result = null; + if (tvx != null) + { + //We need to offset by + SeekTvx(docNum); + long tvdPosition = tvx.ReadLong(); + + tvd.Seek(tvdPosition); + int fieldCount = tvd.ReadVInt(); + + // No fields are vectorized for this document + if (fieldCount != 0) + { + System.String[] fields = ReadFields(fieldCount); + long[] tvfPointers = ReadTvfPointers(fieldCount); + result = ReadTermVectors(docNum, fields, tvfPointers); + } + } + else + { + //System.out.println("No tvx file"); + } + return result; + } + + public virtual void Get(int docNumber, TermVectorMapper mapper) + { + // Check if no term vectors are available for this segment at all + if (tvx != null) + { + //We need to offset by + + SeekTvx(docNumber); + long tvdPosition = tvx.ReadLong(); + + tvd.Seek(tvdPosition); + int fieldCount = tvd.ReadVInt(); + + // No fields are vectorized for this document + if (fieldCount != 0) + { + System.String[] fields = ReadFields(fieldCount); + long[] tvfPointers = ReadTvfPointers(fieldCount); + mapper.SetDocumentNumber(docNumber); + ReadTermVectors(fields, tvfPointers, mapper); + } + } + else + { + //System.out.println("No tvx file"); + } + } + + + private SegmentTermVector[] ReadTermVectors(int docNum, System.String[] fields, long[] tvfPointers) + { + SegmentTermVector[] res = new SegmentTermVector[fields.Length]; + for (int i = 0; i < fields.Length; i++) + { + var mapper = new ParallelArrayTermVectorMapper(); + mapper.SetDocumentNumber(docNum); + ReadTermVector(fields[i], tvfPointers[i], mapper); + res[i] = (SegmentTermVector) mapper.MaterializeVector(); + } + return res; + } + + private void ReadTermVectors(System.String[] fields, long[] tvfPointers, TermVectorMapper mapper) + { + for (int i = 0; i < fields.Length; i++) + { + ReadTermVector(fields[i], tvfPointers[i], mapper); + } + } + + + /// + /// The field to read in + /// + /// The pointer within 
the tvf file where we should start reading + /// + /// The mapper used to map the TermVector + /// + /// IOException + private void ReadTermVector(System.String field, long tvfPointer, TermVectorMapper mapper) + { + + // Now read the data from specified position + //We don't need to offset by the FORMAT here since the pointer already includes the offset + tvf.Seek(tvfPointer); + + int numTerms = tvf.ReadVInt(); + //System.out.println("Num Terms: " + numTerms); + // If no terms - return a constant empty termvector. However, this should never occur! + if (numTerms == 0) + return ; + + bool storePositions; + bool storeOffsets; + + if (format >= FORMAT_VERSION) + { + byte bits = tvf.ReadByte(); + storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; + storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; + } + else + { + tvf.ReadVInt(); + storePositions = false; + storeOffsets = false; + } + mapper.SetExpectations(field, numTerms, storeOffsets, storePositions); + int start = 0; + int deltaLength = 0; + int totalLength = 0; + byte[] byteBuffer; + char[] charBuffer; + bool preUTF8 = format < FORMAT_UTF8_LENGTH_IN_BYTES; + + // init the buffers + if (preUTF8) + { + charBuffer = new char[10]; + byteBuffer = null; + } + else + { + charBuffer = null; + byteBuffer = new byte[20]; + } + + for (int i = 0; i < numTerms; i++) + { + start = tvf.ReadVInt(); + deltaLength = tvf.ReadVInt(); + totalLength = start + deltaLength; + + System.String term; + + if (preUTF8) + { + // Term stored as java chars + if (charBuffer.Length < totalLength) + { + char[] newCharBuffer = new char[(int) (1.5 * totalLength)]; + Array.Copy(charBuffer, 0, newCharBuffer, 0, start); + charBuffer = newCharBuffer; + } + tvf.ReadChars(charBuffer, start, deltaLength); + term = new System.String(charBuffer, 0, totalLength); + } + else + { + // Term stored as utf8 bytes + if (byteBuffer.Length < totalLength) + { + byte[] newByteBuffer = new byte[(int) (1.5 * totalLength)]; + Array.Copy(byteBuffer, 0, newByteBuffer, 0, start); + byteBuffer = newByteBuffer; + } + tvf.ReadBytes(byteBuffer, start, deltaLength); + term = System.Text.Encoding.UTF8.GetString(byteBuffer, 0, totalLength); + } + int freq = tvf.ReadVInt(); + int[] positions = null; + if (storePositions) + { + //read in the positions + //does the mapper even care about positions? + if (mapper.IsIgnoringPositions == false) + { + positions = new int[freq]; + int prevPosition = 0; + for (int j = 0; j < freq; j++) + { + positions[j] = prevPosition + tvf.ReadVInt(); + prevPosition = positions[j]; + } + } + else + { + //we need to skip over the positions. Since these are VInts, I don't believe there is anyway to know for sure how far to skip + // + for (int j = 0; j < freq; j++) + { + tvf.ReadVInt(); + } + } + } + TermVectorOffsetInfo[] offsets = null; + if (storeOffsets) + { + //does the mapper even care about offsets? 
+ if (mapper.IsIgnoringOffsets == false) + { + offsets = new TermVectorOffsetInfo[freq]; + int prevOffset = 0; + for (int j = 0; j < freq; j++) + { + int startOffset = prevOffset + tvf.ReadVInt(); + int endOffset = startOffset + tvf.ReadVInt(); + offsets[j] = new TermVectorOffsetInfo(startOffset, endOffset); + prevOffset = endOffset; + } + } + else + { + for (int j = 0; j < freq; j++) + { + tvf.ReadVInt(); + tvf.ReadVInt(); + } + } + } + mapper.Map(term, freq, offsets, positions); + } + } + + public virtual System.Object Clone() + { + + TermVectorsReader clone = (TermVectorsReader) base.MemberwiseClone(); + + // These are null when a TermVectorsReader was created + // on a segment that did not have term vectors saved + if (tvx != null && tvd != null && tvf != null) + { + clone.tvx = (IndexInput) tvx.Clone(); + clone.tvd = (IndexInput) tvd.Clone(); + clone.tvf = (IndexInput) tvf.Clone(); + } + + return clone; + } + } + + + /// Models the existing parallel array structure + class ParallelArrayTermVectorMapper:TermVectorMapper + { + + private System.String[] terms; + private int[] termFreqs; + private int[][] positions; + private TermVectorOffsetInfo[][] offsets; + private int currentPosition; + private bool storingOffsets; + private bool storingPositions; + private System.String field; + + public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) + { + this.field = field; + terms = new System.String[numTerms]; + termFreqs = new int[numTerms]; + this.storingOffsets = storeOffsets; + this.storingPositions = storePositions; + if (storePositions) + this.positions = new int[numTerms][]; + if (storeOffsets) + this.offsets = new TermVectorOffsetInfo[numTerms][]; + } + + public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) + { + terms[currentPosition] = term; + termFreqs[currentPosition] = frequency; + if (storingOffsets) + { + this.offsets[currentPosition] = offsets; + } + if (storingPositions) + { + this.positions[currentPosition] = positions; + } + currentPosition++; + } + + /// Construct the vector + /// The based on the mappings. + /// + public virtual ITermFreqVector MaterializeVector() + { + SegmentTermVector tv = null; + if (field != null && terms != null) + { + if (storingPositions || storingOffsets) + { + tv = new SegmentTermPositionVector(field, terms, termFreqs, positions, offsets); + } + else + { + tv = new SegmentTermVector(field, terms, termFreqs); + } + } + return tv; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriter.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriter.cs new file mode 100644 index 0000000000..d128a75e49 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriter.cs @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using RAMOutputStream = Lucene.Net.Store.RAMOutputStream; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + sealed class TermVectorsTermsWriter:TermsHashConsumer + { + private void InitBlock() + { + docFreeList = new PerDoc[1]; + } + + internal DocumentsWriter docWriter; + internal TermVectorsWriter termVectorsWriter; + internal PerDoc[] docFreeList; + internal int freeCount; + internal IndexOutput tvx; + internal IndexOutput tvd; + internal IndexOutput tvf; + internal int lastDocID; + + public TermVectorsTermsWriter(DocumentsWriter docWriter) + { + InitBlock(); + this.docWriter = docWriter; + } + + public override TermsHashConsumerPerThread AddThread(TermsHashPerThread termsHashPerThread) + { + return new TermVectorsTermsWriterPerThread(termsHashPerThread, this); + } + + internal override void CreatePostings(RawPostingList[] postings, int start, int count) + { + int end = start + count; + for (int i = start; i < end; i++) + postings[i] = new PostingList(); + } + + public override void Flush(IDictionary> threadsAndFields, SegmentWriteState state) + { + lock (this) + { + // NOTE: it's possible that all documents seen in this segment + // hit non-aborting exceptions, in which case we will + // not have yet init'd the TermVectorsWriter. This is + // actually OK (unlike in the stored fields case) + // because, although IieldInfos.hasVectors() will return + // true, the TermVectorsReader gracefully handles + // non-existence of the term vectors files. + if (tvx != null) + { + + if (state.numDocsInStore > 0) + // In case there are some final documents that we + // didn't see (because they hit a non-aborting exception): + Fill(state.numDocsInStore - docWriter.DocStoreOffset); + + tvx.Flush(); + tvd.Flush(); + tvf.Flush(); + } + + foreach(var entry in threadsAndFields) + { + foreach(var field in entry.Value) + { + TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)field; + perField.termsHashPerField.Reset(); + perField.ShrinkHash(); + } + + TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key; + perThread.termsHashPerThread.Reset(true); + } + } + } + + internal override void CloseDocStore(SegmentWriteState state) + { + lock (this) + { + if (tvx != null) + { + // At least one doc in this run had term vectors + // enabled + Fill(state.numDocsInStore - docWriter.DocStoreOffset); + tvx.Close(); + tvf.Close(); + tvd.Close(); + tvx = null; + System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null); + System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION; + if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(fileName)) + throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName)); + + state.flushedFiles.Add(state.docStoreSegmentName + "." 
+ IndexFileNames.VECTORS_INDEX_EXTENSION); + state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); + state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); + + docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); + docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); + docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); + + lastDocID = 0; + } + } + } + + internal int allocCount; + + internal PerDoc GetPerDoc() + { + lock (this) + { + if (freeCount == 0) + { + allocCount++; + if (allocCount > docFreeList.Length) + { + // Grow our free list up front to make sure we have + // enough space to recycle all outstanding PerDoc + // instances + System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length); + docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)]; + } + return new PerDoc(this); + } + else + return docFreeList[--freeCount]; + } + } + + /// Fills in no-term-vectors for all docs we haven't seen + /// since the last doc that had term vectors. + /// + internal void Fill(int docID) + { + int docStoreOffset = docWriter.DocStoreOffset; + int end = docID + docStoreOffset; + if (lastDocID < end) + { + long tvfPosition = tvf.FilePointer; + while (lastDocID < end) + { + tvx.WriteLong(tvd.FilePointer); + tvd.WriteVInt(0); + tvx.WriteLong(tvfPosition); + lastDocID++; + } + } + } + + internal void InitTermVectorsWriter() + { + lock (this) + { + if (tvx == null) + { + + System.String docStoreSegment = docWriter.DocStoreSegment; + + if (docStoreSegment == null) + return ; + + System.Diagnostics.Debug.Assert(docStoreSegment != null); + + // If we hit an exception while init'ing the term + // vector output files, we must abort this segment + // because those files will be in an unknown + // state: + tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); + tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); + tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); + + tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT); + tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT); + tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT); + + docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); + docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); + docWriter.AddOpenFile(docStoreSegment + "." 
+ IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); + + lastDocID = 0; + } + } + } + + internal void FinishDocument(PerDoc perDoc) + { + lock (this) + { + + System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start")); + + InitTermVectorsWriter(); + + Fill(perDoc.docID); + + // Append term vectors to the real outputs: + tvx.WriteLong(tvd.FilePointer); + tvx.WriteLong(tvf.FilePointer); + tvd.WriteVInt(perDoc.numVectorFields); + if (perDoc.numVectorFields > 0) + { + for (int i = 0; i < perDoc.numVectorFields; i++) + tvd.WriteVInt(perDoc.fieldNumbers[i]); + System.Diagnostics.Debug.Assert(0 == perDoc.fieldPointers [0]); + long lastPos = perDoc.fieldPointers[0]; + for (int i = 1; i < perDoc.numVectorFields; i++) + { + long pos = perDoc.fieldPointers[i]; + tvd.WriteVLong(pos - lastPos); + lastPos = pos; + } + perDoc.perDocTvf.WriteTo(tvf); + perDoc.numVectorFields = 0; + } + + System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset); + + lastDocID++; + perDoc.Reset(); + Free(perDoc); + System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end")); + } + } + + public bool FreeRAM() + { + // We don't hold any state beyond one doc, so we don't + // free persistent RAM here + return false; + } + + public override void Abort() + { + if (tvx != null) + { + try + { + tvx.Close(); + } + catch (System.Exception) + { + } + tvx = null; + } + if (tvd != null) + { + try + { + tvd.Close(); + } + catch (System.Exception) + { + } + tvd = null; + } + if (tvf != null) + { + try + { + tvf.Close(); + } + catch (System.Exception) + { + } + tvf = null; + } + lastDocID = 0; + } + + internal void Free(PerDoc doc) + { + lock (this) + { + System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length); + docFreeList[freeCount++] = doc; + } + } + + internal class PerDoc:DocumentsWriter.DocWriter + { + public PerDoc(TermVectorsTermsWriter enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(TermVectorsTermsWriter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + buffer = enclosingInstance.docWriter.NewPerDocBuffer(); + perDocTvf = new RAMOutputStream(buffer); + } + private TermVectorsTermsWriter enclosingInstance; + public TermVectorsTermsWriter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + internal DocumentsWriter.PerDocBuffer buffer; + internal RAMOutputStream perDocTvf; + internal int numVectorFields; + + internal int[] fieldNumbers = new int[1]; + internal long[] fieldPointers = new long[1]; + + internal void Reset() + { + perDocTvf.Reset(); + buffer.Recycle(); + numVectorFields = 0; + } + + public override void Abort() + { + Reset(); + Enclosing_Instance.Free(this); + } + + internal void AddField(int fieldNumber) + { + if (numVectorFields == fieldNumbers.Length) + { + fieldNumbers = ArrayUtil.Grow(fieldNumbers); + fieldPointers = ArrayUtil.Grow(fieldPointers); + } + fieldNumbers[numVectorFields] = fieldNumber; + fieldPointers[numVectorFields] = perDocTvf.FilePointer; + numVectorFields++; + } + + public override long SizeInBytes() + { + return buffer.SizeInBytes; + } + + public override void Finish() + { + Enclosing_Instance.FinishDocument(this); + } + } + + internal sealed class PostingList:RawPostingList + { + internal int freq; // How many times this term occurred in the current doc + internal int lastOffset; // Last offset we saw + internal int lastPosition; // Last position where this term occurred + } + + internal 
override int BytesPerPosting() + { + return RawPostingList.BYTES_SIZE + 3 * DocumentsWriter.INT_NUM_BYTE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriterPerField.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriterPerField.cs new file mode 100644 index 0000000000..e6bb827741 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriterPerField.cs @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Documents; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + + sealed class TermVectorsTermsWriterPerField:TermsHashConsumerPerField + { + + internal TermVectorsTermsWriterPerThread perThread; + internal TermsHashPerField termsHashPerField; + internal TermVectorsTermsWriter termsWriter; + internal FieldInfo fieldInfo; + internal DocumentsWriter.DocState docState; + internal FieldInvertState fieldState; + + internal bool doVectors; + internal bool doVectorPositions; + internal bool doVectorOffsets; + + internal int maxNumPostings; + internal IOffsetAttribute offsetAttribute = null; + + public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) + { + this.termsHashPerField = termsHashPerField; + this.perThread = perThread; + this.termsWriter = perThread.termsWriter; + this.fieldInfo = fieldInfo; + docState = termsHashPerField.docState; + fieldState = termsHashPerField.fieldState; + } + + internal override int GetStreamCount() + { + return 2; + } + + internal override bool Start(IFieldable[] fields, int count) + { + doVectors = false; + doVectorPositions = false; + doVectorOffsets = false; + + for (int i = 0; i < count; i++) + { + IFieldable field = fields[i]; + if (field.IsIndexed && field.IsTermVectorStored) + { + doVectors = true; + doVectorPositions |= field.IsStorePositionWithTermVector; + doVectorOffsets |= field.IsStoreOffsetWithTermVector; + } + } + + if (doVectors) + { + if (perThread.doc == null) + { + perThread.doc = termsWriter.GetPerDoc(); + perThread.doc.docID = docState.docID; + System.Diagnostics.Debug.Assert(perThread.doc.numVectorFields == 0); + System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.Length); + System.Diagnostics.Debug.Assert(0 == perThread.doc.perDocTvf.FilePointer); + } + + System.Diagnostics.Debug.Assert(perThread.doc.docID == docState.docID); + if (termsHashPerField.numPostings != 0) + { + // Only necessary if previous doc hit a + // non-aborting exception while writing vectors in + // this field: + termsHashPerField.Reset(); + 
perThread.termsHashPerThread.Reset(false); + } + } + + // TODO: only if needed for performance + //perThread.postingsCount = 0; + + return doVectors; + } + + public void Abort() + { + } + + /// Called once per field per document if term vectors + /// are enabled, to write the vectors to + /// RAMOutputStream, which is then quickly flushed to + /// the real term vectors files in the Directory. + /// + internal override void Finish() + { + + System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.finish start")); + + int numPostings = termsHashPerField.numPostings; + + System.Diagnostics.Debug.Assert(numPostings >= 0); + + if (!doVectors || numPostings == 0) + return ; + + if (numPostings > maxNumPostings) + maxNumPostings = numPostings; + + IndexOutput tvf = perThread.doc.perDocTvf; + + // This is called once, after inverting all occurences + // of a given field in the doc. At this point we flush + // our hash into the DocWriter. + + System.Diagnostics.Debug.Assert(fieldInfo.storeTermVector); + System.Diagnostics.Debug.Assert(perThread.VectorFieldsInOrder(fieldInfo)); + + perThread.doc.AddField(termsHashPerField.fieldInfo.number); + + RawPostingList[] postings = termsHashPerField.SortPostings(); + + tvf.WriteVInt(numPostings); + byte bits = (byte) (0x0); + if (doVectorPositions) + bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR; + if (doVectorOffsets) + bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR; + tvf.WriteByte(bits); + + int encoderUpto = 0; + int lastTermBytesCount = 0; + + ByteSliceReader reader = perThread.vectorSliceReader; + char[][] charBuffers = perThread.termsHashPerThread.charPool.buffers; + for (int j = 0; j < numPostings; j++) + { + TermVectorsTermsWriter.PostingList posting = (TermVectorsTermsWriter.PostingList) postings[j]; + int freq = posting.freq; + + char[] text2 = charBuffers[posting.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + int start2 = posting.textStart & DocumentsWriter.CHAR_BLOCK_MASK; + + // We swap between two encoders to save copying + // last Term's byte array + UnicodeUtil.UTF8Result utf8Result = perThread.utf8Results[encoderUpto]; + + // TODO: we could do this incrementally + UnicodeUtil.UTF16toUTF8(text2, start2, utf8Result); + int termBytesCount = utf8Result.length; + + // TODO: UTF16toUTF8 could tell us this prefix + // Compute common prefix between last term and + // this term + int prefix = 0; + if (j > 0) + { + byte[] lastTermBytes = perThread.utf8Results[1 - encoderUpto].result; + byte[] termBytes = perThread.utf8Results[encoderUpto].result; + while (prefix < lastTermBytesCount && prefix < termBytesCount) + { + if (lastTermBytes[prefix] != termBytes[prefix]) + break; + prefix++; + } + } + encoderUpto = 1 - encoderUpto; + lastTermBytesCount = termBytesCount; + + int suffix = termBytesCount - prefix; + tvf.WriteVInt(prefix); + tvf.WriteVInt(suffix); + tvf.WriteBytes(utf8Result.result, prefix, suffix); + tvf.WriteVInt(freq); + + if (doVectorPositions) + { + termsHashPerField.InitReader(reader, posting, 0); + reader.WriteTo(tvf); + } + + if (doVectorOffsets) + { + termsHashPerField.InitReader(reader, posting, 1); + reader.WriteTo(tvf); + } + } + + termsHashPerField.Reset(); + + // NOTE: we clear, per-field, at the thread level, + // because term vectors fully write themselves on each + // field; this saves RAM (eg if large doc has two large + // fields w/ term vectors on) because we recycle/reuse + // all RAM after each field: + perThread.termsHashPerThread.Reset(false); + } + + internal void 
ShrinkHash() + { + termsHashPerField.ShrinkHash(maxNumPostings); + maxNumPostings = 0; + } + + internal override void Start(IFieldable f) + { + if (doVectorOffsets) + { + offsetAttribute = fieldState.attributeSource.AddAttribute(); + } + else + { + offsetAttribute = null; + } + } + + internal override void NewTerm(RawPostingList p0) + { + + System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.newTerm start")); + + TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList) p0; + + p.freq = 1; + + if (doVectorOffsets) + { + int startOffset = fieldState.offset + offsetAttribute.StartOffset; ; + int endOffset = fieldState.offset + offsetAttribute.EndOffset; + + termsHashPerField.WriteVInt(1, startOffset); + termsHashPerField.WriteVInt(1, endOffset - startOffset); + p.lastOffset = endOffset; + } + + if (doVectorPositions) + { + termsHashPerField.WriteVInt(0, fieldState.position); + p.lastPosition = fieldState.position; + } + } + + internal override void AddTerm(RawPostingList p0) + { + + System.Diagnostics.Debug.Assert(docState.TestPoint("TermVectorsTermsWriterPerField.addTerm start")); + + TermVectorsTermsWriter.PostingList p = (TermVectorsTermsWriter.PostingList) p0; + p.freq++; + + if (doVectorOffsets) + { + int startOffset = fieldState.offset + offsetAttribute.StartOffset; ; + int endOffset = fieldState.offset + offsetAttribute.EndOffset; + + termsHashPerField.WriteVInt(1, startOffset - p.lastOffset); + termsHashPerField.WriteVInt(1, endOffset - startOffset); + p.lastOffset = endOffset; + } + + if (doVectorPositions) + { + termsHashPerField.WriteVInt(0, fieldState.position - p.lastPosition); + p.lastPosition = fieldState.position; + } + } + + internal override void SkippingLongTerm() + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriterPerThread.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriterPerThread.cs new file mode 100644 index 0000000000..b08b920ada --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorsTermsWriterPerThread.cs @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + + sealed class TermVectorsTermsWriterPerThread:TermsHashConsumerPerThread + { + + internal TermVectorsTermsWriter termsWriter; + internal TermsHashPerThread termsHashPerThread; + internal DocumentsWriter.DocState docState; + + internal TermVectorsTermsWriter.PerDoc doc; + + public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter) + { + this.termsWriter = termsWriter; + this.termsHashPerThread = termsHashPerThread; + docState = termsHashPerThread.docState; + } + + // Used by perField when serializing the term vectors + internal ByteSliceReader vectorSliceReader = new ByteSliceReader(); + + internal UnicodeUtil.UTF8Result[] utf8Results = new UnicodeUtil.UTF8Result[]{new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result()}; + + public override void StartDocument() + { + System.Diagnostics.Debug.Assert(ClearLastVectorFieldName()); + if (doc != null) + { + doc.Reset(); + doc.docID = docState.docID; + } + } + + public override DocumentsWriter.DocWriter FinishDocument() + { + try + { + return doc; + } + finally + { + doc = null; + } + } + + public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) + { + return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo); + } + + public override void Abort() + { + if (doc != null) + { + doc.Abort(); + doc = null; + } + } + + // Called only by assert + internal bool ClearLastVectorFieldName() + { + lastVectorFieldName = null; + return true; + } + + // Called only by assert + internal System.String lastVectorFieldName; + internal bool VectorFieldsInOrder(FieldInfo fi) + { + try + { + if (lastVectorFieldName != null) + return String.CompareOrdinal(lastVectorFieldName, fi.name) < 0; + else + return true; + } + finally + { + lastVectorFieldName = fi.name; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermVectorsWriter.cs b/external/Lucene.Net.Light/src/core/Index/TermVectorsWriter.cs new file mode 100644 index 0000000000..ebaa4f4d97 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermVectorsWriter.cs @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using Directory = Lucene.Net.Store.Directory; +using IndexOutput = Lucene.Net.Store.IndexOutput; +using StringHelper = Lucene.Net.Util.StringHelper; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + sealed class TermVectorsWriter : IDisposable + { + + private readonly IndexOutput tvx = null; + private readonly IndexOutput tvd = null; + private readonly IndexOutput tvf = null; + private readonly FieldInfos fieldInfos; + internal UnicodeUtil.UTF8Result[] utf8Results = new[]{new UnicodeUtil.UTF8Result(), new UnicodeUtil.UTF8Result()}; + + public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos) + { + // Open files for TermVector storage + tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); + tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT); + tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION); + tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT); + tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION); + tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT); + + this.fieldInfos = fieldInfos; + } + + /// Add a complete document specified by all its term vectors. If document has no + /// term vectors, add value for tvx. + /// + /// + /// + /// + /// IOException + public void AddAllDocVectors(ITermFreqVector[] vectors) + { + + tvx.WriteLong(tvd.FilePointer); + tvx.WriteLong(tvf.FilePointer); + + if (vectors != null) + { + int numFields = vectors.Length; + tvd.WriteVInt(numFields); + + var fieldPointers = new long[numFields]; + + for (int i = 0; i < numFields; i++) + { + fieldPointers[i] = tvf.FilePointer; + + int fieldNumber = fieldInfos.FieldNumber(vectors[i].Field); + + // 1st pass: write field numbers to tvd + tvd.WriteVInt(fieldNumber); + + int numTerms = vectors[i].Size; + tvf.WriteVInt(numTerms); + + TermPositionVector tpVector; + + byte bits; + bool storePositions; + bool storeOffsets; + + if (vectors[i] is TermPositionVector) + { + // May have positions & offsets + tpVector = (TermPositionVector) vectors[i]; + storePositions = tpVector.Size > 0 && tpVector.GetTermPositions(0) != null; + storeOffsets = tpVector.Size > 0 && tpVector.GetOffsets(0) != null; + bits = (byte) ((storePositions?TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR: (byte) 0) + (storeOffsets?TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR: (byte) 0)); + } + else + { + tpVector = null; + bits = 0; + storePositions = false; + storeOffsets = false; + } + + tvf.WriteVInt(bits); + + System.String[] terms = vectors[i].GetTerms(); + int[] freqs = vectors[i].GetTermFrequencies(); + + int utf8Upto = 0; + utf8Results[1].length = 0; + + for (int j = 0; j < numTerms; j++) + { + + UnicodeUtil.UTF16toUTF8(terms[j], 0, terms[j].Length, utf8Results[utf8Upto]); + + int start = StringHelper.BytesDifference(utf8Results[1 - utf8Upto].result, utf8Results[1 - utf8Upto].length, utf8Results[utf8Upto].result, utf8Results[utf8Upto].length); + int length = utf8Results[utf8Upto].length - start; + tvf.WriteVInt(start); // write shared prefix length + tvf.WriteVInt(length); // write delta length + tvf.WriteBytes(utf8Results[utf8Upto].result, start, length); // write delta bytes + utf8Upto = 1 - utf8Upto; + + int termFreq = freqs[j]; + + tvf.WriteVInt(termFreq); + + if (storePositions) + { + int[] positions = tpVector.GetTermPositions(j); + if (positions == null) + throw new System.SystemException("Trying to write positions that are null!"); + 
System.Diagnostics.Debug.Assert(positions.Length == termFreq); + + // use delta encoding for positions + int lastPosition = 0; + foreach (int position in positions) + { + tvf.WriteVInt(position - lastPosition); + lastPosition = position; + } + } + + if (storeOffsets) + { + TermVectorOffsetInfo[] offsets = tpVector.GetOffsets(j); + if (offsets == null) + throw new System.SystemException("Trying to write offsets that are null!"); + System.Diagnostics.Debug.Assert(offsets.Length == termFreq); + + // use delta encoding for offsets + int lastEndOffset = 0; + foreach (TermVectorOffsetInfo t in offsets) + { + int startOffset = t.StartOffset; + int endOffset = t.EndOffset; + tvf.WriteVInt(startOffset - lastEndOffset); + tvf.WriteVInt(endOffset - startOffset); + lastEndOffset = endOffset; + } + } + } + } + + // 2nd pass: write field pointers to tvd + if (numFields > 1) + { + long lastFieldPointer = fieldPointers[0]; + for (int i = 1; i < numFields; i++) + { + long fieldPointer = fieldPointers[i]; + tvd.WriteVLong(fieldPointer - lastFieldPointer); + lastFieldPointer = fieldPointer; + } + } + } + else + tvd.WriteVInt(0); + } + + /// Do a bulk copy of numDocs documents from reader to our + /// streams. This is used to expedite merging, if the + /// field numbers are congruent. + /// + internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs) + { + long tvdPosition = tvd.FilePointer; + long tvfPosition = tvf.FilePointer; + long tvdStart = tvdPosition; + long tvfStart = tvfPosition; + for (int i = 0; i < numDocs; i++) + { + tvx.WriteLong(tvdPosition); + tvdPosition += tvdLengths[i]; + tvx.WriteLong(tvfPosition); + tvfPosition += tvfLengths[i]; + } + tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart); + tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart); + System.Diagnostics.Debug.Assert(tvd.FilePointer == tvdPosition); + System.Diagnostics.Debug.Assert(tvf.FilePointer == tvfPosition); + } + + /// Close all streams. + public void Dispose() + { + // Move to a protected method if class becomes unsealed + + // make an effort to close all streams we can but remember and re-throw + // the first exception encountered in this process + System.IO.IOException keep = null; + if (tvx != null) + try + { + tvx.Close(); + } + catch (System.IO.IOException e) + { + keep = e; + } + if (tvd != null) + try + { + tvd.Close(); + } + catch (System.IO.IOException e) + { + if (keep == null) + keep = e; + } + if (tvf != null) + try + { + tvf.Close(); + } + catch (System.IO.IOException e) + { + if (keep == null) + keep = e; + } + if (keep != null) + { + throw new System.IO.IOException(keep.StackTrace); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermsHash.cs b/external/Lucene.Net.Light/src/core/Index/TermsHash.cs new file mode 100644 index 0000000000..97ae1ebd74 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermsHash.cs @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
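// Dispose above uses a "close everything, remember only the first failure" pattern:
// each of tvx, tvd and tvf is closed inside its own try/catch, the first IOException
// is retained, and it is surfaced only after all three close attempts have run. A
// compact sketch of the same idea over an arbitrary set of streams; CloseAllSketch is
// illustrative and, unlike the original, rethrows the captured exception itself
// rather than wrapping its stack trace in a new one.
using System.Collections.Generic;
using System.IO;

static class CloseAllSketch
{
    public static void CloseAll(IEnumerable<Stream> streams)
    {
        IOException first = null;
        foreach (Stream s in streams)
        {
            if (s == null)
                continue;
            try { s.Close(); }
            catch (IOException e) { if (first == null) first = e; }  // keep only the first failure
        }
        if (first != null)
            throw first;  // report it once every stream has had a chance to close
    }
}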
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using ArrayUtil = Lucene.Net.Util.ArrayUtil; + +namespace Lucene.Net.Index +{ + + /// This class implements , which + /// is passed each token produced by the analyzer on each + /// field. It stores these tokens in a hash table, and + /// allocates separate byte streams per token. Consumers of + /// this class, eg and + ///, write their own byte streams + /// under each term. + /// + sealed class TermsHash : InvertedDocConsumer + { + + internal TermsHashConsumer consumer; + internal TermsHash nextTermsHash; + internal int bytesPerPosting; + internal int postingsFreeChunk; + internal DocumentsWriter docWriter; + private RawPostingList[] postingsFreeList = new RawPostingList[1]; + private int postingsFreeCount; + private int postingsAllocCount; + internal bool trackAllocations; + + public TermsHash(DocumentsWriter docWriter, bool trackAllocations, TermsHashConsumer consumer, TermsHash nextTermsHash) + { + this.docWriter = docWriter; + this.consumer = consumer; + this.nextTermsHash = nextTermsHash; + this.trackAllocations = trackAllocations; + + // Why + 4*POINTER_NUM_BYTE below? + // +1: Posting is referenced by postingsFreeList array + // +3: Posting is referenced by hash, which + // targets 25-50% fill factor; approximate this + // as 3X # pointers + bytesPerPosting = consumer.BytesPerPosting() + 4 * DocumentsWriter.POINTER_NUM_BYTE; + postingsFreeChunk = (int) (DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting); + } + + internal override InvertedDocConsumerPerThread AddThread(DocInverterPerThread docInverterPerThread) + { + return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null); + } + + internal TermsHashPerThread AddThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) + { + return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread); + } + + internal override void SetFieldInfos(FieldInfos fieldInfos) + { + this.fieldInfos = fieldInfos; + consumer.SetFieldInfos(fieldInfos); + } + + // NOTE: do not make this sync'd; it's not necessary (DW + // ensures all other threads are idle), and it leads to + // deadlock + public override void Abort() + { + consumer.Abort(); + if (nextTermsHash != null) + nextTermsHash.Abort(); + } + + internal void ShrinkFreePostings(IDictionary> threadsAndFields, SegmentWriteState state) + { + + System.Diagnostics.Debug.Assert(postingsFreeCount == postingsAllocCount, "Thread.currentThread().getName()" + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer); + + int newSize = 1; + if (newSize != postingsFreeList.Length) + { + if (postingsFreeCount > newSize) + { + if (trackAllocations) + { + docWriter.BytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting); + } + postingsFreeCount = newSize; + postingsAllocCount = newSize; + } + + RawPostingList[] newArray = new RawPostingList[newSize]; + Array.Copy(postingsFreeList, 0, newArray, 0, postingsFreeCount); + postingsFreeList = newArray; + } + } + + internal override void 
CloseDocStore(SegmentWriteState state) + { + lock (this) + { + consumer.CloseDocStore(state); + if (nextTermsHash != null) + nextTermsHash.CloseDocStore(state); + } + } + + internal override void Flush(IDictionary> threadsAndFields, SegmentWriteState state) + { + lock (this) + { + var childThreadsAndFields = new Dictionary>(); + Dictionary> nextThreadsAndFields; + + if (nextTermsHash != null) + { + nextThreadsAndFields = new Dictionary>(); + } + else + nextThreadsAndFields = null; + + foreach (var entry in threadsAndFields) + { + TermsHashPerThread perThread = (TermsHashPerThread) entry.Key; + + ICollection fields = entry.Value; + + var fieldsIt = fields.GetEnumerator(); + ICollection childFields = new HashSet(); + ICollection nextChildFields; + + if (nextTermsHash != null) + { + nextChildFields = new HashSet(); + } + else + nextChildFields = null; + + while (fieldsIt.MoveNext()) + { + TermsHashPerField perField = (TermsHashPerField) fieldsIt.Current; + childFields.Add(perField.consumer); + if (nextTermsHash != null) + nextChildFields.Add(perField.nextPerField); + } + + childThreadsAndFields[perThread.consumer] = childFields; + if (nextTermsHash != null) + nextThreadsAndFields[perThread.nextPerThread] = nextChildFields; + } + + consumer.Flush(childThreadsAndFields, state); + + ShrinkFreePostings(threadsAndFields, state); + + if (nextTermsHash != null) + nextTermsHash.Flush(nextThreadsAndFields, state); + } + } + + public override bool FreeRAM() + { + if (!trackAllocations) + return false; + + bool any; + long bytesFreed = 0; + lock (this) + { + int numToFree; + if (postingsFreeCount >= postingsFreeChunk) + numToFree = postingsFreeChunk; + else + numToFree = postingsFreeCount; + any = numToFree > 0; + if (any) + { + for (int i = postingsFreeCount - numToFree; i < postingsFreeCount; i++) + { + postingsFreeList[i] = null; + } + //Arrays.fill(postingsFreeList, postingsFreeCount - numToFree, postingsFreeCount, null); + postingsFreeCount -= numToFree; + postingsAllocCount -= numToFree; + bytesFreed = -numToFree * bytesPerPosting; + any = true; + } + } + + if (any) + { + docWriter.BytesAllocated(bytesFreed); + } + + if (nextTermsHash != null) + any |= nextTermsHash.FreeRAM(); + + return any; + } + + public void RecyclePostings(RawPostingList[] postings, int numPostings) + { + lock (this) + { + + System.Diagnostics.Debug.Assert(postings.Length >= numPostings); + + // Move all Postings from this ThreadState back to our + // free list. 
We pre-allocated this array while we were + // creating Postings to make sure it's large enough + System.Diagnostics.Debug.Assert(postingsFreeCount + numPostings <= postingsFreeList.Length); + Array.Copy(postings, 0, postingsFreeList, postingsFreeCount, numPostings); + postingsFreeCount += numPostings; + } + } + + public void GetPostings(RawPostingList[] postings) + { + lock (this) + { + + System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings start")); + + System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsFreeList.Length); + System.Diagnostics.Debug.Assert(postingsFreeCount <= postingsAllocCount, "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount); + + int numToCopy; + if (postingsFreeCount < postings.Length) + numToCopy = postingsFreeCount; + else + numToCopy = postings.Length; + int start = postingsFreeCount - numToCopy; + System.Diagnostics.Debug.Assert(start >= 0); + System.Diagnostics.Debug.Assert(start + numToCopy <= postingsFreeList.Length); + System.Diagnostics.Debug.Assert(numToCopy <= postings.Length); + Array.Copy(postingsFreeList, start, postings, 0, numToCopy); + + // Directly allocate the remainder if any + if (numToCopy != postings.Length) + { + int extra = postings.Length - numToCopy; + int newPostingsAllocCount = postingsAllocCount + extra; + + consumer.CreatePostings(postings, numToCopy, extra); + System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermsHash.getPostings after create")); + postingsAllocCount += extra; + + if (trackAllocations) + docWriter.BytesAllocated(extra * bytesPerPosting); + + if (newPostingsAllocCount > postingsFreeList.Length) + // Pre-allocate the postingsFreeList so it's large + // enough to hold all postings we've given out + postingsFreeList = new RawPostingList[ArrayUtil.GetNextSize(newPostingsAllocCount)]; + } + + postingsFreeCount -= numToCopy; + + if (trackAllocations) + docWriter.BytesUsed(postings.Length * bytesPerPosting); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermsHashConsumer.cs b/external/Lucene.Net.Light/src/core/Index/TermsHashConsumer.cs new file mode 100644 index 0000000000..210737599b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermsHashConsumer.cs @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
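// GetPostings and RecyclePostings above implement a simple array-backed free list:
// requests are served from the tail of postingsFreeList, whatever cannot be served is
// freshly allocated (and the list is grown so everything can be taken back later), and
// RecyclePostings appends returned objects to the tail again. A stripped-down,
// single-threaded sketch of that pattern, assuming a generic Pool<T>; the names are
// illustrative and not part of this commit.
using System;

sealed class Pool<T> where T : class, new()
{
    private T[] free = new T[16];
    private int freeCount;

    public void Fill(T[] buffer)
    {
        int fromFree = Math.Min(freeCount, buffer.Length);
        // hand out recycled instances from the tail of the free list first
        Array.Copy(free, freeCount - fromFree, buffer, 0, fromFree);
        freeCount -= fromFree;
        for (int i = fromFree; i < buffer.Length; i++)
            buffer[i] = new T();   // allocate the remainder directly
    }

    public void Recycle(T[] buffer, int count)
    {
        if (freeCount + count > free.Length)
            Array.Resize(ref free, Math.Max(free.Length * 2, freeCount + count));
        Array.Copy(buffer, 0, free, freeCount, count);
        freeCount += count;
    }
}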
+ */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Index +{ + + abstract class TermsHashConsumer + { + internal abstract int BytesPerPosting(); + internal abstract void CreatePostings(RawPostingList[] postings, int start, int count); + public abstract TermsHashConsumerPerThread AddThread(TermsHashPerThread perThread); + public abstract void Flush(IDictionary> threadsAndFields, SegmentWriteState state); + public abstract void Abort(); + internal abstract void CloseDocStore(SegmentWriteState state); + + internal FieldInfos fieldInfos; + + internal virtual void SetFieldInfos(FieldInfos fieldInfos) + { + this.fieldInfos = fieldInfos; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermsHashConsumerPerField.cs b/external/Lucene.Net.Light/src/core/Index/TermsHashConsumerPerField.cs new file mode 100644 index 0000000000..11002cb9e8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermsHashConsumerPerField.cs @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + /// Implement this class to plug into the TermsHash + /// processor, which inverts and stores Tokens into a hash + /// table and provides an API for writing bytes into + /// multiple streams for each unique Token. + /// + abstract class TermsHashConsumerPerField + { + internal abstract bool Start(IFieldable[] fields, int count); + internal abstract void Finish(); + internal abstract void SkippingLongTerm(); + internal abstract void Start(IFieldable field); + internal abstract void NewTerm(RawPostingList p); + internal abstract void AddTerm(RawPostingList p); + internal abstract int GetStreamCount(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermsHashConsumerPerThread.cs b/external/Lucene.Net.Light/src/core/Index/TermsHashConsumerPerThread.cs new file mode 100644 index 0000000000..fd98eaa60e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermsHashConsumerPerThread.cs @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Index +{ + + abstract class TermsHashConsumerPerThread + { + public abstract void StartDocument(); + public abstract DocumentsWriter.DocWriter FinishDocument(); + abstract public TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo); + abstract public void Abort(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermsHashPerField.cs b/external/Lucene.Net.Light/src/core/Index/TermsHashPerField.cs new file mode 100644 index 0000000000..87c1352f47 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermsHashPerField.cs @@ -0,0 +1,639 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Documents; +using Lucene.Net.Support; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Index +{ + + sealed class TermsHashPerField:InvertedDocConsumerPerField + { + private void InitBlock() + { + postingsHashHalfSize = postingsHashSize / 2; + postingsHashMask = postingsHashSize - 1; + postingsHash = new RawPostingList[postingsHashSize]; + } + + internal TermsHashConsumerPerField consumer; + internal TermsHashPerField nextPerField; + internal TermsHashPerThread perThread; + internal DocumentsWriter.DocState docState; + internal FieldInvertState fieldState; + internal ITermAttribute termAtt; + + // Copied from our perThread + internal CharBlockPool charPool; + internal IntBlockPool intPool; + internal ByteBlockPool bytePool; + + internal int streamCount; + internal int numPostingInt; + + internal FieldInfo fieldInfo; + + internal bool postingsCompacted; + internal int numPostings; + private int postingsHashSize = 4; + private int postingsHashHalfSize; + private int postingsHashMask; + private RawPostingList[] postingsHash; + private RawPostingList p; + + public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHashPerThread perThread, TermsHashPerThread nextPerThread, FieldInfo fieldInfo) + { + InitBlock(); + this.perThread = perThread; + intPool = perThread.intPool; + charPool = perThread.charPool; + bytePool = perThread.bytePool; + docState = perThread.docState; + fieldState = docInverterPerField.fieldState; + this.consumer = perThread.consumer.AddField(this, fieldInfo); + streamCount = consumer.GetStreamCount(); + numPostingInt = 2 * streamCount; + this.fieldInfo = fieldInfo; + if (nextPerThread != null) + nextPerField = (TermsHashPerField) nextPerThread.AddField(docInverterPerField, fieldInfo); + else + nextPerField = null; + } + + internal void ShrinkHash(int targetSize) + { + System.Diagnostics.Debug.Assert(postingsCompacted || numPostings == 0); + + int newSize = 4; + + if (newSize != postingsHash.Length) + { + postingsHash = new RawPostingList[newSize]; + postingsHashSize = newSize; + postingsHashHalfSize = newSize / 2; + postingsHashMask = newSize - 1; + } + System.Array.Clear(postingsHash,0,postingsHash.Length); + } + + public void Reset() + { + if (!postingsCompacted) + CompactPostings(); + System.Diagnostics.Debug.Assert(numPostings <= postingsHash.Length); + if (numPostings > 0) + { + perThread.termsHash.RecyclePostings(postingsHash, numPostings); + Array.Clear(postingsHash, 0, numPostings); + numPostings = 0; + } + postingsCompacted = false; + if (nextPerField != null) + nextPerField.Reset(); + } + + public override void Abort() + { + lock (this) + { + Reset(); + if (nextPerField != null) + nextPerField.Abort(); + } + } + + public void InitReader(ByteSliceReader reader, RawPostingList p, int stream) + { + System.Diagnostics.Debug.Assert(stream < streamCount); + int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; + int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK; + reader.Init(bytePool, p.byteStart + stream * ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto + stream]); + } + + private void CompactPostings() + { + lock (this) + { + int upto = 0; + for (int i = 0; i < postingsHashSize; i++) + { + if (postingsHash[i] != null) + { + if (upto < i) + { + postingsHash[upto] = postingsHash[i]; + postingsHash[i] = null; + } + upto++; + } + } + + System.Diagnostics.Debug.Assert(upto == numPostings); + postingsCompacted = true; + } + } + + /// 
Collapse the hash table & sort in-place. + public RawPostingList[] SortPostings() + { + CompactPostings(); + QuickSort(postingsHash, 0, numPostings - 1); + return postingsHash; + } + + internal void QuickSort(RawPostingList[] postings, int lo, int hi) + { + if (lo >= hi) + return ; + else if (hi == 1 + lo) + { + if (ComparePostings(postings[lo], postings[hi]) > 0) + { + RawPostingList tmp = postings[lo]; + postings[lo] = postings[hi]; + postings[hi] = tmp; + } + return ; + } + + int mid = Number.URShift((lo + hi), 1); + + if (ComparePostings(postings[lo], postings[mid]) > 0) + { + RawPostingList tmp = postings[lo]; + postings[lo] = postings[mid]; + postings[mid] = tmp; + } + + if (ComparePostings(postings[mid], postings[hi]) > 0) + { + RawPostingList tmp = postings[mid]; + postings[mid] = postings[hi]; + postings[hi] = tmp; + + if (ComparePostings(postings[lo], postings[mid]) > 0) + { + RawPostingList tmp2 = postings[lo]; + postings[lo] = postings[mid]; + postings[mid] = tmp2; + } + } + + int left = lo + 1; + int right = hi - 1; + + if (left >= right) + return ; + + RawPostingList partition = postings[mid]; + + for (; ; ) + { + while (ComparePostings(postings[right], partition) > 0) + --right; + + while (left < right && ComparePostings(postings[left], partition) <= 0) + ++left; + + if (left < right) + { + RawPostingList tmp = postings[left]; + postings[left] = postings[right]; + postings[right] = tmp; + --right; + } + else + { + break; + } + } + + QuickSort(postings, lo, left); + QuickSort(postings, left + 1, hi); + } + + /// Compares term text for two Posting instance and + /// returns -1 if p1 < p2; 1 if p1 > p2; else 0. + /// + internal int ComparePostings(RawPostingList p1, RawPostingList p2) + { + + if (p1 == p2) + return 0; + + char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK; + char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK; + + System.Diagnostics.Debug.Assert(text1 != text2 || pos1 != pos2); + + while (true) + { + char c1 = text1[pos1++]; + char c2 = text2[pos2++]; + if (c1 != c2) + { + if (0xffff == c2) + return 1; + else if (0xffff == c1) + return - 1; + else + return c1 - c2; + } + else + // This method should never compare equal postings + // unless p1==p2 + System.Diagnostics.Debug.Assert(c1 != 0xffff); + } + } + + /// Test whether the text for current RawPostingList p equals + /// current tokenText. 
+ /// + private bool PostingEquals(char[] tokenText, int tokenTextLen) + { + + char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + System.Diagnostics.Debug.Assert(text != null); + int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK; + + int tokenPos = 0; + for (; tokenPos < tokenTextLen; pos++, tokenPos++) + if (tokenText[tokenPos] != text[pos]) + return false; + return 0xffff == text[pos]; + } + + private bool doCall; + private bool doNextCall; + + internal override void Start(IFieldable f) + { + termAtt = fieldState.attributeSource.AddAttribute(); + consumer.Start(f); + if (nextPerField != null) + { + nextPerField.Start(f); + } + } + + internal override bool Start(IFieldable[] fields, int count) + { + doCall = consumer.Start(fields, count); + if (nextPerField != null) + doNextCall = nextPerField.Start(fields, count); + return doCall || doNextCall; + } + + // Secondary entry point (for 2nd & subsequent TermsHash), + // because token text has already been "interned" into + // textStart, so we hash by textStart + public void Add(int textStart) + { + + int code = textStart; + + int hashPos = code & postingsHashMask; + + System.Diagnostics.Debug.Assert(!postingsCompacted); + + // Locate RawPostingList in hash + p = postingsHash[hashPos]; + + if (p != null && p.textStart != textStart) + { + // Conflict: keep searching different locations in + // the hash table. + int inc = ((code >> 8) + code) | 1; + do + { + code += inc; + hashPos = code & postingsHashMask; + p = postingsHash[hashPos]; + } + while (p != null && p.textStart != textStart); + } + + if (p == null) + { + + // First time we are seeing this token since we last + // flushed the hash. + + // Refill? + if (0 == perThread.freePostingsCount) + perThread.MorePostings(); + + // Pull next free RawPostingList from free list + p = perThread.freePostings[--perThread.freePostingsCount]; + System.Diagnostics.Debug.Assert(p != null); + + p.textStart = textStart; + + System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null); + postingsHash[hashPos] = p; + numPostings++; + + if (numPostings == postingsHashHalfSize) + RehashPostings(2 * postingsHashSize); + + // Init stream slices + if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + intPool.NextBuffer(); + + if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE) + bytePool.NextBuffer(); + + intUptos = intPool.buffer; + intUptoStart = intPool.intUpto; + intPool.intUpto += streamCount; + + p.intStart = intUptoStart + intPool.intOffset; + + for (int i = 0; i < streamCount; i++) + { + int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE); + intUptos[intUptoStart + i] = upto + bytePool.byteOffset; + } + p.byteStart = intUptos[intUptoStart]; + + consumer.NewTerm(p); + } + else + { + intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; + intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK; + consumer.AddTerm(p); + } + } + + // Primary entry point (for first TermsHash) + internal override void Add() + { + + System.Diagnostics.Debug.Assert(!postingsCompacted); + + // We are first in the chain so we must "intern" the + // term text into textStart address + + // Get the text of this term. 
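// Both Add overloads in this class resolve hash collisions with open addressing: the
// probe step is derived from the key's hash as ((code >> 8) + code) | 1, which is
// always odd and therefore cycles through every slot of a power-of-two table. A
// stripped-down sketch of that lookup loop, assuming the table always keeps at least
// one empty slot; FindSlot and the string[] table are illustrative, not part of this
// commit.
static int FindSlot(string[] table, string key, int code)
{
    int mask = table.Length - 1;            // table length is a power of two
    int hashPos = code & mask;
    if (table[hashPos] != null && table[hashPos] != key)
    {
        int inc = ((code >> 8) + code) | 1; // odd step, so the probe visits every slot
        do
        {
            code += inc;
            hashPos = code & mask;
        }
        while (table[hashPos] != null && table[hashPos] != key);
    }
    return hashPos;                          // empty slot, or the slot holding the key
}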
+ char[] tokenText = termAtt.TermBuffer(); + ; + int tokenTextLen = termAtt.TermLength(); + + // Compute hashcode & replace any invalid UTF16 sequences + int downto = tokenTextLen; + int code = 0; + while (downto > 0) + { + char ch = tokenText[--downto]; + + if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) + { + if (0 == downto) + { + // Unpaired + ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR); + } + else + { + char ch2 = tokenText[downto - 1]; + if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END) + { + // OK: high followed by low. This is a valid + // surrogate pair. + code = ((code * 31) + ch) * 31 + ch2; + downto--; + continue; + } + else + { + // Unpaired + ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR); + } + } + } + else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && (ch <= UnicodeUtil.UNI_SUR_HIGH_END || ch == 0xffff)) + { + // Unpaired or 0xffff + ch = tokenText[downto] = (char) (UnicodeUtil.UNI_REPLACEMENT_CHAR); + } + + code = (code * 31) + ch; + } + + int hashPos = code & postingsHashMask; + + // Locate RawPostingList in hash + p = postingsHash[hashPos]; + + if (p != null && !PostingEquals(tokenText, tokenTextLen)) + { + // Conflict: keep searching different locations in + // the hash table. + int inc = ((code >> 8) + code) | 1; + do + { + code += inc; + hashPos = code & postingsHashMask; + p = postingsHash[hashPos]; + } + while (p != null && !PostingEquals(tokenText, tokenTextLen)); + } + + if (p == null) + { + + // First time we are seeing this token since we last + // flushed the hash. + int textLen1 = 1 + tokenTextLen; + if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE) + { + if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) + { + // Just skip this term, to remain as robust as + // possible during indexing. A TokenFilter + // can be inserted into the analyzer chain if + // other behavior is wanted (pruning the term + // to a prefix, throwing an exception, etc). + + if (docState.maxTermPrefix == null) + docState.maxTermPrefix = new System.String(tokenText, 0, 30); + + consumer.SkippingLongTerm(); + return ; + } + charPool.NextBuffer(); + } + + // Refill? 
+ if (0 == perThread.freePostingsCount) + perThread.MorePostings(); + + // Pull next free RawPostingList from free list + p = perThread.freePostings[--perThread.freePostingsCount]; + System.Diagnostics.Debug.Assert(p != null); + + char[] text = charPool.buffer; + int textUpto = charPool.charUpto; + p.textStart = textUpto + charPool.charOffset; + charPool.charUpto += textLen1; + Array.Copy(tokenText, 0, text, textUpto, tokenTextLen); + text[textUpto + tokenTextLen] = (char) (0xffff); + + System.Diagnostics.Debug.Assert(postingsHash [hashPos] == null); + postingsHash[hashPos] = p; + numPostings++; + + if (numPostings == postingsHashHalfSize) + RehashPostings(2 * postingsHashSize); + + // Init stream slices + if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + intPool.NextBuffer(); + + if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt * ByteBlockPool.FIRST_LEVEL_SIZE) + bytePool.NextBuffer(); + + intUptos = intPool.buffer; + intUptoStart = intPool.intUpto; + intPool.intUpto += streamCount; + + p.intStart = intUptoStart + intPool.intOffset; + + for (int i = 0; i < streamCount; i++) + { + int upto = bytePool.NewSlice(ByteBlockPool.FIRST_LEVEL_SIZE); + intUptos[intUptoStart + i] = upto + bytePool.byteOffset; + } + p.byteStart = intUptos[intUptoStart]; + + consumer.NewTerm(p); + } + else + { + intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; + intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK; + consumer.AddTerm(p); + } + + if (doNextCall) + nextPerField.Add(p.textStart); + } + + internal int[] intUptos; + internal int intUptoStart; + + internal void WriteByte(int stream, byte b) + { + int upto = intUptos[intUptoStart + stream]; + byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT]; + System.Diagnostics.Debug.Assert(bytes != null); + int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK; + if (bytes[offset] != 0) + { + // End of slice; allocate a new one + offset = bytePool.AllocSlice(bytes, offset); + bytes = bytePool.buffer; + intUptos[intUptoStart + stream] = offset + bytePool.byteOffset; + } + bytes[offset] = b; + (intUptos[intUptoStart + stream])++; + } + + public void WriteBytes(int stream, byte[] b, int offset, int len) + { + // TODO: optimize + int end = offset + len; + for (int i = offset; i < end; i++) + WriteByte(stream, b[i]); + } + + internal void WriteVInt(int stream, int i) + { + System.Diagnostics.Debug.Assert(stream < streamCount); + while ((i & ~ 0x7F) != 0) + { + WriteByte(stream, (byte) ((i & 0x7f) | 0x80)); + i = Number.URShift(i, 7); + } + WriteByte(stream, (byte) i); + } + + internal override void Finish() + { + consumer.Finish(); + if (nextPerField != null) + nextPerField.Finish(); + } + + /// Called when postings hash is too small (> 50% + /// occupied) or too large (< 20% occupied). 
+ /// + internal void RehashPostings(int newSize) + { + + int newMask = newSize - 1; + + RawPostingList[] newHash = new RawPostingList[newSize]; + for (int i = 0; i < postingsHashSize; i++) + { + RawPostingList p0 = postingsHash[i]; + if (p0 != null) + { + int code; + if (perThread.primary) + { + int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK; + char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; + int pos = start; + while (text[pos] != 0xffff) + pos++; + code = 0; + while (pos > start) + code = (code * 31) + text[--pos]; + } + else + code = p0.textStart; + + int hashPos = code & newMask; + System.Diagnostics.Debug.Assert(hashPos >= 0); + if (newHash[hashPos] != null) + { + int inc = ((code >> 8) + code) | 1; + do + { + code += inc; + hashPos = code & newMask; + } + while (newHash[hashPos] != null); + } + newHash[hashPos] = p0; + } + } + + postingsHashMask = newMask; + postingsHash = newHash; + postingsHashSize = newSize; + postingsHashHalfSize = newSize >> 1; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Index/TermsHashPerThread.cs b/external/Lucene.Net.Light/src/core/Index/TermsHashPerThread.cs new file mode 100644 index 0000000000..f7f550c190 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Index/TermsHashPerThread.cs @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
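// RehashPostings above recovers each term's hash by re-reading its text from the char
// pool: terms are stored with a trailing 0xffff sentinel, and the hash is the usual
// h = h*31 + c recurrence applied from the end of the term backwards, matching the
// order used when the term was first added. A self-contained sketch of that
// recomputation; HashTerminatedTerm and its arguments are illustrative only.
static int HashTerminatedTerm(char[] text, int start)
{
    int pos = start;
    while (text[pos] != 0xffff)       // find the sentinel that ends the term
        pos++;
    int code = 0;
    while (pos > start)
        code = (code * 31) + text[--pos];
    return code;
}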
+ */ + +using System; + +namespace Lucene.Net.Index +{ + + sealed class TermsHashPerThread:InvertedDocConsumerPerThread + { + + internal TermsHash termsHash; + internal TermsHashConsumerPerThread consumer; + internal TermsHashPerThread nextPerThread; + + internal CharBlockPool charPool; + internal IntBlockPool intPool; + internal ByteBlockPool bytePool; + internal bool primary; + internal DocumentsWriter.DocState docState; + + internal RawPostingList[] freePostings = new RawPostingList[256]; + internal int freePostingsCount; + + public TermsHashPerThread(DocInverterPerThread docInverterPerThread, TermsHash termsHash, TermsHash nextTermsHash, TermsHashPerThread primaryPerThread) + { + docState = docInverterPerThread.docState; + + this.termsHash = termsHash; + this.consumer = termsHash.consumer.AddThread(this); + + if (nextTermsHash != null) + { + // We are primary + charPool = new CharBlockPool(termsHash.docWriter); + primary = true; + } + else + { + charPool = primaryPerThread.charPool; + primary = false; + } + + intPool = new IntBlockPool(termsHash.docWriter, termsHash.trackAllocations); + bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator, termsHash.trackAllocations); + + if (nextTermsHash != null) + nextPerThread = nextTermsHash.AddThread(docInverterPerThread, this); + else + nextPerThread = null; + } + + internal override InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo) + { + return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo); + } + + public override void Abort() + { + lock (this) + { + Reset(true); + consumer.Abort(); + if (nextPerThread != null) + nextPerThread.Abort(); + } + } + + // perField calls this when it needs more postings: + internal void MorePostings() + { + System.Diagnostics.Debug.Assert(freePostingsCount == 0); + termsHash.GetPostings(freePostings); + freePostingsCount = freePostings.Length; + System.Diagnostics.Debug.Assert(noNullPostings(freePostings, freePostingsCount, "consumer=" + consumer)); + } + + private static bool noNullPostings(RawPostingList[] postings, int count, System.String details) + { + for (int i = 0; i < count; i++) + System.Diagnostics.Debug.Assert(postings[i] != null, "postings[" + i + "] of " + count + " is null: " + details); + return true; + } + + public override void StartDocument() + { + consumer.StartDocument(); + if (nextPerThread != null) + nextPerThread.consumer.StartDocument(); + } + + public override DocumentsWriter.DocWriter FinishDocument() + { + DocumentsWriter.DocWriter doc = consumer.FinishDocument(); + + DocumentsWriter.DocWriter doc2; + if (nextPerThread != null) + doc2 = nextPerThread.consumer.FinishDocument(); + else + doc2 = null; + if (doc == null) + return doc2; + else + { + doc.SetNext(doc2); + return doc; + } + } + + // Clear all state + internal void Reset(bool recyclePostings) + { + intPool.Reset(); + bytePool.Reset(); + + if (primary) + charPool.Reset(); + + if (recyclePostings) + { + termsHash.RecyclePostings(freePostings, freePostingsCount); + freePostingsCount = 0; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/LZOCompressor.cs b/external/Lucene.Net.Light/src/core/LZOCompressor.cs new file mode 100644 index 0000000000..da0d715eb5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/LZOCompressor.cs @@ -0,0 +1,135 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +// LZO.Net +// $Id: LZOCompressor.cs,v 1.1 2004/02/22 17:44:04 laptop Exp $ + +namespace Simplicit.Net.Lzo { + using System; + using System.Diagnostics; + using System.Runtime.InteropServices; + + /// + /// Wrapper class for the highly performant LZO compression library + /// + public class LZOCompressor { + private static TraceSwitch _traceSwitch = new TraceSwitch("Simplicit.Net.Lzo", "Switch for tracing of the LZOCompressor-Class"); + + #region Dll-Imports + [DllImport("lzo.dll")] + private static extern int __lzo_init3(); + [DllImport("lzo.dll")] + private static extern string lzo_version_string(); + [DllImport("lzo.dll")] + private static extern string lzo_version_date(); + [DllImport("lzo.dll")] + private static extern int lzo1x_1_compress( + byte[] src, + int src_len, + byte[] dst, + ref int dst_len, + byte[] wrkmem + ); + [DllImport("lzo.dll")] + private static extern int lzo1x_decompress( + byte[] src, + int src_len, + byte[] dst, + ref int dst_len, + byte[] wrkmem); + #endregion + + private byte[] _workMemory = new byte[16384L * 4]; + + static LZOCompressor() { + int init = __lzo_init3(); + if(init != 0) { + throw new Exception("Initialization of LZO-Compressor failed !"); + } + } + + /// + /// Constructor. + /// + public LZOCompressor() { + } + + /// + /// Version string of the compression library. + /// + public string Version { + get { + return lzo_version_string(); + } + } + + /// + /// Version date of the compression library + /// + public string VersionDate { + get { + return lzo_version_date(); + } + } + + /// + /// Compresses a byte array and returns the compressed data in a new + /// array. You need the original length of the array to decompress it. + /// + /// Source array for compression + /// Byte array containing the compressed data + public byte[] Compress(byte[] src) { + if(_traceSwitch.TraceVerbose) { + Trace.WriteLine(String.Format("LZOCompressor: trying to compress {0}", src.Length)); + } + byte[] dst = new byte[src.Length + src.Length / 64 + 16 + 3 + 4]; + int outlen = 0; + lzo1x_1_compress(src, src.Length, dst, ref outlen, _workMemory); + if(_traceSwitch.TraceVerbose) { + Trace.WriteLine(String.Format("LZOCompressor: compressed {0} to {1} bytes", src.Length, outlen)); + } + byte[] ret = new byte[outlen + 4]; + Array.Copy(dst, 0, ret, 0, outlen); + byte[] outlenarr = BitConverter.GetBytes(src.Length); + Array.Copy(outlenarr, 0, ret, outlen, 4); + return ret; + } + + /// + /// Decompresses compressed data to its original state. 
+ /// + /// Source array to be decompressed + /// Decompressed data + public byte[] Decompress(byte[] src) { + if(_traceSwitch.TraceVerbose) { + Trace.WriteLine(String.Format("LZOCompressor: trying to decompress {0}", src.Length)); + } + int origlen = BitConverter.ToInt32(src, src.Length - 4); + byte[] dst = new byte[origlen]; + int outlen = origlen; + lzo1x_decompress(src, src.Length - 4, dst, ref outlen, _workMemory); + if(_traceSwitch.TraceVerbose) { + Trace.WriteLine(String.Format("LZOCompressor: decompressed {0} to {1} bytes", src.Length, origlen)); + } + return dst; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/LucenePackage.cs b/external/Lucene.Net.Light/src/core/LucenePackage.cs new file mode 100644 index 0000000000..9904b411c8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/LucenePackage.cs @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net +{ + + /// Lucene's package information, including version. * + public sealed class LucenePackage + { + + private LucenePackage() + { + } // can't construct + + /* + * /// Return Lucene's package, including version information. + // {{Aroush-1.9}} + public static Package Get() + { + return typeof(LucenePackage).getPackage(); + } + */ + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Messages/INLSException.cs b/external/Lucene.Net.Light/src/core/Messages/INLSException.cs new file mode 100644 index 0000000000..99c3248888 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Messages/INLSException.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Messages +{ + + /// Interface that exceptions should implement to support lazy loading of messages. + /// + /// For Native Language Support (NLS), system of software internationalization. 
+ /// + /// This Interface should be implemented by all exceptions that require + /// translation + /// + /// + public interface INLSException + { + /// a instance of a class that implements the Message interface + Message MessageObject { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Messages/Message.cs b/external/Lucene.Net.Light/src/core/Messages/Message.cs new file mode 100644 index 0000000000..d9c2b03e46 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Messages/Message.cs @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Messages +{ + + /// Message Interface for a lazy loading. + /// For Native Language Support (NLS), system of software internationalization. + /// + public interface Message + { + string Key { get; } + + object[] GetArguments(); + + System.String GetLocalizedMessage(); + + System.String GetLocalizedMessage(System.Globalization.CultureInfo locale); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Messages/MessageImpl.cs b/external/Lucene.Net.Light/src/core/Messages/MessageImpl.cs new file mode 100644 index 0000000000..8e532156fb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Messages/MessageImpl.cs @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Text; + +namespace Lucene.Net.Messages +{ + + /// Default implementation of Message interface. + /// For Native Language Support (NLS), system of software internationalization. 
+ /// + [Serializable] + public class MessageImpl : Message + { + + private const long serialVersionUID = - 3077643314630884523L; + + private System.String key; + + private System.Object[] arguments = new System.Object[0]; + + public MessageImpl(System.String key) + { + this.key = key; + } + + public MessageImpl(System.String key, params System.Object[] args):this(key) + { + this.arguments = args; + } + + public virtual object[] GetArguments() + { + return this.arguments; + } + + public virtual string Key + { + get { return this.key; } + } + + public virtual string GetLocalizedMessage() + { + return GetLocalizedMessage(System.Threading.Thread.CurrentThread.CurrentCulture); + } + + public virtual string GetLocalizedMessage(System.Globalization.CultureInfo locale) + { + return NLS.GetLocalizedMessage(Key, locale, GetArguments()); + } + + public override string ToString() + { + System.Object[] args = GetArguments(); + StringBuilder argsString = new StringBuilder(); + if (args != null) + { + for (int i = 0; i < args.Length; i++) + { + argsString.Append(i == 0 ? " " : ", ").Append(args[i]); + } + } + return argsString.ToString(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Messages/NLS.cs b/external/Lucene.Net.Light/src/core/Messages/NLS.cs new file mode 100644 index 0000000000..9677de2442 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Messages/NLS.cs @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Support; + +namespace Lucene.Net.Messages +{ + + /// MessageBundles classes extend this class, to implement a bundle. + /// + /// For Native Language Support (NLS), system of software internationalization. + /// + /// This interface is similar to the NLS class in eclipse.osgi.util.NLS class - + /// initializeMessages() method resets the values of all static strings, should + /// only be called by classes that extend from NLS (see TestMessages.java for + /// reference) - performs validation of all message in a bundle, at class load + /// time - performs per message validation at runtime - see NLSTest.java for + /// usage reference + /// + /// MessageBundle classes may subclass this type. + /// + public class NLS + { + public interface IPriviligedAction + { + /// + /// Performs the priviliged action. + /// + /// A value that may represent the result of the action. 
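// Usage sketch for MessageImpl above: the key and arguments are stored for lazy
// formatting, and GetLocalizedMessage defers to NLS, which (per the code below)
// falls back to a "Message with key: ... not found." string when no resource bundle
// has been registered. The key name here is illustrative only.
using System;
using Lucene.Net.Messages;

class MessageSketch
{
    static void Main()
    {
        Message m = new MessageImpl("EXAMPLE_KEY", "first", 42);
        Console.WriteLine(m.Key);                    // EXAMPLE_KEY
        Console.WriteLine(m.GetArguments().Length);  // 2
        Console.WriteLine(m.GetLocalizedMessage());  // fallback text when no bundle is registered
    }
}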
+ System.Object Run(); + } + + private class AnonymousClassPrivilegedAction : IPriviligedAction + { + public AnonymousClassPrivilegedAction(System.Reflection.FieldInfo field) + { + InitBlock(field); + } + private void InitBlock(System.Reflection.FieldInfo field) + { + this.field = field; + } + private System.Reflection.FieldInfo field; + public virtual System.Object Run() + { + // field.setAccessible(true); // {{Aroush-2.9}} java.lang.reflect.AccessibleObject.setAccessible + return null; + } + } + + private static IDictionary bundles = new HashMap(0); + + protected internal NLS() + { + // Do not instantiate + } + + public static System.String GetLocalizedMessage(System.String key) + { + return GetLocalizedMessage(key, System.Threading.Thread.CurrentThread.CurrentCulture); + } + + public static System.String GetLocalizedMessage(System.String key, System.Globalization.CultureInfo locale) + { + System.Object message = GetResourceBundleObject(key, locale); + if (message == null) + { + return "Message with key:" + key + " and locale: " + locale + " not found."; + } + return message.ToString(); + } + + public static System.String GetLocalizedMessage(System.String key, System.Globalization.CultureInfo locale, params System.Object[] args) + { + System.String str = GetLocalizedMessage(key, locale); + + if (args.Length > 0) + { + str = System.String.Format(str, args); + } + + return str; + } + + public static System.String GetLocalizedMessage(System.String key, params System.Object[] args) + { + return GetLocalizedMessage(key, System.Threading.Thread.CurrentThread.CurrentCulture, args); + } + + /// Initialize a given class with the message bundle Keys Should be called from + /// a class that extends NLS in a static block at class load time. + /// + /// + /// Property file with that contains the message bundle + /// + /// where constants will reside + /// + //@SuppressWarnings("unchecked") + protected internal static void InitializeMessages(System.String bundleName) + { + try + { + Load(); + if (!bundles.ContainsKey(bundleName)) + bundles[bundleName] = typeof(T); + } + catch (System.Exception) + { + // ignore all errors and exceptions + // because this function is supposed to be called at class load time. + } + } + + private static System.Object GetResourceBundleObject(System.String messageKey, System.Globalization.CultureInfo locale) + { + + // slow resource checking + // need to loop thru all registered resource bundles + for (var it = bundles.Keys.GetEnumerator(); it.MoveNext(); ) + { + System.Type clazz = bundles[it.Current]; + System.Threading.Thread.CurrentThread.CurrentUICulture = locale; + System.Resources.ResourceManager resourceBundle = System.Resources.ResourceManager.CreateFileBasedResourceManager(clazz.Name, "Messages", null); //{{Lucene.Net-2.9.1}} Can we make resourceDir "Messages" more general? 
+ if (resourceBundle != null) + { + try + { + System.Object obj = resourceBundle.GetObject(messageKey); + if (obj != null) + return obj; + } + catch (System.Resources.MissingManifestResourceException) + { + // just continue it might be on the next resource bundle + } + } + } + // if resource is not found + return null; + } + + private static void Load() + { + var clazz = typeof (T); + System.Reflection.FieldInfo[] fieldArray = clazz.GetFields(System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Static); + + bool isFieldAccessible = clazz.IsPublic; + + // build a map of field names to Field objects + int len = fieldArray.Length; + var fields = new HashMap(len * 2); + for (int i = 0; i < len; i++) + { + fields[fieldArray[i].Name] = fieldArray[i]; + LoadfieldValue(fieldArray[i], isFieldAccessible); + } + } + + /// + /// + private static void LoadfieldValue(System.Reflection.FieldInfo field, bool isFieldAccessible) + { + var clazz = typeof (T); + /* + int MOD_EXPECTED = Modifier.PUBLIC | Modifier.STATIC; + int MOD_MASK = MOD_EXPECTED | Modifier.FINAL; + if ((field.getModifiers() & MOD_MASK) != MOD_EXPECTED) + return ; + */ + if (!(field.IsPublic || field.IsStatic)) + return ; + + // Set a value for this empty field. + if (!isFieldAccessible) + MakeAccessible(field); + try + { + field.SetValue(null, field.Name); + ValidateMessage(field.Name); + } + catch (System.ArgumentException) + { + // should not happen + } + catch (System.UnauthorizedAccessException) + { + // should not happen + } + } + + /// - Message Key + /// + private static void ValidateMessage(System.String key) + { + // Test if the message is present in the resource bundle + var clazz = typeof (T); + try + { + System.Threading.Thread.CurrentThread.CurrentUICulture = System.Threading.Thread.CurrentThread.CurrentCulture; + System.Resources.ResourceManager resourceBundle = System.Resources.ResourceManager.CreateFileBasedResourceManager(clazz.FullName, "", null); + if (resourceBundle != null) + { + System.Object obj = resourceBundle.GetObject(key); + if (obj == null) + { + System.Console.Error.WriteLine("WARN: Message with key:" + key + " and locale: " + System.Threading.Thread.CurrentThread.CurrentCulture + " not found."); + } + } + } + catch (System.Resources.MissingManifestResourceException) + { + System.Console.Error.WriteLine("WARN: Message with key:" + key + " and locale: " + System.Threading.Thread.CurrentThread.CurrentCulture + " not found."); + } + catch (System.Exception) + { + // ignore all other errors and exceptions + // since this code is just a test to see if the message is present on the + // system + } + } + + /* + * Make a class field accessible + */ + //@SuppressWarnings("unchecked") + private static void MakeAccessible(System.Reflection.FieldInfo field) + { + if (System.Security.SecurityManager.SecurityEnabled) + { + //field.setAccessible(true); // {{Aroush-2.9}} java.lang.reflect.AccessibleObject.setAccessible + } + else + { + //AccessController.doPrivileged(new AnonymousClassPrivilegedAction(field)); // {{Aroush-2.9}} java.security.AccessController.doPrivileged + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/CharStream.cs b/external/Lucene.Net.Light/src/core/QueryParser/CharStream.cs new file mode 100644 index 0000000000..cfcdb4fbc7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/CharStream.cs @@ 
-0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */ +/* JavaCCOptions:STATIC=false */ + +using System; + +namespace Lucene.Net.QueryParsers +{ + + /// This interface describes a character stream that maintains line and + /// column number positions of the characters. It also has the capability + /// to backup the stream to some extent. An implementation of this + /// interface is used in the TokenManager implementation generated by + /// JavaCCParser. + /// + /// All the methods except backup can be implemented in any fashion. backup + /// needs to be implemented correctly for the correct operation of the lexer. + /// Rest of the methods are all used to get information like line number, + /// column number and the String that constitutes a token and are not used + /// by the lexer. Hence their implementation won't affect the generated lexer's + /// operation. + /// + + public interface ICharStream + { + /// Returns the next character from the selected input. The method + /// of selecting the input is the responsibility of the class + /// implementing this interface. Can throw any java.io.IOException. + /// + char ReadChar(); + + /// Returns the column position of the character last read. + /// + /// + /// + /// + [Obsolete] + int Column { get; } + + /// Returns the line number of the character last read. + /// + /// + /// + /// + [Obsolete] + int Line { get; } + + /// Returns the column number of the last character for current token (being + /// matched after the last call to BeginTOken). + /// + int EndColumn { get; } + + /// Returns the line number of the last character for current token (being + /// matched after the last call to BeginTOken). + /// + int EndLine { get; } + + /// Returns the column number of the first character for current token (being + /// matched after the last call to BeginTOken). + /// + int BeginColumn { get; } + + /// Returns the line number of the first character for current token (being + /// matched after the last call to BeginTOken). + /// + int BeginLine { get; } + + /// Backs up the input stream by amount steps. Lexer calls this method if it + /// had already read some characters, but could not use them to match a + /// (longer) token. So, they will be used again as the prefix of the next + /// token and it is the implemetation's responsibility to do this right. + /// + void Backup(int amount); + + /// Returns the next character that marks the beginning of the next token. + /// All characters must remain in the buffer between two successive calls + /// to this method to implement backup correctly. 
+ /// + char BeginToken(); + + /// Returns a string made up of characters from the marked token beginning + /// to the current buffer position. Implementations have the choice of returning + /// anything that they want to. For example, for efficiency, one might decide + /// to just return null, which is a valid implementation. + /// + string Image { get; } + + /// Returns an array of characters that make up the suffix of length 'len' for + /// the currently matched token. This is used to build up the matched string + /// for use in actions in the case of MORE. A simple and inefficient + /// implementation of this is as follows : + /// + /// { + /// String t = GetImage(); + /// return t.substring(t.length() - len, t.length()).toCharArray(); + /// } + /// + char[] GetSuffix(int len); + + /// The lexer calls this function to indicate that it is done with the stream + /// and hence implementations can free any resources held by this class. + /// Again, the body of this function can be just empty and it will not + /// affect the lexer's operation. + /// + void Done(); + } + /* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */ +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/FastCharStream.cs b/external/Lucene.Net.Light/src/core/QueryParser/FastCharStream.cs new file mode 100644 index 0000000000..62876f30c0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/FastCharStream.cs @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// FastCharStream.java + +using System; + +namespace Lucene.Net.QueryParsers +{ + + /// An efficient implementation of JavaCC's CharStream interface.

Note that + /// this does not do line-number counting, but instead keeps track of the + /// character position of the token in the input, as required by Lucene's + /// API. + /// + ///
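A rough, hedged sketch of how this stream is driven by the generated lexer (the input string and variable names below are invented; the members are the ones declared in this patch, and the snippet assumes the usual using directives for System.IO and Lucene.Net.QueryParsers inside a method body):

    ICharStream cs = new FastCharStream(new StringReader("title:hello"));
    char first = cs.BeginToken();   // marks the token start and returns 't'
    char next  = cs.ReadChar();     // returns 'i'
    cs.Backup(1);                   // pushes 'i' back so it is re-read next time
    string image = cs.Image;        // text consumed since BeginToken, here "t"
    cs.Done();                      // releases the underlying reader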

+ public sealed class FastCharStream : ICharStream + { + internal char[] buffer = null; + + internal int bufferLength = 0; // end of valid chars + internal int bufferPosition = 0; // next char to read + + internal int tokenStart = 0; // offset in buffer + internal int bufferStart = 0; // position in file of buffer + + internal System.IO.TextReader input; // source of chars + + /// Constructs from a Reader. + public FastCharStream(System.IO.TextReader r) + { + input = r; + } + + public char ReadChar() + { + if (bufferPosition >= bufferLength) + Refill(); + return buffer[bufferPosition++]; + } + + private void Refill() + { + int newPosition = bufferLength - tokenStart; + + if (tokenStart == 0) + { + // token won't fit in buffer + if (buffer == null) + { + // first time: alloc buffer + buffer = new char[2048]; + } + else if (bufferLength == buffer.Length) + { + // grow buffer + char[] newBuffer = new char[buffer.Length * 2]; + Array.Copy(buffer, 0, newBuffer, 0, bufferLength); + buffer = newBuffer; + } + } + else + { + // shift token to front + Array.Copy(buffer, tokenStart, buffer, 0, newPosition); + } + + bufferLength = newPosition; // update state + bufferPosition = newPosition; + bufferStart += tokenStart; + tokenStart = 0; + + int charsRead = input.Read(buffer, newPosition, buffer.Length - newPosition); + if (charsRead <= 0) + throw new System.IO.IOException("read past eof"); + else + bufferLength += charsRead; + } + + public char BeginToken() + { + tokenStart = bufferPosition; + return ReadChar(); + } + + public void Backup(int amount) + { + bufferPosition -= amount; + } + + public string Image + { + get { return new System.String(buffer, tokenStart, bufferPosition - tokenStart); } + } + + public char[] GetSuffix(int len) + { + char[] value_Renamed = new char[len]; + Array.Copy(buffer, bufferPosition - len, value_Renamed, 0, len); + return value_Renamed; + } + + public void Done() + { + try + { + input.Close(); + } + catch (System.IO.IOException e) + { + System.Console.Error.WriteLine("Caught: " + e + "; ignoring."); + } + } + + public int Column + { + get { return bufferStart + bufferPosition; } + } + + public int Line + { + get { return 1; } + } + + public int EndColumn + { + get { return bufferStart + bufferPosition; } + } + + public int EndLine + { + get { return 1; } + } + + public int BeginColumn + { + get { return bufferStart + tokenStart; } + } + + public int BeginLine + { + get { return 1; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/MultiFieldQueryParser.cs b/external/Lucene.Net.Light/src/core/QueryParser/MultiFieldQueryParser.cs new file mode 100644 index 0000000000..f506f346a3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/MultiFieldQueryParser.cs @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Search; +using Analyzer = Lucene.Net.Analysis.Analyzer; +using BooleanClause = Lucene.Net.Search.BooleanClause; +using BooleanQuery = Lucene.Net.Search.BooleanQuery; +using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery; +using PhraseQuery = Lucene.Net.Search.PhraseQuery; +using Query = Lucene.Net.Search.Query; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.QueryParsers +{ + + /// A QueryParser which constructs queries to search multiple fields. + /// + /// + /// $Revision: 829231 $ + /// + public class MultiFieldQueryParser : QueryParser + { + protected internal string[] fields; + protected internal IDictionary boosts; + + /// Creates a MultiFieldQueryParser. Allows passing of a map with term to + /// Boost, and the boost to apply to each term. + /// + ///

+ /// It will, when parse(String query) is called, construct a query like this + /// (assuming the query consists of two terms and you specify the two fields + /// title and body): + ///

+ /// + /// + /// (title:term1 body:term1) (title:term2 body:term2) + /// + /// + ///

+ /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: + ///

+ /// + /// + /// +(title:term1 body:term1) +(title:term2 body:term2) + /// + /// + ///

+ /// When you pass a boost (title=>5 body=>10) you can get + ///

+ /// + /// + /// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) + /// + /// + ///

+ /// In other words, all the query's terms must appear, but it doesn't matter + /// in what fields they appear. + ///
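A hedged usage sketch of the boost behaviour described above (the analyzer instance and the exact generic parameters of the boosts dictionary are assumptions, not taken from this patch):

    var boosts = new Dictionary<string, float> { { "title", 5f }, { "body", 10f } };
    var parser = new MultiFieldQueryParser(Version.LUCENE_30,
                                           new[] { "title", "body" },
                                           analyzer,   // any Analyzer instance
                                           boosts);
    Query q = parser.Parse("term1 term2");
    // roughly: (title:term1^5.0 body:term1^10.0) (title:term2^5.0 body:term2^10.0)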

+ ///

+ public MultiFieldQueryParser(Version matchVersion, string[] fields, Analyzer analyzer, IDictionary boosts) + : this(matchVersion, fields, analyzer) + { + this.boosts = boosts; + } + + /// Creates a MultiFieldQueryParser. + /// + ///

+ /// It will, when parse(String query) is called, construct a query like this + /// (assuming the query consists of two terms and you specify the two fields + /// title and body): + ///

+ /// + /// + /// (title:term1 body:term1) (title:term2 body:term2) + /// + /// + ///

+ /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: + ///

+ /// + /// + /// +(title:term1 body:term1) +(title:term2 body:term2) + /// + /// + ///

+ /// In other words, all the query's terms must appear, but it doesn't matter + /// in what fields they appear. + ///

+ ///

+ public MultiFieldQueryParser(Version matchVersion, System.String[] fields, Analyzer analyzer) + : base(matchVersion, null, analyzer) + { + this.fields = fields; + } + + protected internal override Query GetFieldQuery(string field, string queryText, int slop) + { + if (field == null) + { + IList clauses = new List(); + for (int i = 0; i < fields.Length; i++) + { + Query q = base.GetFieldQuery(fields[i], queryText); + if (q != null) + { + //If the user passes a map of boosts + if (boosts != null) + { + //Get the boost from the map and apply them + Single boost = boosts[fields[i]]; + q.Boost = boost; + } + ApplySlop(q, slop); + clauses.Add(new BooleanClause(q, Occur.SHOULD)); + } + } + if (clauses.Count == 0) + // happens for stopwords + return null; + return GetBooleanQuery(clauses, true); + } + Query q2 = base.GetFieldQuery(field, queryText); + ApplySlop(q2, slop); + return q2; + } + + private void ApplySlop(Query q, int slop) + { + if (q is PhraseQuery) + { + ((PhraseQuery)q).Slop = slop; + } + else if (q is MultiPhraseQuery) + { + ((MultiPhraseQuery)q).Slop = slop; + } + } + + + protected internal override Query GetFieldQuery(System.String field, System.String queryText) + { + return GetFieldQuery(field, queryText, 0); + } + + + protected internal override Query GetFuzzyQuery(System.String field, System.String termStr, float minSimilarity) + { + if (field == null) + { + IList clauses = new List(); + for (int i = 0; i < fields.Length; i++) + { + clauses.Add(new BooleanClause(GetFuzzyQuery(fields[i], termStr, minSimilarity), Occur.SHOULD)); + } + return GetBooleanQuery(clauses, true); + } + return base.GetFuzzyQuery(field, termStr, minSimilarity); + } + + protected internal override Query GetPrefixQuery(System.String field, System.String termStr) + { + if (field == null) + { + IList clauses = new List(); + for (int i = 0; i < fields.Length; i++) + { + clauses.Add(new BooleanClause(GetPrefixQuery(fields[i], termStr), Occur.SHOULD)); + } + return GetBooleanQuery(clauses, true); + } + return base.GetPrefixQuery(field, termStr); + } + + protected internal override Query GetWildcardQuery(System.String field, System.String termStr) + { + if (field == null) + { + IList clauses = new List(); + for (int i = 0; i < fields.Length; i++) + { + clauses.Add(new BooleanClause(GetWildcardQuery(fields[i], termStr), Occur.SHOULD)); + } + return GetBooleanQuery(clauses, true); + } + return base.GetWildcardQuery(field, termStr); + } + + + protected internal override Query GetRangeQuery(System.String field, System.String part1, System.String part2, bool inclusive) + { + if (field == null) + { + IList clauses = new List(); + for (int i = 0; i < fields.Length; i++) + { + clauses.Add(new BooleanClause(GetRangeQuery(fields[i], part1, part2, inclusive), Occur.SHOULD)); + } + return GetBooleanQuery(clauses, true); + } + return base.GetRangeQuery(field, part1, part2, inclusive); + } + + /// Parses a query which searches on the fields specified. + ///

+ /// If x fields are specified, this effectively constructs: + /// + /// + /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx) + /// + /// + ///
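A minimal hedged sketch of the parallel-array overload documented above (field names, query strings and the analyzer are placeholders):

    string[] queries = { "lucene", "net", "monodoc" };
    string[] fields  = { "title", "body", "keywords" };
    Query q = MultiFieldQueryParser.Parse(Version.LUCENE_30, queries, fields, analyzer);
    // roughly: (title:lucene) (body:net) (keywords:monodoc)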

+ /// Lucene version to match; this is passed through to + /// QueryParser. + /// + /// Queries strings to parse + /// + /// Fields to search on + /// + /// Analyzer to use + /// + /// ParseException + /// if query parsing fails + /// + /// IllegalArgumentException + /// if the length of the queries array differs from the length of + /// the fields array + /// + public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, Analyzer analyzer) + { + if (queries.Length != fields.Length) + throw new System.ArgumentException("queries.length != fields.length"); + BooleanQuery bQuery = new BooleanQuery(); + for (int i = 0; i < fields.Length; i++) + { + QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); + Query q = qp.Parse(queries[i]); + if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0)) + { + bQuery.Add(q, Occur.SHOULD); + } + } + return bQuery; + } + + /// Parses a query, searching on the fields specified. Use this if you need + /// to specify certain fields as required, and others as prohibited. + ///

+ /// Usage: + /// + /// String[] fields = {"filename", "contents", "description"}; + /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + /// BooleanClause.Occur.MUST, + /// BooleanClause.Occur.MUST_NOT}; + /// MultiFieldQueryParser.parse("query", fields, flags, analyzer); + /// + ///

+ /// The code above would construct a query: + /// + /// + /// (filename:query) +(contents:query) -(description:query) + /// + /// + ///
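The snippet in the remark above is written in Java style; against the C# surface added in this file the equivalent call would look roughly like this (the analyzer is assumed):

    string[] fields = { "filename", "contents", "description" };
    Occur[] flags   = { Occur.SHOULD, Occur.MUST, Occur.MUST_NOT };
    Query q = MultiFieldQueryParser.Parse(Version.LUCENE_30, "query", fields, flags, analyzer);
    // roughly: (filename:query) +(contents:query) -(description:query)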

+ /// Lucene version to match; this is passed through to + /// QueryParser. + /// + /// Query string to parse + /// + /// Fields to search on + /// + /// Flags describing the fields + /// + /// Analyzer to use + /// + /// ParseException + /// if query parsing fails + /// + /// IllegalArgumentException + /// if the length of the fields array differs from the length of + /// the flags array + /// + public static Query Parse(Version matchVersion, System.String query, System.String[] fields, Occur[] flags, Analyzer analyzer) + { + if (fields.Length != flags.Length) + throw new System.ArgumentException("fields.length != flags.length"); + BooleanQuery bQuery = new BooleanQuery(); + for (int i = 0; i < fields.Length; i++) + { + QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); + Query q = qp.Parse(query); + if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0)) + { + bQuery.Add(q, flags[i]); + } + } + return bQuery; + } + + /// Parses a query, searching on the fields specified. Use this if you need + /// to specify certain fields as required, and others as prohibited. + ///

+ /// Usage: + /// + /// String[] query = {"query1", "query2", "query3"}; + /// String[] fields = {"filename", "contents", "description"}; + /// BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, + /// BooleanClause.Occur.MUST, + /// BooleanClause.Occur.MUST_NOT}; + /// MultiFieldQueryParser.parse(query, fields, flags, analyzer); + /// + ///

+ /// The code above would construct a query: + /// + /// + /// (filename:query1) +(contents:query2) -(description:query3) + /// + /// + ///

+ /// Lucene version to match; this is passed through to + /// QueryParser. + /// + /// Queries string to parse + /// + /// Fields to search on + /// + /// Flags describing the fields + /// + /// Analyzer to use + /// + /// ParseException + /// if query parsing fails + /// + /// IllegalArgumentException + /// if the length of the queries, fields, and flags array differ + /// + public static Query Parse(Version matchVersion, System.String[] queries, System.String[] fields, Occur[] flags, Analyzer analyzer) + { + if (!(queries.Length == fields.Length && queries.Length == flags.Length)) + throw new System.ArgumentException("queries, fields, and flags array have have different length"); + BooleanQuery bQuery = new BooleanQuery(); + for (int i = 0; i < fields.Length; i++) + { + QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer); + Query q = qp.Parse(queries[i]); + if (q != null && (!(q is BooleanQuery) || ((BooleanQuery)q).GetClauses().Length > 0)) + { + bQuery.Add(q, flags[i]); + } + } + return bQuery; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/ParseException.cs b/external/Lucene.Net.Light/src/core/QueryParser/ParseException.cs new file mode 100644 index 0000000000..ab0fbca147 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/ParseException.cs @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */ +/* JavaCCOptions:KEEP_LINE_COL=null */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.QueryParsers +{ + + /// This exception is thrown when parse errors are encountered. + /// You can explicitly create objects of this exception type by + /// calling the method generateParseException in the generated + /// parser. + /// + /// You can modify this class to customize your error reporting + /// mechanisms so long as you retain the public fields. + /// + [Serializable] + public class ParseException:System.Exception + { + /// This method has the standard behavior when this object has been + /// created using the standard constructors. Otherwise, it uses + /// "currentToken" and "expectedTokenSequences" to generate a parse + /// error message and returns it. If this object has been created + /// due to a parse error, and you do not catch it (it gets thrown + /// from the parser), then this method is called during the printing + /// of the final stack trace, and hence the correct error message + /// gets displayed. 
+ /// + public override System.String Message + { + get + { + if (!specialConstructor) + { + return base.Message; + } + System.Text.StringBuilder expected = new System.Text.StringBuilder(); + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.Length; i++) + { + if (maxSize < expectedTokenSequences[i].Length) + { + maxSize = expectedTokenSequences[i].Length; + } + for (int j = 0; j < expectedTokenSequences[i].Length; j++) + { + expected.Append(tokenImage[expectedTokenSequences[i][j]]).Append(' '); + } + if (expectedTokenSequences[i][expectedTokenSequences[i].Length - 1] != 0) + { + expected.Append("..."); + } + expected.Append(eol).Append(" "); + } + System.String retval = "Encountered \""; + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) + { + if (i != 0) + retval += " "; + if (tok.kind == 0) + { + retval += tokenImage[0]; + break; + } + retval += (" " + tokenImage[tok.kind]); + retval += " \""; + retval += Add_escapes(tok.image); + retval += " \""; + tok = tok.next; + } + retval += ("\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn); + retval += ("." + eol); + if (expectedTokenSequences.Length == 1) + { + retval += ("Was expecting:" + eol + " "); + } + else + { + retval += ("Was expecting one of:" + eol + " "); + } + retval += expected.ToString(); + return retval; + } + + } + + /// This constructor is used by the method "generateParseException" + /// in the generated parser. Calling this constructor generates + /// a new object of this type with the fields "currentToken", + /// "expectedTokenSequences", and "tokenImage" set. The boolean + /// flag "specialConstructor" is also set to true to indicate that + /// this constructor was used to create this object. + /// This constructor calls its super class with the empty string + /// to force the "toString" method of parent class "Throwable" to + /// print the error message in the form: + /// ParseException: <result of getMessage> + /// + public ParseException(Token currentTokenVal, int[][] expectedTokenSequencesVal, System.String[] tokenImageVal):base("") + { + specialConstructor = true; + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /// The following constructors are for use by you for whatever + /// purpose you can think of. Constructing the exception in this + /// manner makes the exception behave in the normal way - i.e., as + /// documented in the class "Throwable". The fields "errorToken", + /// "expectedTokenSequences", and "tokenImage" do not contain + /// relevant information. The JavaCC generated code does not use + /// these constructors. + /// + + public ParseException():base() + { + specialConstructor = false; + } + + /// Constructor with message. + public ParseException(System.String message):base(message) + { + specialConstructor = false; + } + + /// Constructor with message. + public ParseException(System.String message, System.Exception ex) : base(message, ex) + { + specialConstructor = false; + } + + /// This variable determines which constructor was used to create + /// this object and thereby affects the semantics of the + /// "getMessage" method (see below). + /// + protected internal bool specialConstructor; + + /// This is the last token that has been consumed successfully. If + /// this object has been created due to a parse error, the token + /// followng this token will (therefore) be the first error token. 
+ /// + public Token currentToken; + + /// Each entry in this array is an array of integers. Each array + /// of integers represents a sequence of tokens (by their ordinal + /// values) that is expected at this point of the parse. + /// + public int[][] expectedTokenSequences; + + /// This is a reference to the "tokenImage" array of the generated + /// parser within which the parse error occurred. This array is + /// defined in the generated ...Constants interface. + /// + public System.String[] tokenImage; + + /// The end of line string for this machine. + protected internal System.String eol = AppSettings.Get("line.separator", "\n"); + + /// Used to convert raw characters to their escaped version + /// when these raw version cannot be used as part of an ASCII + /// string literal. + /// + protected internal virtual System.String Add_escapes(System.String str) + { + System.Text.StringBuilder retval = new System.Text.StringBuilder(); + char ch; + for (int i = 0; i < str.Length; i++) + { + switch (str[i]) + { + + case (char) (0): + continue; + + case '\b': + retval.Append("\\b"); + continue; + + case '\t': + retval.Append("\\t"); + continue; + + case '\n': + retval.Append("\\n"); + continue; + + case '\f': + retval.Append("\\f"); + continue; + + case '\r': + retval.Append("\\r"); + continue; + + case '\"': + retval.Append("\\\""); + continue; + + case '\'': + retval.Append("\\\'"); + continue; + + case '\\': + retval.Append("\\\\"); + continue; + + default: + if ((ch = str[i]) < 0x20 || ch > 0x7e) + { + System.String s = "0000" + System.Convert.ToString(ch, 16); + retval.Append("\\u" + s.Substring(s.Length - 4, (s.Length) - (s.Length - 4))); + } + else + { + retval.Append(ch); + } + continue; + + } + } + return retval.ToString(); + } + } + /* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */ +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/QueryParser.cs b/external/Lucene.Net.Light/src/core/QueryParser/QueryParser.cs new file mode 100644 index 0000000000..e58ecea1f6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/QueryParser.cs @@ -0,0 +1,2095 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. +*/ + +/* Generated By:JavaCC: Do not edit this line. 
QueryParser.java */ + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Text; +using Lucene.Net.Analysis; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Search; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Analyzer = Lucene.Net.Analysis.Analyzer; +using CachingTokenFilter = Lucene.Net.Analysis.CachingTokenFilter; +using TokenStream = Lucene.Net.Analysis.TokenStream; +using DateField = Lucene.Net.Documents.DateField; +using DateTools = Lucene.Net.Documents.DateTools; +using Term = Lucene.Net.Index.Term; +using BooleanClause = Lucene.Net.Search.BooleanClause; +using BooleanQuery = Lucene.Net.Search.BooleanQuery; +using FuzzyQuery = Lucene.Net.Search.FuzzyQuery; +using MatchAllDocsQuery = Lucene.Net.Search.MatchAllDocsQuery; +using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery; +using MultiTermQuery = Lucene.Net.Search.MultiTermQuery; +using PhraseQuery = Lucene.Net.Search.PhraseQuery; +using PrefixQuery = Lucene.Net.Search.PrefixQuery; +using Query = Lucene.Net.Search.Query; +using Single = Lucene.Net.Support.Single; +using TermQuery = Lucene.Net.Search.TermQuery; +using TermRangeQuery = Lucene.Net.Search.TermRangeQuery; +using WildcardQuery = Lucene.Net.Search.WildcardQuery; +using Version = Lucene.Net.Util.Version; + +namespace Lucene.Net.QueryParsers +{ + /// This class is generated by JavaCC. The most important method is + /// . + /// + /// The syntax for query strings is as follows: + /// A Query is a series of clauses. + /// A clause may be prefixed by: + /// + /// a plus (+) or a minus (-) sign, indicating + /// that the clause is required or prohibited respectively; or + /// a term followed by a colon, indicating the field to be searched. + /// This enables one to construct queries which search multiple fields. + /// + /// + /// A clause may be either: + /// + /// a term, indicating all the documents that contain this term; or + /// a nested query, enclosed in parentheses. Note that this may be used + /// with a +/- prefix to require any of a set of + /// terms. + /// + /// + /// Thus, in BNF, the query grammar is: + /// + /// Query ::= ( Clause )* + /// Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) + /// + /// + ///

+ /// Examples of appropriately formatted queries can be found in the query syntax + /// documentation. + ///
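To make the grammar above concrete, a minimal hedged parse (the default field name and the analyzer instance are placeholders, not part of this patch):

    var qp = new QueryParser(Version.LUCENE_30, "contents", analyzer);
    Query q = qp.Parse("+mono -jit \"query parser\" title:lucene");
    // a required term, a prohibited term, a phrase, and a field-scoped clause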

+ /// + ///

+ /// In range queries, QueryParser tries to detect date values, e.g. + /// date:[6/1/2005 TO 6/4/2005] produces a range query that searches + /// for "date" fields between 2005-06-01 and 2005-06-04. Note that the format + /// of the accepted input depends on the Locale. + /// By default a date is converted into a search term using the deprecated + /// DateField for compatibility reasons. + /// To use the new DateTools to convert dates, a + /// date resolution has to be set. + ///

+ ///

+ /// The date resolution that shall be used for RangeQueries can be set + /// using SetDateResolution(DateTools.Resolution) + /// or SetDateResolution(String, DateTools.Resolution). The former + /// sets the default date resolution for all fields, whereas the latter can + /// be used to set field specific date resolutions. Field specific date + /// resolutions take, if set, precedence over the default date resolution. + ///
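A hedged sketch of the date-handling knobs described above (field names are invented; DAY and HOUR are assumed members of DateTools.Resolution; the analyzer is assumed):

    var qp = new QueryParser(Version.LUCENE_30, "contents", analyzer);
    qp.SetDateResolution(DateTools.Resolution.DAY);               // default for all fields
    qp.SetDateResolution("modified", DateTools.Resolution.HOUR);  // field-specific override
    Query q = qp.Parse("date:[6/1/2005 TO 6/4/2005]");            // parsed using the current Locale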

+ ///

+ /// If you use neither DateField nor DateTools in your + /// index, you can create your own + /// query parser that inherits QueryParser and overwrites + /// GetRangeQuery(String, String, String, bool) to + /// use a different method for date conversion. + ///

+ /// + ///

Note that QueryParser is not thread-safe.

+ /// + ///

NOTE: there is a new QueryParser in contrib, which matches + /// the same syntax as this class, but is more modular, + /// enabling substantial customization to how a query is created. + /// + ///

NOTE: there is a new QueryParser in contrib, which matches + /// the same syntax as this class, but is more modular, + /// enabling substantial customization to how a query is created. + /// NOTE: You must specify the required compatibility when + /// creating QueryParser: + /// + /// As of 2.9, is true by default. + /// + ///

+ public class QueryParser : QueryParserConstants + { + + private static int CONJ_NONE = 0; + private static int CONJ_AND = 1; + private static int CONJ_OR = 2; + + private static int MOD_NONE = 0; + private static int MOD_NOT = 10; + private static int MOD_REQ = 11; + + // make it possible to call setDefaultOperator() without accessing + // the nested class: + /// Alternative form of QueryParser.Operator.AND + public static Operator AND_OPERATOR = Operator.AND; + + /// Alternative form of QueryParser.Operator.OR + public static Operator OR_OPERATOR = Operator.OR; + + /// The actual operator that parser uses to combine query terms + private Operator operator_Renamed = OR_OPERATOR; + + private bool lowercaseExpandedTerms = true; + private RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + private bool allowLeadingWildcard = false; + private bool enablePositionIncrements = true; + + // LUCENENET-423 - DateRange differences with Java and .NET + private bool _useJavaStyleDateRangeParsing = false; + + private Analyzer analyzer; + private String field; + private int phraseSlop = 0; + private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + private System.Globalization.CultureInfo locale = System.Globalization.CultureInfo.CurrentCulture; + + // the default date resolution + private DateTools.Resolution dateResolution = null; + // maps field names to date resolutions + private IDictionary fieldToDateResolution = null; + + // The collator to use when determining range inclusion, + // for use when constructing RangeQuerys. + private System.Globalization.CompareInfo rangeCollator = null; + + /* The default operator_Renamed for parsing queries. + * Use {@link QueryParser#setDefaultOperator} to change it. + */ + + public enum Operator + { + OR, + AND + } + + /* Constructs a query parser. + * @param matchVersion Lucene version to match. See above) + * @param f the default field for query terms. + * @param a used to find terms in the query text. + */ + + public QueryParser(Version matchVersion, String f, Analyzer a) + : this(new FastCharStream(new StringReader(""))) + { + analyzer = a; + field = f; + if (matchVersion.OnOrAfter(Version.LUCENE_29)) + { + enablePositionIncrements = true; + } + else + { + enablePositionIncrements = false; + } + + // LUCENENET-423 - DateRange differences with Java and .NET + if (matchVersion.OnOrAfter(Version.LUCENE_30)) + { + _useJavaStyleDateRangeParsing = true; + } + } + + /// Parses a query string, returning a {@link Lucene.Net.Search.Query}. + /// the query string to be parsed. + /// + /// ParseException if the parsing fails + public virtual Query Parse(String query) + { + ReInit(new FastCharStream(new StringReader(query))); + try + { + // TopLevelQuery is a Query followed by the end-of-input (EOF) + Query res = TopLevelQuery(field); + return res ?? NewBooleanQuery(false); + } + catch (ParseException tme) + { + // rethrow to include the original query: + throw new ParseException("Cannot parse '" + query + "': " + tme.Message, tme); + } + catch (TokenMgrError tme) + { + throw new ParseException("Cannot parse '" + query + "': " + tme.Message, tme); + } + catch (BooleanQuery.TooManyClauses tmc) + { + throw new ParseException("Cannot parse '" + query + "': too many bool clauses", tmc); + } + } + + /// Returns the analyzer. + public virtual Analyzer Analyzer + { + get { return analyzer; } + } + + /// Returns the field. 
+ public virtual string Field + { + get { return field; } + } + + /// + /// Gets or sets the minimal similarity for fuzzy queries. + /// Default is 0.5f. + /// + public virtual float FuzzyMinSim + { + get { return fuzzyMinSim; } + set { this.fuzzyMinSim = value; } + } + + /// Gets or sets the prefix length for fuzzy queries. + /// Returns the fuzzyPrefixLength. + public virtual int FuzzyPrefixLength + { + get { return fuzzyPrefixLength; } + set { this.fuzzyPrefixLength = value; } + } + + /// Gets or sets the default slop for phrases. If zero, then exact phrase matches + /// are required. Default value is zero. + /// + public virtual int PhraseSlop + { + set { this.phraseSlop = value; } + get { return phraseSlop; } + } + + /// Set to true to allow leading wildcard characters. + ///

+ /// When set, * or ? are allowed as + /// the first character of a PrefixQuery and WildcardQuery. + /// Note that this can produce very slow + /// queries on big indexes. + ///

+ /// Default: false. + ///
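A brief hedged sketch of enabling the flag documented above, via the property declared just below (the analyzer is assumed):

    var qp = new QueryParser(Version.LUCENE_30, "contents", analyzer);
    qp.AllowLeadingWildcard = true;   // without this, "*ocument" raises a ParseException
    Query q = qp.Parse("*ocument");   // can be very slow on big indexes, as noted above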

+ public virtual bool AllowLeadingWildcard + { + set { this.allowLeadingWildcard = value; } + get { return allowLeadingWildcard; } + } + + /// Set to true to enable position increments in result query. + ///

+ /// When set, result phrase and multi-phrase queries will + /// be aware of position increments. + /// Useful when e.g. a StopFilter increases the position increment of + /// the token that follows an omitted token. + ///

+ /// Default: false. + ///

+ public virtual bool EnablePositionIncrements + { + set { this.enablePositionIncrements = value; } + get { return enablePositionIncrements; } + } + + /// Gets or sets the boolean operator of the QueryParser. + /// In default mode (OR_OPERATOR) terms without any modifiers + /// are considered optional: for example capital of Hungary is equal to + /// capital OR of OR Hungary.
+ /// In AND_OPERATOR mode terms are considered to be in conjunction: the + /// above mentioned query is parsed as capital AND of AND Hungary + ///
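A short hedged sketch of the two modes described above (the analyzer is assumed):

    var qp = new QueryParser(Version.LUCENE_30, "contents", analyzer);
    Query orQuery = qp.Parse("capital of Hungary");    // OR mode: capital OR of OR Hungary
    qp.DefaultOperator = QueryParser.AND_OPERATOR;
    Query andQuery = qp.Parse("capital of Hungary");   // AND mode: capital AND of AND Hungary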
+ public virtual Operator DefaultOperator + { + set { this.operator_Renamed = value; } + get { return operator_Renamed; } + } + + /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + /// lower-cased or not. Default is true. + /// + public virtual bool LowercaseExpandedTerms + { + set { this.lowercaseExpandedTerms = value; } + get { return lowercaseExpandedTerms; } + } + + + /// By default QueryParser uses + /// when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it + /// a) Runs faster b) Does not have the scarcity of terms unduly influence score + /// c) avoids any "TooManyBooleanClauses" exception. + /// However, if your application really needs to use the + /// old-fashioned BooleanQuery expansion rewriting and the above + /// points are not relevant then use this to change + /// the rewrite method. + /// + public virtual RewriteMethod MultiTermRewriteMethod + { + set { multiTermRewriteMethod = value; } + get { return multiTermRewriteMethod; } + } + + /// Gets or sets locale used by date range parsing. + public virtual CultureInfo Locale + { + set { this.locale = value; } + get { return locale; } + } + + /// Sets the default date resolution used by RangeQueries for fields for which no + /// specific date resolutions has been set. Field specific resolutions can be set + /// with {@link #SetDateResolution(String, DateTools.Resolution)}. + /// + /// + /// the default date resolution to set + /// + public virtual void SetDateResolution(DateTools.Resolution dateResolution) + { + this.dateResolution = dateResolution; + } + + /// Sets the date resolution used by RangeQueries for a specific field. + /// + /// + /// field for which the date resolution is to be set + /// + /// date resolution to set + /// + public virtual void SetDateResolution(String fieldName, DateTools.Resolution dateResolution) + { + if (fieldName == null) + { + throw new ArgumentException("Field cannot be null."); + } + + if (fieldToDateResolution == null) + { + // lazily initialize HashMap + fieldToDateResolution = new HashMap(); + } + + fieldToDateResolution.Add(fieldName, dateResolution); + } + + /// Returns the date resolution that is used by RangeQueries for the given field. + /// Returns null, if no default or field specific date resolution has been set + /// for the given field. + /// + public virtual DateTools.Resolution getDateResolution(String fieldName) + { + if (fieldName == null) + { + throw new ArgumentException("Field cannot be null."); + } + + if (fieldToDateResolution == null) + { + // no field specific date resolutions set; return default date resolution instead + return this.dateResolution; + } + + DateTools.Resolution resolution = fieldToDateResolution[fieldName]; + if (resolution == null) + { + // no date resolutions set for the given field; return default date resolution instead + resolution = this.dateResolution; + } + + return resolution; + } + + /// Gets or sets the collator used to determine index term inclusion in ranges + /// for RangeQuerys. + ///

+ /// WARNING: Setting the rangeCollator to a non-null + /// collator using this method will cause every single index Term in the + /// Field referenced by lowerTerm and/or upperTerm to be examined. + /// Depending on the number of index Terms in this Field, the operation could + /// be very slow. + /// + ///

+ /// the collator to use when constructing RangeQuerys + public virtual CompareInfo RangeCollator + { + set { rangeCollator = value; } + get { return rangeCollator; } + } + + protected internal virtual void AddClause(List clauses, int conj, int mods, Query q) + { + bool required, prohibited; + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + if (clauses.Count > 0 && conj == CONJ_AND) + { + BooleanClause c = clauses[clauses.Count - 1]; + if (!c.IsProhibited) + c.Occur = Occur.MUST; + } + + if (clauses.Count > 0 && operator_Renamed == AND_OPERATOR && conj == CONJ_OR) + { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause c = clauses[clauses.Count - 1]; + if (!c.IsProhibited) + c.Occur = Occur.SHOULD; + } + + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) + return; + + if (operator_Renamed == OR_OPERATOR) + { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. + prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) + { + required = true; + } + } + else + { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) + clauses.Add(NewBooleanClause(q, Occur.MUST)); + else if (!required && !prohibited) + clauses.Add(NewBooleanClause(q, Occur.SHOULD)); + else if (!required && prohibited) + clauses.Add(NewBooleanClause(q, Occur.MUST_NOT)); + else + throw new SystemException("Clause cannot be both required and prohibited"); + } + + + /// throw in overridden method to disallow + /// + protected internal virtual Query GetFieldQuery(String field, String queryText) + { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + TokenStream source; + try + { + source = analyzer.ReusableTokenStream(field, new StringReader(queryText)); + source.Reset(); + } + catch (IOException) + { + source = analyzer.TokenStream(field, new StringReader(queryText)); + } + CachingTokenFilter buffer = new CachingTokenFilter(source); + ITermAttribute termAtt = null; + IPositionIncrementAttribute posIncrAtt = null; + int numTokens = 0; + + bool success = false; + try + { + buffer.Reset(); + success = true; + } + catch (IOException) + { + // success==false if we hit an exception + } + if (success) + { + if (buffer.HasAttribute()) + { + termAtt = buffer.GetAttribute(); + } + if (buffer.HasAttribute()) + { + posIncrAtt = buffer.GetAttribute(); + } + } + + int positionCount = 0; + bool severalTokensAtSamePosition = false; + + bool hasMoreTokens = false; + if (termAtt != null) + { + try + { + hasMoreTokens = buffer.IncrementToken(); + while (hasMoreTokens) + { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.PositionIncrement : 1; + if (positionIncrement != 0) + { + positionCount += positionIncrement; + } + else + { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.IncrementToken(); + } + } + catch (IOException) + { + // ignore + } + } + try + { + // rewind the buffer stream + buffer.Reset(); + + // close original stream - all tokens buffered + source.Close(); + } + catch (IOException) + { + // ignore + } + + if (numTokens == 0) + return null; + else if (numTokens == 1) + { + String term = null; + try + { + bool hasNext = buffer.IncrementToken(); + Debug.Assert(hasNext); + term = termAtt.Term; + } + catch (IOException) + { + // safe to ignore, because we know the number of tokens + } + return NewTermQuery(new Term(field, term)); + } + else + { + if (severalTokensAtSamePosition) + { + if (positionCount == 1) + { + // no phrase query: + BooleanQuery q = NewBooleanQuery(true); + for (int i = 0; i < numTokens; i++) + { + String term = null; + try + { + bool hasNext = buffer.IncrementToken(); + Debug.Assert(hasNext); + term = termAtt.Term; + } + catch (IOException) + { + // safe to ignore, because we know the number of tokens + } + + Query currentQuery = NewTermQuery( + new Term(field, term)); + q.Add(currentQuery, Occur.SHOULD); + } + return q; + } + else + { + // phrase query: + MultiPhraseQuery mpq = NewMultiPhraseQuery(); + mpq.Slop = phraseSlop; + List multiTerms = new List(); + int position = -1; + for (int i = 0; i < numTokens; i++) + { + String term = null; + int positionIncrement = 1; + try + { + bool hasNext = buffer.IncrementToken(); + Debug.Assert(hasNext == true); + term = termAtt.Term; + if (posIncrAtt != null) + { + positionIncrement = posIncrAtt.PositionIncrement; + } + } + catch (IOException) + { + // safe to ignore, because we know the number of tokens + } + + if (positionIncrement > 0 && multiTerms.Count > 0) + { + if (enablePositionIncrements) + { + mpq.Add(multiTerms.ToArray(), position); + } + else + { + mpq.Add(multiTerms.ToArray()); + } + multiTerms.Clear(); + } + position += positionIncrement; + multiTerms.Add(new Term(field, term)); + } + if (enablePositionIncrements) + { + mpq.Add(multiTerms.ToArray(), position); + } + else + { + mpq.Add(multiTerms.ToArray()); + } + return mpq; + } + } + else + { + PhraseQuery pq = NewPhraseQuery(); + pq.Slop = phraseSlop; + int position = -1; + + + for (int i = 0; i < numTokens; i++) + { + String term = null; + int positionIncrement = 1; + + try + { + bool hasNext = buffer.IncrementToken(); + Debug.Assert(hasNext == true); + term = termAtt.Term; + if (posIncrAtt != null) + { + positionIncrement = posIncrAtt.PositionIncrement; + } + } + catch (IOException) + { + // safe to ignore, because we know the number of tokens + } + + if (enablePositionIncrements) + { + position += positionIncrement; + pq.Add(new Term(field, term), position); + } + else + { + pq.Add(new Term(field, term)); + } + } + return pq; + } + } + } + + + /// Base implementation delegates to {@link #GetFieldQuery(String,String)}. + /// This method may be overridden, for example, to return + /// a SpanNearQuery instead of a PhraseQuery. 
+ /// + /// + /// throw in overridden method to disallow + /// + protected internal virtual Query GetFieldQuery(String field, String queryText, int slop) + { + Query query = GetFieldQuery(field, queryText); + + if (query is PhraseQuery) + { + ((PhraseQuery)query).Slop = slop; + } + if (query is MultiPhraseQuery) + { + ((MultiPhraseQuery)query).Slop = slop; + } + + return query; + } + + /// throw in overridden method to disallow + /// + protected internal virtual Query GetRangeQuery(String field, + String part1, + String part2, + bool inclusive) + { + if (lowercaseExpandedTerms) + { + part1 = part1.ToLower(); + part2 = part2.ToLower(); + } + + try + { + DateTime d1, d2; + if (_useJavaStyleDateRangeParsing) + { + // TODO: This doesn't emulate java perfectly. + // Java allows parsing of the string up to the end of the pattern + // and then ignores everything else. .NET will throw an exception, + // so this will fail in those cases, though the code below is clear + // that users can only specify the date, not the time. + var shortFormat = locale.DateTimeFormat.ShortDatePattern; + d1 = DateTime.ParseExact(part1, shortFormat, locale); + d2 = DateTime.ParseExact(part2, shortFormat, locale); + } + else + { + d1 = DateTime.Parse(part1, locale); + d2 = DateTime.Parse(part2, locale); + } + + if (inclusive) + { + // The user can only specify the date, not the time, so make sure + // the time is set to the latest possible time of that date to really + // include all documents: + var cal = locale.Calendar; + d2 = cal.AddHours(d2, 23); + d2 = cal.AddMinutes(d2, 59); + d2 = cal.AddSeconds(d2, 59); + d2 = cal.AddMilliseconds(d2, 999); + } + DateTools.Resolution resolution = getDateResolution(field); + if (resolution == null) + { + // no default or field specific date resolution has been set, + // use deprecated DateField to maintain compatibility with + // pre-1.9 Lucene versions. 
+ part1 = DateField.DateToString(d1); + part2 = DateField.DateToString(d2); + } + else + { + part1 = DateTools.DateToString(d1, resolution); + part2 = DateTools.DateToString(d2, resolution); + } + } + catch (Exception) + { + } + + return NewRangeQuery(field, part1, part2, inclusive); + } + + /// Builds a new BooleanQuery instance + /// disable coord + /// + /// new BooleanQuery instance + /// + protected internal virtual BooleanQuery NewBooleanQuery(bool disableCoord) + { + return new BooleanQuery(disableCoord); + } + + /// Builds a new BooleanClause instance + /// sub query + /// + /// how this clause should occur when matching documents + /// + /// new BooleanClause instance + /// + protected internal virtual BooleanClause NewBooleanClause(Query q, Occur occur) + { + return new BooleanClause(q, occur); + } + + /// Builds a new TermQuery instance + /// term + /// + /// new TermQuery instance + /// + protected internal virtual Query NewTermQuery(Term term) + { + return new TermQuery(term); + } + + /// Builds a new PhraseQuery instance + /// new PhraseQuery instance + /// + protected internal virtual PhraseQuery NewPhraseQuery() + { + return new PhraseQuery(); + } + + /// Builds a new MultiPhraseQuery instance + /// new MultiPhraseQuery instance + /// + protected internal virtual MultiPhraseQuery NewMultiPhraseQuery() + { + return new MultiPhraseQuery(); + } + + /// Builds a new PrefixQuery instance + /// Prefix term + /// + /// new PrefixQuery instance + /// + protected internal virtual Query NewPrefixQuery(Term prefix) + { + return new PrefixQuery(prefix) { RewriteMethod = multiTermRewriteMethod }; + } + + /// Builds a new FuzzyQuery instance + /// Term + /// + /// minimum similarity + /// + /// prefix length + /// + /// new FuzzyQuery Instance + /// + protected internal virtual Query NewFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) + { + // FuzzyQuery doesn't yet allow constant score rewrite + return new FuzzyQuery(term, minimumSimilarity, prefixLength); + } + + /// Builds a new TermRangeQuery instance + /// Field + /// + /// min + /// + /// max + /// + /// true if range is inclusive + /// + /// new TermRangeQuery instance + /// + protected internal virtual Query NewRangeQuery(String field, String part1, String part2, bool inclusive) + { + return new TermRangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator) { RewriteMethod = multiTermRewriteMethod }; + } + + /// Builds a new MatchAllDocsQuery instance + /// new MatchAllDocsQuery instance + /// + protected internal virtual Query NewMatchAllDocsQuery() + { + return new MatchAllDocsQuery(); + } + + /// Builds a new WildcardQuery instance + /// wildcard term + /// + /// new WildcardQuery instance + /// + protected internal virtual Query NewWildcardQuery(Term t) + { + return new WildcardQuery(t) { RewriteMethod = multiTermRewriteMethod }; + } + + /// Factory method for generating query, given a set of clauses. + /// By default creates a boolean query composed of clauses passed in. + /// + /// Can be overridden by extending classes, to modify query being + /// returned. + /// + /// + /// List that contains {@link BooleanClause} instances + /// to join. + /// + /// + /// Resulting {@link Query} object. + /// + /// throw in overridden method to disallow + /// + protected internal virtual Query GetBooleanQuery(IList clauses) + { + return GetBooleanQuery(clauses, false); + } + + /// Factory method for generating query, given a set of clauses. + /// By default creates a boolean query composed of clauses passed in. 
+ /// + /// Can be overridden by extending classes, to modify query being + /// returned. + /// + /// + /// List that contains {@link BooleanClause} instances + /// to join. + /// + /// true if coord scoring should be disabled. + /// + /// + /// Resulting {@link Query} object. + /// + /// throw in overridden method to disallow + /// + protected internal virtual Query GetBooleanQuery(IList clauses, bool disableCoord) + { + if (clauses.Count == 0) + { + return null; // all clause words were filtered away by the analyzer. + } + BooleanQuery query = NewBooleanQuery(disableCoord); + foreach (var clause in clauses) + { + query.Add(clause); + } + return query; + } + + /// Factory method for generating a query. Called when parser + /// parses an input term token that contains one or more wildcard + /// characters (? and *), but is not a prefix term token (one + /// that has just a single * character at the end) + ///

+ /// Depending on settings, prefix term may be lower-cased + /// automatically. It will not go through the default Analyzer, + /// however, since normal Analyzers are unlikely to work properly + /// with wildcard templates. + ///

+ /// Can be overridden by extending classes, to provide custom handling for + /// wildcard queries, which may be necessary due to missing analyzer calls. + /// + ///

+ /// Name of the field query will use. + /// + /// Term token that contains one or more wild card + /// characters (? or *), but is not simple prefix term + /// + /// + /// Resulting {@link Query} built for the term + /// + /// throw in overridden method to disallow + /// + protected internal virtual Query GetWildcardQuery(String field, String termStr) + { + if ("*".Equals(field)) + { + if ("*".Equals(termStr)) return NewMatchAllDocsQuery(); + } + if (!allowLeadingWildcard && (termStr.StartsWith("*") || termStr.StartsWith("?"))) + throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); + if (lowercaseExpandedTerms) + { + termStr = termStr.ToLower(); + } + Term t = new Term(field, termStr); + return NewWildcardQuery(t); + } + + /// Factory method for generating a query (similar to + /// {@link #getWildcardQuery}). Called when parser parses an input term + /// token that uses prefix notation; that is, contains a single '*' wildcard + /// character as its last character. Since this is a special case + /// of generic wildcard term, and such a query can be optimized easily, + /// this usually results in a different query object. + ///

+ /// Depending on settings, a prefix term may be lower-cased + /// automatically. It will not go through the default Analyzer, + /// however, since normal Analyzers are unlikely to work properly + /// with wildcard templates. + ///

+ /// Can be overridden by extending classes, to provide custom handling for + /// wild card queries, which may be necessary due to missing analyzer calls. + /// + ///

+ /// Name of the field query will use. + /// + /// Term token to use for building term for the query + /// (without trailing '*' character!) + /// + /// + /// Resulting {@link Query} built for the term + /// + /// throw in overridden method to disallow + /// + protected internal virtual Query GetPrefixQuery(String field, String termStr) + { + if (!allowLeadingWildcard && termStr.StartsWith("*")) + throw new ParseException("'*' not allowed as first character in PrefixQuery"); + if (lowercaseExpandedTerms) + { + termStr = termStr.ToLower(); + } + Term t = new Term(field, termStr); + return NewPrefixQuery(t); + } + + /// Factory method for generating a query (similar to + /// {@link #getWildcardQuery}). Called when parser parses + /// an input term token that has the fuzzy suffix (~) appended. + /// + /// + /// Name of the field query will use. + /// + /// Term token to use for building term for the query + /// + /// + /// Resulting {@link Query} built for the term + /// + /// throw in overridden method to disallow + /// + protected internal virtual Query GetFuzzyQuery(String field, String termStr, float minSimilarity) + { + if (lowercaseExpandedTerms) + { + termStr = termStr.ToLower(); + } + Term t = new Term(field, termStr); + return NewFuzzyQuery(t, minSimilarity, fuzzyPrefixLength); + } + + + /// Returns a String where the escape char has been + /// removed, or kept only once if there was a double escape. + /// + /// Supports escaped unicode characters, e. g. translates + /// \\u0041 to A. + /// + /// + private String DiscardEscapeChar(String input) + { + // Create char array to hold unescaped char sequence + char[] output = new char[input.Length]; + + // The Length of the output can be less than the input + // due to discarded escape chars. This variable holds + // the actual Length of the output + int Length = 0; + + // We remember whether the last processed character was + // an escape character + bool lastCharWasEscapeChar = false; + + // The multiplier the current unicode digit must be multiplied with. + // E. g. the first digit must be multiplied with 16^3, the second with 16^2... 
+ int codePointMultiplier = 0; + + // Used to calculate the codepoint of the escaped unicode character + int codePoint = 0; + + for (int i = 0; i < input.Length; i++) + { + char curChar = input[i]; + if (codePointMultiplier > 0) + { + codePoint += HexToInt(curChar) * codePointMultiplier; + codePointMultiplier = Number.URShift(codePointMultiplier, 4); + if (codePointMultiplier == 0) + { + output[Length++] = (char)codePoint; + codePoint = 0; + } + } + else if (lastCharWasEscapeChar) + { + if (curChar == 'u') + { + // found an escaped unicode character + codePointMultiplier = 16 * 16 * 16; + } + else + { + // this character was escaped + output[Length] = curChar; + Length++; + } + lastCharWasEscapeChar = false; + } + else + { + if (curChar == '\\') + { + lastCharWasEscapeChar = true; + } + else + { + output[Length] = curChar; + Length++; + } + } + } + + if (codePointMultiplier > 0) + { + throw new ParseException("Truncated unicode escape sequence."); + } + + if (lastCharWasEscapeChar) + { + throw new ParseException("Term can not end with escape character."); + } + + return new String(output, 0, Length); + } + + /// Returns the numeric value of the hexadecimal character + private static int HexToInt(char c) + { + if ('0' <= c && c <= '9') + { + return c - '0'; + } + else if ('a' <= c && c <= 'f') + { + return c - 'a' + 10; + } + else if ('A' <= c && c <= 'F') + { + return c - 'A' + 10; + } + else + { + throw new ParseException("None-hex character in unicode escape sequence: " + c); + } + } + + /// Returns a String where those characters that QueryParser + /// expects to be escaped are escaped by a preceding \. + /// + public static String Escape(String s) + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.Length; i++) + { + char c = s[i]; + // These characters are part of the query syntax and must be escaped + if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?' || c == '|' || c == '&') + { + sb.Append('\\'); + } + sb.Append(c); + } + return sb.ToString(); + } + + /// Command line tool to test QueryParser, using {@link Lucene.Net.Analysis.SimpleAnalyzer}. + /// Usage:
+ /// Lucene.Net.QueryParsers.QueryParser <input> + ///
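For readers who want to drive the parser from code rather than through the command-line tool documented above, here is a small sketch. It only uses members visible in this file (the (Version, field, Analyzer) constructor, Parse, ToString(field) and the static Escape helper); the class name and query strings are made up for illustration.

using System;
using Lucene.Net.Analysis;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Util;

class QueryParserDemo   // hypothetical driver
{
    static void Main()
    {
        var parser = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());

        // "+title:lucene -body:java" yields one required and one prohibited clause,
        // per the Query ::= ( Clause )* grammar reproduced just below in this file.
        Query q = parser.Parse("+title:lucene -body:java");
        Console.WriteLine(q.ToString("field"));

        // Escape() backslash-escapes every character the parser treats as syntax.
        Console.WriteLine(QueryParser.Escape("(1+1):2"));   // \(1\+1\)\:2
    }
}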
+ [STAThread] + public static void Main(String[] args) + { + if (args.Length == 0) + { + Console.WriteLine("Usage: java org.apache.lucene.queryParser.QueryParser "); + Environment.Exit(0); + } + QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer()); + Query q = qp.Parse(args[0]); + Console.WriteLine(q.ToString("field")); + } + + // * Query ::= ( Clause )* + // * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) + public int Conjunction() + { + int ret = CONJ_NONE; + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case AndToken: + case OrToken: + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case AndToken: + Jj_consume_token(AndToken); + ret = CONJ_AND; + break; + case OrToken: + Jj_consume_token(OrToken); + ret = CONJ_OR; + break; + default: + jj_la1[0] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + break; + default: + jj_la1[1] = jj_gen; + break; + } + { + if (true) return ret; + } + throw new ApplicationException("Missing return statement in function"); + } + + public int Modifiers() + { + int ret = MOD_NONE; + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case NotToken: + case PlusToken: + case MinusToken: + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case PlusToken: + Jj_consume_token(PlusToken); + ret = MOD_REQ; + break; + case MinusToken: + Jj_consume_token(MinusToken); + ret = MOD_NOT; + break; + case NotToken: + Jj_consume_token(NotToken); + ret = MOD_NOT; + break; + default: + jj_la1[2] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + break; + default: + jj_la1[3] = jj_gen; + break; + } + { + if (true) return ret; + } + throw new Exception("Missing return statement in function"); + } + + // This makes sure that there is no garbage after the query string + public Query TopLevelQuery(String field) + { + Query q; + q = Query(field); + Jj_consume_token(0); + { + if (true) return q; + } + throw new Exception("Missing return statement in function"); + } + + public Query Query(String field) + { + List clauses = new List(); + Query q, firstQuery = null; + int conj, mods; + mods = Modifiers(); + q = Clause(field); + AddClause(clauses, CONJ_NONE, mods, q); + if (mods == MOD_NONE) + firstQuery = q; + while (true) + { + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case AndToken: + case OrToken: + case NotToken: + case PlusToken: + case MinusToken: + case LParanToken: + case StarToken: + case QuotedToken: + case TermToken: + case PrefixTermToken: + case WildTermToken: + case RangeInStartToken: + case RangeExStartToken: + case NumberToken: + break; + default: + jj_la1[4] = jj_gen; + goto label_1; + } + + conj = Conjunction(); + mods = Modifiers(); + q = Clause(field); + AddClause(clauses, conj, mods, q); + } + + label_1: + + if (clauses.Count == 1 && firstQuery != null) + { + if (true) return firstQuery; + } + + return GetBooleanQuery(clauses); + } + + public Query Clause(String field) + { + Query q; + Token fieldToken = null, boost = null; + if (Jj_2_1(2)) + { + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case TermToken: + fieldToken = Jj_consume_token(TermToken); + Jj_consume_token(ColonToken); + field = DiscardEscapeChar(fieldToken.image); + break; + case StarToken: + Jj_consume_token(StarToken); + Jj_consume_token(ColonToken); + field = "*"; + break; + default: + jj_la1[5] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + } + else + { + ; + } + switch ((jj_ntk == -1) ? 
Jj_ntk() : jj_ntk) + { + case StarToken: + case QuotedToken: + case TermToken: + case PrefixTermToken: + case WildTermToken: + case RangeInStartToken: + case RangeExStartToken: + case NumberToken: + q = Term(field); + break; + case LParanToken: + Jj_consume_token(LParanToken); + q = Query(field); + Jj_consume_token(RParenToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case CaratToken: + Jj_consume_token(CaratToken); + boost = Jj_consume_token(NumberToken); + break; + default: + jj_la1[6] = jj_gen; + break; + } + break; + default: + jj_la1[7] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + if (boost != null) + { + try + { + float f = Single.Parse(boost.image); + q.Boost = f; + } + catch (Exception) + { + } + } + { + if (true) return q; + } + throw new Exception("Missing return statement in function"); + } + + public Query Term(String field) + { + Token term, boost = null, fuzzySlop = null, goop1, goop2; + bool prefix = false; + bool wildcard = false; + bool fuzzy = false; + Query q; + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case StarToken: + case TermToken: + case PrefixTermToken: + case WildTermToken: + case NumberToken: + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case TermToken: + term = Jj_consume_token(TermToken); + break; + case StarToken: + term = Jj_consume_token(StarToken); + wildcard = true; + break; + case PrefixTermToken: + term = Jj_consume_token(PrefixTermToken); + prefix = true; + break; + case WildTermToken: + term = Jj_consume_token(WildTermToken); + wildcard = true; + break; + case NumberToken: + term = Jj_consume_token(NumberToken); + break; + default: + jj_la1[8] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case FuzzySlopToken: + fuzzySlop = Jj_consume_token(FuzzySlopToken); + fuzzy = true; + break; + default: + jj_la1[9] = jj_gen; + break; + } + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case CaratToken: + Jj_consume_token(CaratToken); + boost = Jj_consume_token(NumberToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case FuzzySlopToken: + fuzzySlop = Jj_consume_token(FuzzySlopToken); + fuzzy = true; + break; + default: + jj_la1[10] = jj_gen; + break; + } + break; + default: + jj_la1[11] = jj_gen; + break; + } + String termImage = DiscardEscapeChar(term.image); + if (wildcard) + { + q = GetWildcardQuery(field, termImage); + } + else if (prefix) + { + q = GetPrefixQuery(field, + DiscardEscapeChar(term.image.Substring(0, (term.image.Length - 1) - (0)))); + } + else if (fuzzy) + { + float fms = fuzzyMinSim; + try + { + fms = Single.Parse(fuzzySlop.image.Substring(1)); + } + catch (Exception) + { + } + if (fms < 0.0f || fms > 1.0f) + { + { + if (true) + throw new ParseException( + "Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); + } + } + q = GetFuzzyQuery(field, termImage, fms); + } + else + { + q = GetFieldQuery(field, termImage); + } + break; + case RangeInStartToken: + Jj_consume_token(RangeInStartToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case RangeInGoopToken: + goop1 = Jj_consume_token(RangeInGoopToken); + break; + case RangeInQuotedToken: + goop1 = Jj_consume_token(RangeInQuotedToken); + break; + default: + jj_la1[12] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case RangeInToToken: + Jj_consume_token(RangeInToToken); + break; + default: + jj_la1[13] = jj_gen; + break; + } + switch ((jj_ntk == -1) ? 
Jj_ntk() : jj_ntk) + { + case RangeInGoopToken: + goop2 = Jj_consume_token(RangeInGoopToken); + break; + case RangeInQuotedToken: + goop2 = Jj_consume_token(RangeInQuotedToken); + break; + default: + jj_la1[14] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + Jj_consume_token(RangeInEndToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case CaratToken: + Jj_consume_token(CaratToken); + boost = Jj_consume_token(NumberToken); + break; + default: + jj_la1[15] = jj_gen; + break; + } + if (goop1.kind == RangeInQuotedToken) + { + goop1.image = goop1.image.Substring(1, (goop1.image.Length - 1) - (1)); + } + if (goop2.kind == RangeInQuotedToken) + { + goop2.image = goop2.image.Substring(1, (goop2.image.Length - 1) - (1)); + } + q = GetRangeQuery(field, DiscardEscapeChar(goop1.image), DiscardEscapeChar(goop2.image), true); + break; + case RangeExStartToken: + Jj_consume_token(RangeExStartToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case RangeExGoopToken: + goop1 = Jj_consume_token(RangeExGoopToken); + break; + case RangeExQuotedToken: + goop1 = Jj_consume_token(RangeExQuotedToken); + break; + default: + jj_la1[16] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case RangeExToToken: + Jj_consume_token(RangeExToToken); + break; + default: + jj_la1[17] = jj_gen; + break; + } + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case RangeExGoopToken: + goop2 = Jj_consume_token(RangeExGoopToken); + break; + case RangeExQuotedToken: + goop2 = Jj_consume_token(RangeExQuotedToken); + break; + default: + jj_la1[18] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + Jj_consume_token(RangeExEndToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case CaratToken: + Jj_consume_token(CaratToken); + boost = Jj_consume_token(NumberToken); + break; + default: + jj_la1[19] = jj_gen; + break; + } + if (goop1.kind == RangeExQuotedToken) + { + goop1.image = goop1.image.Substring(1, (goop1.image.Length - 1) - (1)); + } + if (goop2.kind == RangeExQuotedToken) + { + goop2.image = goop2.image.Substring(1, (goop2.image.Length - 1) - (1)); + } + + q = GetRangeQuery(field, DiscardEscapeChar(goop1.image), DiscardEscapeChar(goop2.image), false); + break; + case QuotedToken: + term = Jj_consume_token(QuotedToken); + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case FuzzySlopToken: + fuzzySlop = Jj_consume_token(FuzzySlopToken); + break; + default: + jj_la1[20] = jj_gen; + break; + } + switch ((jj_ntk == -1) ? Jj_ntk() : jj_ntk) + { + case CaratToken: + Jj_consume_token(CaratToken); + boost = Jj_consume_token(NumberToken); + break; + default: + jj_la1[21] = jj_gen; + break; + } + int s = phraseSlop; + + if (fuzzySlop != null) + { + try + { + s = (int)Single.Parse(fuzzySlop.image.Substring(1)); + } + catch (Exception) + { + } + } + q = GetFieldQuery(field, DiscardEscapeChar(term.image.Substring(1, (term.image.Length - 1) - (1))), + s); + break; + default: + jj_la1[22] = jj_gen; + Jj_consume_token(-1); + throw new ParseException(); + } + if (boost != null) + { + float f = (float)1.0; + try + { + f = Single.Parse(boost.image); + } + catch (Exception) + { + /* Should this be handled somehow? 
(defaults to "no boost", if + * boost number is invalid) + */ + } + + // avoid boosting null queries, such as those caused by stop words + if (q != null) + { + q.Boost = f; + } + } + { + if (true) return q; + } + throw new Exception("Missing return statement in function"); + } + + private bool Jj_2_1(int xla) + { + jj_la = xla; + jj_lastpos = jj_scanpos = token; + try + { + return !Jj_3_1(); + } + catch (LookaheadSuccess) + { + return true; + } + finally + { + Jj_save(0, xla); + } + } + + private bool Jj_3R_2() + { + if (jj_scan_token(TermToken)) return true; + if (jj_scan_token(ColonToken)) return true; + return false; + } + + private bool Jj_3_1() + { + Token xsp; + xsp = jj_scanpos; + if (Jj_3R_2()) + { + jj_scanpos = xsp; + if (Jj_3R_3()) return true; + } + return false; + } + + private bool Jj_3R_3() + { + if (jj_scan_token(StarToken)) return true; + if (jj_scan_token(ColonToken)) return true; + return false; + } + + /* Generated Token Manager. */ + public QueryParserTokenManager token_source; + /* Current token. */ + public Token token; + /* Next token. */ + public Token jj_nt; + private int jj_ntk; + private Token jj_scanpos, jj_lastpos; + private int jj_la; + private int jj_gen; + private int[] jj_la1 = new int[23]; + private static int[] jj_la1_0; + private static int[] jj_la1_1; + + private static void Jj_la1_init_0() + { + jj_la1_0 = new int[] + { + 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000, + 0x100000, 0x20000, 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000 + , 0x100000, 0x20000, 0x3ed0000, + }; + } + + private static void Jj_la1_init_1() + { + jj_la1_1 = new int[] + { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, + 0x3, 0x0, 0x0, 0x0, 0x0, + }; + } + + private JJCalls[] jj_2_rtns = new JJCalls[1]; + private bool jj_rescan = false; + private int jj_gc = 0; + + /// Constructor with user supplied CharStream. + protected internal QueryParser(ICharStream stream) + { + token_source = new QueryParserTokenManager(stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /// Reinitialise. + public void ReInit(ICharStream stream) + { + token_source.ReInit(stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /// Constructor with generated Token Manager. + protected QueryParser(QueryParserTokenManager tm) + { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /// Reinitialise. 
+ public void ReInit(QueryParserTokenManager tm) + { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.Length; i++) jj_2_rtns[i] = new JJCalls(); + } + + private Token Jj_consume_token(int kind) + { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.GetNextToken(); + jj_ntk = -1; + if (token.kind == kind) + { + jj_gen++; + if (++jj_gc > 100) + { + jj_gc = 0; + for (int i = 0; i < jj_2_rtns.Length; i++) + { + JJCalls c = jj_2_rtns[i]; + while (c != null) + { + if (c.gen < jj_gen) c.first = null; + c = c.next; + } + } + } + return token; + } + token = oldToken; + jj_kind = kind; + throw GenerateParseException(); + } + + [Serializable] + private sealed class LookaheadSuccess : System.Exception + { + } + + private LookaheadSuccess jj_ls = new LookaheadSuccess(); + private bool jj_scan_token(int kind) + { + if (jj_scanpos == jj_lastpos) + { + jj_la--; + if (jj_scanpos.next == null) + { + jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.GetNextToken(); + } + else + { + jj_lastpos = jj_scanpos = jj_scanpos.next; + } + } + else + { + jj_scanpos = jj_scanpos.next; + } + if (jj_rescan) + { + int i = 0; + Token tok = token; + while (tok != null && tok != jj_scanpos) + { + i++; + tok = tok.next; + } + if (tok != null) Jj_add_error_token(kind, i); + } + if (jj_scanpos.kind != kind) return true; + if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; + return false; + } + + /// Get the next Token. + public Token GetNextToken() + { + if (token.next != null) token = token.next; + else token = token.next = token_source.GetNextToken(); + jj_ntk = -1; + jj_gen++; + return token; + } + + /// Get the specific Token. + public Token getToken(int index) + { + Token t = token; + for (int i = 0; i < index; i++) + { + if (t.next != null) t = t.next; + else t = t.next = token_source.GetNextToken(); + } + return t; + } + + private int Jj_ntk() + { + if ((jj_nt = token.next) == null) + return (jj_ntk = (token.next = token_source.GetNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private List jj_expentries = new List(); + private int[] jj_expentry; + private int jj_kind = -1; + private int[] jj_lasttokens = new int[100]; + private int jj_endpos; + + private void Jj_add_error_token(int kind, int pos) + { + if (pos >= 100) return; + if (pos == jj_endpos + 1) + { + jj_lasttokens[jj_endpos++] = kind; + } + else if (jj_endpos != 0) + { + jj_expentry = new int[jj_endpos]; + for (int i = 0; i < jj_endpos; i++) + { + jj_expentry[i] = jj_lasttokens[i]; + } + + foreach (var oldentry in jj_expentries) + { + if (oldentry.Length == jj_expentry.Length) + { + for (int i = 0; i < jj_expentry.Length; i++) + { + if (oldentry[i] != jj_expentry[i]) + { + continue; + } + } + jj_expentries.Add(jj_expentry); + break; + } + } + if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; + } + } + + /// Generate ParseException. 
+ public virtual ParseException GenerateParseException() + { + jj_expentries.Clear(); + bool[] la1tokens = new bool[34]; + if (jj_kind >= 0) + { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 23; i++) + { + if (jj_la1[i] == jj_gen) + { + for (int j = 0; j < 32; j++) + { + if ((jj_la1_0[i] & (1 << j)) != 0) + { + la1tokens[j] = true; + } + if ((jj_la1_1[i] & (1 << j)) != 0) + { + la1tokens[32 + j] = true; + } + } + } + } + for (int i = 0; i < 34; i++) + { + if (la1tokens[i]) + { + jj_expentry = new int[1]; + jj_expentry[0] = i; + jj_expentries.Add(jj_expentry); + } + } + jj_endpos = 0; + Jj_rescan_token(); + Jj_add_error_token(0, 0); + int[][] exptokseq = new int[jj_expentries.Count][]; + for (int i = 0; i < jj_expentries.Count; i++) + { + exptokseq[i] = jj_expentries[i]; + } + return new ParseException(token, exptokseq, tokenImage); + } + + /// Enable tracing. + public void Enable_tracing() + { + } + + /// Disable tracing. + public void Disable_tracing() + { + } + + private void Jj_rescan_token() + { + jj_rescan = true; + for (int i = 0; i < 1; i++) + { + try + { + JJCalls p = jj_2_rtns[i]; + do + { + if (p.gen > jj_gen) + { + jj_la = p.arg; + jj_lastpos = jj_scanpos = p.first; + switch (i) + { + case 0: + Jj_3_1(); + break; + } + } + p = p.next; + } while (p != null); + } + catch (LookaheadSuccess) + { + } + } + jj_rescan = false; + } + + private void Jj_save(int index, int xla) + { + JJCalls p = jj_2_rtns[index]; + while (p.gen > jj_gen) + { + if (p.next == null) + { + p = p.next = new JJCalls(); + break; + } + p = p.next; + } + p.gen = jj_gen + xla - jj_la; + p.first = token; + p.arg = xla; + } + + internal sealed class JJCalls + { + internal int gen; + internal Token first; + internal int arg; + internal JJCalls next; + } + + static QueryParser() + { + { + Jj_la1_init_0(); + Jj_la1_init_1(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/QueryParserConstants.cs b/external/Lucene.Net.Light/src/core/QueryParser/QueryParserConstants.cs new file mode 100644 index 0000000000..54e0c1f5e8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/QueryParserConstants.cs @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Generated By:JavaCC: Do not edit this line. QueryParserConstants.java */ + +using System; + +namespace Lucene.Net.QueryParsers +{ + + + /// Token literal values and constants. + /// Generated by org.javacc.parser.OtherFilesGen#start() + /// + public class QueryParserConstants + { + /// End of File. + protected internal const int EndOfFileToken = 0; + /// RegularExpression Id. + protected internal const int NumCharToken = 1; + /// RegularExpression Id. 
+ protected internal const int EscapedCharToken = 2; + /// RegularExpression Id. + protected internal const int TermStartCharToken = 3; + /// RegularExpression Id. + protected internal const int TermCharToken = 4; + /// RegularExpression Id. + protected internal const int WhitespaceToken = 5; + /// RegularExpression Id. + protected internal const int QuotedCharToken = 6; + /// RegularExpression Id. + protected internal const int AndToken = 8; + /// RegularExpression Id. + protected internal const int OrToken = 9; + /// RegularExpression Id. + protected internal const int NotToken = 10; + /// RegularExpression Id. + protected internal const int PlusToken = 11; + /// RegularExpression Id. + protected internal const int MinusToken = 12; + /// RegularExpression Id. + protected internal const int LParanToken = 13; + /// RegularExpression Id. + protected internal const int RParenToken = 14; + /// RegularExpression Id. + protected internal const int ColonToken = 15; + /// RegularExpression Id. + protected internal const int StarToken = 16; + /// RegularExpression Id. + protected internal const int CaratToken = 17; + /// RegularExpression Id. + protected internal const int QuotedToken = 18; + /// RegularExpression Id. + protected internal const int TermToken = 19; + /// RegularExpression Id. + protected internal const int FuzzySlopToken = 20; + /// RegularExpression Id. + protected internal const int PrefixTermToken = 21; + /// RegularExpression Id. + protected internal const int WildTermToken = 22; + /// RegularExpression Id. + protected internal const int RangeInStartToken = 23; + /// RegularExpression Id. + protected internal const int RangeExStartToken = 24; + /// RegularExpression Id. + protected internal const int NumberToken = 25; + /// RegularExpression Id. + protected internal const int RangeInToToken = 26; + /// RegularExpression Id. + protected internal const int RangeInEndToken = 27; + /// RegularExpression Id. + protected internal const int RangeInQuotedToken = 28; + /// RegularExpression Id. + protected internal const int RangeInGoopToken = 29; + /// RegularExpression Id. + protected internal const int RangeExToToken = 30; + /// RegularExpression Id. + protected internal const int RangeExEndToken = 31; + /// RegularExpression Id. + protected internal const int RangeExQuotedToken = 32; + /// RegularExpression Id. + protected internal const int RangeExGoopToken = 33; + /// Lexical state. + protected internal const int BoostToken = 0; + /// Lexical state. + protected const int RangeExToken = 1; + /// Lexical state. + protected internal const int RangeInToken = 2; + /// Lexical state. + protected internal const int DefaultToken = 3; + /// Literal token values. 
+ protected internal static System.String[] tokenImage = new System.String[] { + "", + "<_NUM_CHAR>", + "<_ESCAPED_CHAR>", + "<_TERM_START_CHAR>", + "<_TERM_CHAR>", + "<_WHITESPACE>", + "<_QUOTED_CHAR>", + "", + "", + "", + "", + "\"+\"", + "\"-\"", + "\"(\"", + "\")\"", + "\":\"", + "\"*\"", + "\"^\"", + "", + "", + "", + "", + "", + "\"[\"", + "\"{\"", + "", + "\"TO\"", + "\"]\"", + "", + "", + "\"TO\"", + "\"}\"", + "", + "" + }; + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/QueryParserTokenManager.cs b/external/Lucene.Net.Light/src/core/QueryParser/QueryParserTokenManager.cs new file mode 100644 index 0000000000..239d824619 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/QueryParserTokenManager.cs @@ -0,0 +1,1462 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */ + +namespace Lucene.Net.QueryParsers +{ + + /// Token Manager. + public class QueryParserTokenManager : QueryParserConstants + { + private void InitBlock() + { + System.IO.StreamWriter temp_writer; + temp_writer = new System.IO.StreamWriter(System.Console.OpenStandardOutput(), System.Console.Out.Encoding); + temp_writer.AutoFlush = true; + debugStream = temp_writer; + } + + /// Debug output. + public System.IO.StreamWriter debugStream; + /// Set debug output. 
+ public virtual void SetDebugStream(System.IO.StreamWriter ds) + { + debugStream = ds; + } + private int JjStopStringLiteralDfa_3(int pos, long active0) + { + switch (pos) + { + + default: + return - 1; + + } + } + private int JjStartNfa_3(int pos, long active0) + { + return JjMoveNfa_3(JjStopStringLiteralDfa_3(pos, active0), pos + 1); + } + private int JjStopAtPos(int pos, int kind) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; + } + private int JjMoveStringLiteralDfa0_3() + { + switch (curChar) + { + + case (char) (40): + return JjStopAtPos(0, 13); + + case (char) (41): + return JjStopAtPos(0, 14); + + case (char) (42): + return JjStartNfaWithStates_3(0, 16, 36); + + case (char) (43): + return JjStopAtPos(0, 11); + + case (char) (45): + return JjStopAtPos(0, 12); + + case (char) (58): + return JjStopAtPos(0, 15); + + case (char) (91): + return JjStopAtPos(0, 23); + + case (char) (94): + return JjStopAtPos(0, 17); + + case (char) (123): + return JjStopAtPos(0, 24); + + default: + return JjMoveNfa_3(0, 0); + + } + } + private int JjStartNfaWithStates_3(int pos, int kind, int state) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return pos + 1; + } + return JjMoveNfa_3(state, pos + 1); + } + internal static readonly ulong[] jjbitVec0 = new ulong[]{0x1L, 0x0L, 0x0L, 0x0L}; + internal static readonly ulong[] jjbitVec1 = new ulong[]{0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL}; + internal static readonly ulong[] jjbitVec3 = new ulong[]{0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL}; + internal static readonly ulong[] jjbitVec4 = new ulong[]{0xfffefffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL}; + private int JjMoveNfa_3(int startState, int curPos) + { + int startsAt = 0; + jjnewStateCnt = 36; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (; ; ) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + ulong l = (ulong) (1L << (int) curChar); + do + { + switch (jjstateSet[--i]) + { + + case 36: + case 25: + if ((0xfbfffcf8ffffd9ffL & l) == (ulong) 0L) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 0: + if ((0xfbffd4f8ffffd9ffL & l) != (ulong) 0L) + { + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + } + else if ((0x100002600L & l) != 0L) + { + if (kind > 7) + kind = 7; + } + else if (curChar == 34) + JjCheckNAddStates(0, 2); + else if (curChar == 33) + { + if (kind > 10) + kind = 10; + } + if ((0x7bffd0f8ffffd9ffL & l) != 0L) + { + if (kind > 19) + kind = 19; + JjCheckNAddStates(3, 7); + } + else if (curChar == 42) + { + if (kind > 21) + kind = 21; + } + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 4; + break; + + case 4: + if (curChar == 38 && kind > 8) + kind = 8; + break; + + case 5: + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 4; + break; + + case 13: + if (curChar == 33 && kind > 10) + kind = 10; + break; + + case 14: + if (curChar == 34) + JjCheckNAddStates(0, 2); + break; + + case 15: + if ((0xfffffffbffffffffL & l) != (ulong) 0L) + JjCheckNAddStates(0, 2); + break; + + case 17: + JjCheckNAddStates(0, 2); + break; + + case 18: + if (curChar == 34 && kind > 18) + kind = 18; + break; + + case 20: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 20) + kind = 20; + JjAddStates(8, 9); + break; + + case 21: + if (curChar == 46) + JjCheckNAdd(22); + break; + + case 22: + if 
((0x3ff000000000000L & l) == 0L) + break; + if (kind > 20) + kind = 20; + JjCheckNAdd(22); + break; + + case 23: + if (curChar == 42 && kind > 21) + kind = 21; + break; + + case 24: + if ((0xfbffd4f8ffffd9ffL & l) == (ulong) 0L) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 27: + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 28: + if ((0x7bffd0f8ffffd9ffL & l) == 0L) + break; + if (kind > 19) + kind = 19; + JjCheckNAddStates(3, 7); + break; + + case 29: + if ((0x7bfff8f8ffffd9ffL & l) == 0L) + break; + if (kind > 19) + kind = 19; + JjCheckNAddTwoStates(29, 30); + break; + + case 31: + if (kind > 19) + kind = 19; + JjCheckNAddTwoStates(29, 30); + break; + + case 32: + if ((0x7bfff8f8ffffd9ffL & l) != 0L) + JjCheckNAddStates(10, 12); + break; + + case 34: + JjCheckNAddStates(10, 12); + break; + + default: break; + + } + } + while (i != startsAt); + } + else if (curChar < 128) + { + ulong l = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + case 36: + if ((0x97ffffff87ffffffL & l) != (ulong) 0L) + { + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + } + else if (curChar == 92) + JjCheckNAddTwoStates(27, 27); + break; + + case 0: + if ((0x97ffffff87ffffffL & l) != (ulong) 0L) + { + if (kind > 19) + kind = 19; + JjCheckNAddStates(3, 7); + } + else if (curChar == 92) + JjCheckNAddStates(13, 15); + else if (curChar == 126) + { + if (kind > 20) + kind = 20; + jjstateSet[jjnewStateCnt++] = 20; + } + if ((0x97ffffff87ffffffL & l) != (ulong) 0L) + { + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + } + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 11; + else if (curChar == 124) + jjstateSet[jjnewStateCnt++] = 8; + else if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 6; + else if (curChar == 65) + jjstateSet[jjnewStateCnt++] = 2; + break; + + case 1: + if (curChar == 68 && kind > 8) + kind = 8; + break; + + case 2: + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 1; + break; + + case 3: + if (curChar == 65) + jjstateSet[jjnewStateCnt++] = 2; + break; + + case 6: + if (curChar == 82 && kind > 9) + kind = 9; + break; + + case 7: + if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 6; + break; + + case 8: + if (curChar == 124 && kind > 9) + kind = 9; + break; + + case 9: + if (curChar == 124) + jjstateSet[jjnewStateCnt++] = 8; + break; + + case 10: + if (curChar == 84 && kind > 10) + kind = 10; + break; + + case 11: + if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 10; + break; + + case 12: + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 11; + break; + + case 15: + if ((0xffffffffefffffffL & l) != (ulong) 0L) + JjCheckNAddStates(0, 2); + break; + + case 16: + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 17; + break; + + case 17: + JjCheckNAddStates(0, 2); + break; + + case 19: + if (curChar != 126) + break; + if (kind > 20) + kind = 20; + jjstateSet[jjnewStateCnt++] = 20; + break; + + case 24: + if ((0x97ffffff87ffffffL & l) == (ulong) 0L) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 25: + if ((0x97ffffff87ffffffL & l) == (ulong) 0L) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 26: + if (curChar == 92) + JjCheckNAddTwoStates(27, 27); + break; + + case 27: + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 28: + if ((0x97ffffff87ffffffL & l) == (ulong) 0L) + break; + if (kind > 19) + kind = 19; + JjCheckNAddStates(3, 7); + break; + + case 
29: + if ((0x97ffffff87ffffffL & l) == (ulong) 0L) + break; + if (kind > 19) + kind = 19; + JjCheckNAddTwoStates(29, 30); + break; + + case 30: + if (curChar == 92) + JjCheckNAddTwoStates(31, 31); + break; + + case 31: + if (kind > 19) + kind = 19; + JjCheckNAddTwoStates(29, 30); + break; + + case 32: + if ((0x97ffffff87ffffffL & l) != (ulong) 0L) + JjCheckNAddStates(10, 12); + break; + + case 33: + if (curChar == 92) + JjCheckNAddTwoStates(34, 34); + break; + + case 34: + JjCheckNAddStates(10, 12); + break; + + case 35: + if (curChar == 92) + JjCheckNAddStates(13, 15); + break; + + default: break; + + } + } + while (i != startsAt); + } + else + { + int hiByte = (int) (curChar >> 8); + int i1 = hiByte >> 6; + ulong l1 = (ulong) (1L << (hiByte & 63)); + int i2 = (curChar & 0xff) >> 6; + ulong l2 = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + case 36: + case 25: + if (!JjCanMove_2(hiByte, i1, i2, l1, l2)) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 0: + if (JjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 7) + kind = 7; + } + if (JjCanMove_2(hiByte, i1, i2, l1, l2)) + { + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + } + if (JjCanMove_2(hiByte, i1, i2, l1, l2)) + { + if (kind > 19) + kind = 19; + JjCheckNAddStates(3, 7); + } + break; + + case 15: + case 17: + if (JjCanMove_1(hiByte, i1, i2, l1, l2)) + JjCheckNAddStates(0, 2); + break; + + case 24: + if (!JjCanMove_2(hiByte, i1, i2, l1, l2)) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 27: + if (!JjCanMove_1(hiByte, i1, i2, l1, l2)) + break; + if (kind > 22) + kind = 22; + JjCheckNAddTwoStates(25, 26); + break; + + case 28: + if (!JjCanMove_2(hiByte, i1, i2, l1, l2)) + break; + if (kind > 19) + kind = 19; + JjCheckNAddStates(3, 7); + break; + + case 29: + if (!JjCanMove_2(hiByte, i1, i2, l1, l2)) + break; + if (kind > 19) + kind = 19; + JjCheckNAddTwoStates(29, 30); + break; + + case 31: + if (!JjCanMove_1(hiByte, i1, i2, l1, l2)) + break; + if (kind > 19) + kind = 19; + JjCheckNAddTwoStates(29, 30); + break; + + case 32: + if (JjCanMove_2(hiByte, i1, i2, l1, l2)) + JjCheckNAddStates(10, 12); + break; + + case 34: + if (JjCanMove_1(hiByte, i1, i2, l1, l2)) + JjCheckNAddStates(10, 12); + break; + + default: break; + + } + } + while (i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt))) + return curPos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return curPos; + } + } + } + private int JjStopStringLiteralDfa_1(int pos, long active0) + { + switch (pos) + { + + case 0: + if ((active0 & 0x40000000L) != 0L) + { + jjmatchedKind = 33; + return 6; + } + return - 1; + + default: + return - 1; + + } + } + private int JjStartNfa_1(int pos, long active0) + { + return JjMoveNfa_1(JjStopStringLiteralDfa_1(pos, active0), pos + 1); + } + private int JjMoveStringLiteralDfa0_1() + { + switch (curChar) + { + + case (char) (84): + return JjMoveStringLiteralDfa1_1(0x40000000L); + + case (char) (125): + return JjStopAtPos(0, 31); + + default: + return JjMoveNfa_1(0, 0); + + } + } + private int JjMoveStringLiteralDfa1_1(long active0) + { + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + JjStopStringLiteralDfa_1(0, active0); + return 1; + } + switch (curChar) + { + + case (char) (79): + if 
((active0 & 0x40000000L) != 0L) + return JjStartNfaWithStates_1(1, 30, 6); + break; + + default: + break; + + } + return JjStartNfa_1(0, active0); + } + private int JjStartNfaWithStates_1(int pos, int kind, int state) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return pos + 1; + } + return JjMoveNfa_1(state, pos + 1); + } + private int JjMoveNfa_1(int startState, int curPos) + { + int startsAt = 0; + jjnewStateCnt = 7; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (; ; ) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + ulong l = (ulong) (1L << (int) curChar); + do + { + switch (jjstateSet[--i]) + { + + case 0: + if ((0xfffffffeffffffffL & l) != (ulong) 0L) + { + if (kind > 33) + kind = 33; + JjCheckNAdd(6); + } + if ((0x100002600L & l) != 0L) + { + if (kind > 7) + kind = 7; + } + else if (curChar == 34) + JjCheckNAddTwoStates(2, 4); + break; + + case 1: + if (curChar == 34) + JjCheckNAddTwoStates(2, 4); + break; + + case 2: + if ((0xfffffffbffffffffL & l) != (ulong) 0L) + JjCheckNAddStates(16, 18); + break; + + case 3: + if (curChar == 34) + JjCheckNAddStates(16, 18); + break; + + case 5: + if (curChar == 34 && kind > 32) + kind = 32; + break; + + case 6: + if ((0xfffffffeffffffffL & l) == (ulong) 0L) + break; + if (kind > 33) + kind = 33; + JjCheckNAdd(6); + break; + + default: break; + + } + } + while (i != startsAt); + } + else if (curChar < 128) + { + ulong l = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + case 0: + case 6: + if ((0xdfffffffffffffffL & l) == (ulong) 0L) + break; + if (kind > 33) + kind = 33; + JjCheckNAdd(6); + break; + + case 2: + JjAddStates(16, 18); + break; + + case 4: + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 3; + break; + + default: break; + + } + } + while (i != startsAt); + } + else + { + int hiByte = (int) (curChar >> 8); + int i1 = hiByte >> 6; + ulong l1 = (ulong) (1L << (hiByte & 63)); + int i2 = (curChar & 0xff) >> 6; + ulong l2 = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + case 0: + if (JjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 7) + kind = 7; + } + if (JjCanMove_1(hiByte, i1, i2, l1, l2)) + { + if (kind > 33) + kind = 33; + JjCheckNAdd(6); + } + break; + + case 2: + if (JjCanMove_1(hiByte, i1, i2, l1, l2)) + JjAddStates(16, 18); + break; + + case 6: + if (!JjCanMove_1(hiByte, i1, i2, l1, l2)) + break; + if (kind > 33) + kind = 33; + JjCheckNAdd(6); + break; + + default: break; + + } + } + while (i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt))) + return curPos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return curPos; + } + } + } + private int JjMoveStringLiteralDfa0_0() + { + return JjMoveNfa_0(0, 0); + } + private int JjMoveNfa_0(int startState, int curPos) + { + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (; ; ) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + ulong l = (ulong) (1L << (int) curChar); + do + { + switch (jjstateSet[--i]) + { + + case 0: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 25) + kind = 25; + JjAddStates(19, 20); + break; + + case 1: + if (curChar == 46) + JjCheckNAdd(2); + break; + 
+ case 2: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 25) + kind = 25; + JjCheckNAdd(2); + break; + + default: break; + + } + } + while (i != startsAt); + } + else if (curChar < 128) + { + ulong l = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + default: break; + + } + } + while (i != startsAt); + } + else + { + int hiByte = (int) (curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 63); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 63); + do + { + switch (jjstateSet[--i]) + { + + default: break; + + } + } + while (i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return curPos; + } + } + } + private int JjStopStringLiteralDfa_2(int pos, long active0) + { + switch (pos) + { + + case 0: + if ((active0 & 0x4000000L) != 0L) + { + jjmatchedKind = 29; + return 6; + } + return - 1; + + default: + return - 1; + + } + } + private int JjStartNfa_2(int pos, long active0) + { + return JjMoveNfa_2(JjStopStringLiteralDfa_2(pos, active0), pos + 1); + } + private int JjMoveStringLiteralDfa0_2() + { + switch (curChar) + { + + case (char) (84): + return JjMoveStringLiteralDfa1_2(0x4000000L); + + case (char) (93): + return JjStopAtPos(0, 27); + + default: + return JjMoveNfa_2(0, 0); + + } + } + private int JjMoveStringLiteralDfa1_2(long active0) + { + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + JjStopStringLiteralDfa_2(0, active0); + return 1; + } + switch (curChar) + { + + case (char) (79): + if ((active0 & 0x4000000L) != 0L) + return JjStartNfaWithStates_2(1, 26, 6); + break; + + default: + break; + + } + return JjStartNfa_2(0, active0); + } + private int JjStartNfaWithStates_2(int pos, int kind, int state) + { + jjmatchedKind = kind; + jjmatchedPos = pos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return pos + 1; + } + return JjMoveNfa_2(state, pos + 1); + } + private int JjMoveNfa_2(int startState, int curPos) + { + int startsAt = 0; + jjnewStateCnt = 7; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (; ; ) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + ulong l = (ulong) (1L << (int) curChar); + do + { + switch (jjstateSet[--i]) + { + + case 0: + if ((0xfffffffeffffffffL & l) != (ulong) 0L) + { + if (kind > 29) + kind = 29; + JjCheckNAdd(6); + } + if ((0x100002600L & l) != 0L) + { + if (kind > 7) + kind = 7; + } + else if (curChar == 34) + JjCheckNAddTwoStates(2, 4); + break; + + case 1: + if (curChar == 34) + JjCheckNAddTwoStates(2, 4); + break; + + case 2: + if ((0xfffffffbffffffffL & l) != (ulong) 0L) + JjCheckNAddStates(16, 18); + break; + + case 3: + if (curChar == 34) + JjCheckNAddStates(16, 18); + break; + + case 5: + if (curChar == 34 && kind > 28) + kind = 28; + break; + + case 6: + if ((0xfffffffeffffffffL & l) == (ulong) 0L) + break; + if (kind > 29) + kind = 29; + JjCheckNAdd(6); + break; + + default: break; + + } + } + while (i != startsAt); + } + else if (curChar < 128) + { + ulong l = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + case 0: + case 6: + if ((0xffffffffdfffffffL & l) == (ulong) 0L) + break; + if (kind > 29) + kind = 29; + JjCheckNAdd(6); + break; + + case 2: + 
JjAddStates(16, 18); + break; + + case 4: + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 3; + break; + + default: break; + + } + } + while (i != startsAt); + } + else + { + int hiByte = (int) (curChar >> 8); + int i1 = hiByte >> 6; + ulong l1 = (ulong) (1L << (hiByte & 63)); + int i2 = (curChar & 0xff) >> 6; + ulong l2 = (ulong) (1L << (curChar & 63)); + do + { + switch (jjstateSet[--i]) + { + + case 0: + if (JjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 7) + kind = 7; + } + if (JjCanMove_1(hiByte, i1, i2, l1, l2)) + { + if (kind > 29) + kind = 29; + JjCheckNAdd(6); + } + break; + + case 2: + if (JjCanMove_1(hiByte, i1, i2, l1, l2)) + JjAddStates(16, 18); + break; + + case 6: + if (!JjCanMove_1(hiByte, i1, i2, l1, l2)) + break; + if (kind > 29) + kind = 29; + JjCheckNAdd(6); + break; + + default: break; + + } + } + while (i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt = startsAt))) + return curPos; + try + { + curChar = input_stream.ReadChar(); + } + catch (System.IO.IOException) + { + return curPos; + } + } + } + internal static readonly int[] jjnextStates = new int[]{15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, 2, 4, 5, 0, 1}; + private static bool JjCanMove_0(int hiByte, int i1, int i2, ulong l1, ulong l2) + { + switch (hiByte) + { + + case 48: + return ((jjbitVec0[i2] & l2) != (ulong) 0L); + + default: + return false; + + } + } + private static bool JjCanMove_1(int hiByte, int i1, int i2, ulong l1, ulong l2) + { + switch (hiByte) + { + + case 0: + return ((jjbitVec3[i2] & l2) != (ulong) 0L); + + default: + if ((jjbitVec1[i1] & l1) != (ulong) 0L) + return true; + return false; + + } + } + private static bool JjCanMove_2(int hiByte, int i1, int i2, ulong l1, ulong l2) + { + switch (hiByte) + { + + case 0: + return ((jjbitVec3[i2] & l2) != (ulong) 0L); + + case 48: + return ((jjbitVec1[i2] & l2) != (ulong) 0L); + + default: + if ((jjbitVec4[i1] & l1) != (ulong) 0L) + return true; + return false; + + } + } + + /// Token literal values. + public static readonly System.String[] jjstrLiteralImages = new System.String[]{"", null, null, null, null, null, null, null, null, null, null, "\x002B", "\x002D", "\x0028", "\x0029", "\x003A", "\x002A", "\x005E", null, null, null, null, null, "\x005B", "\x007B", null, "\x0054\x004F", "\x005D", null, null, "\x0054\x004F", "\x007D", null, null}; + + /// Lexer state names. + public static readonly System.String[] lexStateNames = new System.String[]{"Boost", "RangeEx", "RangeIn", "DEFAULT"}; + + /// Lex State array. + public static readonly int[] jjnewLexState = new int[]{- 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, 0, - 1, - 1, - 1, - 1, - 1, 2, 1, 3, - 1, 3, - 1, - 1, - 1, 3, - 1, - 1}; + internal static readonly ulong[] jjtoToken = new ulong[]{0x3ffffff01L}; + internal static readonly long[] jjtoSkip = new long[]{0x80L}; + protected internal ICharStream input_stream; + private uint[] jjrounds = new uint[36]; + private int[] jjstateSet = new int[72]; + protected internal char curChar; + /// Constructor. + public QueryParserTokenManager(ICharStream stream) + { + InitBlock(); + input_stream = stream; + } + + /// Constructor. + public QueryParserTokenManager(ICharStream stream, int lexState):this(stream) + { + SwitchTo(lexState); + } + + /// Reinitialise parser. 
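The token manager above can also be driven directly, which is occasionally useful for seeing what the lexer produces for a given query string. A hedged sketch: FastCharStream is assumed to be the ICharStream implementation that ships alongside this query parser (it is not shown in this excerpt); any other ICharStream would work the same way.

using System;
using System.IO;
using Lucene.Net.QueryParsers;

class TokenDump   // hypothetical utility
{
    static void Main()
    {
        ICharStream chars = new FastCharStream(new StringReader("title:(+lucene -java)^2"));
        var tm = new QueryParserTokenManager(chars);

        // Kind 0 is the end-of-file token (EndOfFileToken in QueryParserConstants).
        for (Token t = tm.GetNextToken(); t.kind != 0; t = tm.GetNextToken())
            Console.WriteLine("{0,3}  {1}", t.kind, t.image);
    }
}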
+ public virtual void ReInit(ICharStream stream) + { + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); + } + private void ReInitRounds() + { + int i; + jjround = 0x80000001; + for (i = 36; i-- > 0; ) + jjrounds[i] = 0x80000000; + } + + /// Reinitialise parser. + public virtual void ReInit(ICharStream stream, int lexState) + { + ReInit(stream); + SwitchTo(lexState); + } + + /// Switch to specified lex state. + public virtual void SwitchTo(int lexState) + { + if (lexState >= 4 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); + else + curLexState = lexState; + } + + protected internal virtual Token JjFillToken() + { + Token t; + System.String curTokenImage; + int beginLine; + int endLine; + int beginColumn; + int endColumn; + System.String im = jjstrLiteralImages[jjmatchedKind]; + curTokenImage = (im == null)?input_stream.Image:im; + beginLine = input_stream.BeginLine; + beginColumn = input_stream.BeginColumn; + endLine = input_stream.EndLine; + endColumn = input_stream.EndColumn; + t = Token.NewToken(jjmatchedKind, curTokenImage); + + t.beginLine = beginLine; + t.endLine = endLine; + t.beginColumn = beginColumn; + t.endColumn = endColumn; + + return t; + } + + internal int curLexState = 3; + internal int defaultLexState = 3; + internal int jjnewStateCnt; + internal uint jjround; + internal int jjmatchedPos; + internal int jjmatchedKind; + + /// Get the next Token. + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual Token GetNextToken() + { + Token matchedToken; + int curPos = 0; + + for (; ; ) + { + try + { + curChar = input_stream.BeginToken(); + } + catch (System.IO.IOException) + { + jjmatchedKind = 0; + matchedToken = JjFillToken(); + return matchedToken; + } + + switch (curLexState) + { + + case 0: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = JjMoveStringLiteralDfa0_0(); + break; + + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = JjMoveStringLiteralDfa0_1(); + break; + + case 2: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = JjMoveStringLiteralDfa0_2(); + break; + + case 3: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = JjMoveStringLiteralDfa0_3(); + break; + } + if (jjmatchedKind != 0x7fffffff) + { + if (jjmatchedPos + 1 < curPos) + input_stream.Backup(curPos - jjmatchedPos - 1); + if ((jjtoToken[jjmatchedKind >> 6] & ((ulong) 1L << (jjmatchedKind & 63))) != (ulong) 0L) + { + matchedToken = JjFillToken(); + if (jjnewLexState[jjmatchedKind] != - 1) + curLexState = jjnewLexState[jjmatchedKind]; + return matchedToken; + } + else + { + if (jjnewLexState[jjmatchedKind] != - 1) + curLexState = jjnewLexState[jjmatchedKind]; + goto EOFLoop; + } + } + int error_line = input_stream.EndLine; + int error_column = input_stream.EndColumn; + System.String error_after = null; + bool EOFSeen = false; + try + { + input_stream.ReadChar(); input_stream.Backup(1); + } + catch (System.IO.IOException) + { + EOFSeen = true; + error_after = curPos <= 1?"":input_stream.Image; + if (curChar == '\n' || curChar == '\r') + { + error_line++; + error_column = 0; + } + else + error_column++; + } + if (!EOFSeen) + { + input_stream.Backup(1); + error_after = curPos <= 1?"":input_stream.Image; + } + throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, 
TokenMgrError.LEXICAL_ERROR); + +EOFLoop: ; + } + } + + private void JjCheckNAdd(int state) + { + if (jjrounds[state] != jjround) + { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } + } + private void JjAddStates(int start, int end) + { + do + { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } + while (start++ != end); + } + private void JjCheckNAddTwoStates(int state1, int state2) + { + JjCheckNAdd(state1); + JjCheckNAdd(state2); + } + + private void JjCheckNAddStates(int start, int end) + { + do + { + JjCheckNAdd(jjnextStates[start]); + } + while (start++ != end); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/Token.cs b/external/Lucene.Net.Light/src/core/QueryParser/Token.cs new file mode 100644 index 0000000000..e3c51f87e2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/Token.cs @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */ +/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */ + +using System; + +namespace Lucene.Net.QueryParsers +{ + + /// Describes the input token stream. + + public class Token + { + + /// An integer that describes the kind of this token. This numbering + /// system is determined by JavaCCParser, and a table of these numbers is + /// stored in the file ...Constants.java. + /// + public int kind; + + /// The line number of the first character of this Token. + public int beginLine; + /// The column number of the first character of this Token. + public int beginColumn; + /// The line number of the last character of this Token. + public int endLine; + /// The column number of the last character of this Token. + public int endColumn; + + /// The string image of the token. + public System.String image; + + /// A reference to the next regular (non-special) token from the input + /// stream. If this is the last token from the input stream, or if the + /// token manager has not read tokens beyond this one, this field is + /// set to null. This is true only if this token is also a regular + /// token. Otherwise, see below for a description of the contents of + /// this field. + /// + public Token next; + + /// This field is used to access special tokens that occur prior to this + /// token, but after the immediately preceding regular (non-special) token. + /// If there are no such special tokens, this field is set to null. + /// When there are more than one such special token, this field refers + /// to the last of these special tokens, which in turn refers to the next + /// previous special token through its specialToken field, and so on + /// until the first special token (whose specialToken field is null). 
+ /// The next fields of special tokens refer to other special tokens that + /// immediately follow it (without an intervening regular token). If there + /// is no such token, this field is null. + /// + public Token specialToken; + + /// An optional attribute value of the Token. + /// Tokens which are not used as syntactic sugar will often contain + /// meaningful values that will be used later on by the compiler or + /// interpreter. This attribute value is often different from the image. + /// Any subclass of Token that actually wants to return a non-null value can + /// override this method as appropriate. + /// + public virtual object Value + { + get { return null; } + } + + /// No-argument constructor + public Token() + { + } + + /// Constructs a new token for the specified Image. + public Token(int kind):this(kind, null) + { + } + + /// Constructs a new token for the specified Image and Kind. + public Token(int kind, System.String image) + { + this.kind = kind; + this.image = image; + } + + /// Returns the image. + public override System.String ToString() + { + return image; + } + + /// Returns a new Token object, by default. However, if you want, you + /// can create and return subclass objects based on the value of ofKind. + /// Simply add the cases to the switch for all those special cases. + /// For example, if you have a subclass of Token called IDToken that + /// you want to create if ofKind is ID, simply add something like : + /// + /// case MyParserConstants.ID : return new IDToken(ofKind, image); + /// + /// to the following switch statement. Then you can cast matchedToken + /// variable to the appropriate type and use sit in your lexical actions. + /// + public static Token NewToken(int ofKind, System.String image) + { + switch (ofKind) + { + + default: return new Token(ofKind, image); + + } + } + + public static Token NewToken(int ofKind) + { + return NewToken(ofKind, null); + } + } + /* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */ +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/QueryParser/TokenMgrError.cs b/external/Lucene.Net.Light/src/core/QueryParser/TokenMgrError.cs new file mode 100644 index 0000000000..d69f88cb89 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/QueryParser/TokenMgrError.cs @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */ +/* JavaCCOptions: */ + +using System; + +namespace Lucene.Net.QueryParsers +{ + + /// Token Manager Error. + [Serializable] + public class TokenMgrError:System.ApplicationException + { + /// You can also modify the body of this method to customize your error messages. 
+ /// For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + /// of end-users concern, so you can return something like : + /// + /// "Internal Error : Please file a bug report .... " + /// + /// from this method for such cases in the release version of your parser. + /// + public override System.String Message + { + get + { + return base.Message; + } + + } + + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ + + /// Lexical error occurred. + internal const int LEXICAL_ERROR = 0; + + /// An attempt was made to create a second instance of a static token manager. + internal const int STATIC_LEXER_ERROR = 1; + + /// Tried to change to an invalid lexical state. + internal const int INVALID_LEXICAL_STATE = 2; + + /// Detected (and bailed out of) an infinite loop in the token manager. + internal const int LOOP_DETECTED = 3; + + /// Indicates the reason why the exception is thrown. It will have + /// one of the above 4 values. + /// + internal int errorCode; + + /// Replaces unprintable characters by their escaped (or unicode escaped) + /// equivalents in the given string + /// + protected internal static System.String addEscapes(System.String str) + { + System.Text.StringBuilder retval = new System.Text.StringBuilder(); + char ch; + for (int i = 0; i < str.Length; i++) + { + switch (str[i]) + { + + case (char) (0): + continue; + + case '\b': + retval.Append("\\b"); + continue; + + case '\t': + retval.Append("\\t"); + continue; + + case '\n': + retval.Append("\\n"); + continue; + + case '\f': + retval.Append("\\f"); + continue; + + case '\r': + retval.Append("\\r"); + continue; + + case '\"': + retval.Append("\\\""); + continue; + + case '\'': + retval.Append("\\\'"); + continue; + + case '\\': + retval.Append("\\\\"); + continue; + + default: + if ((ch = str[i]) < 0x20 || ch > 0x7e) + { + System.String s = "0000" + System.Convert.ToString(ch, 16); + retval.Append("\\u" + s.Substring(s.Length - 4, (s.Length) - (s.Length - 4))); + } + else + { + retval.Append(ch); + } + continue; + + } + } + return retval.ToString(); + } + + /// Returns a detailed message for the Error when it is thrown by the + /// token manager to indicate a lexical error. + /// Parameters : + /// EOFSeen : indicates if EOF caused the lexical error + /// curLexState : lexical state in which this error occurred + /// errorLine : line number when the error occurred + /// errorColumn : column number when the error occurred + /// errorAfter : prefix that was seen before this error occurred + /// curchar : the offending character + /// Note: You can customize the lexical error message by modifying this method. + /// + protected internal static System.String LexicalError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar) + { + return ("Lexical error at line " + errorLine + ", column " + errorColumn + ". Encountered: " + (EOFSeen?" ":("\"" + addEscapes(System.Convert.ToString(curChar)) + "\"") + " (" + (int) curChar + "), ") + "after : \"" + addEscapes(errorAfter) + "\""); + } + + /* + * Constructors of various flavors follow. + */ + + /// No arg constructor. + public TokenMgrError() + { + } + + /// Constructor with message and reason. + public TokenMgrError(System.String message, int reason):base(message) + { + errorCode = reason; + } + + /// Full Constructor. 
+ public TokenMgrError(bool EOFSeen, int lexState, int errorLine, int errorColumn, System.String errorAfter, char curChar, int reason):this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason) + { + } + } + /* JavaCC - OriginalChecksum=1c94e13236c7e0121e49427992341ee3 (do not edit this line) */ +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/BooleanClause.cs b/external/Lucene.Net.Light/src/core/Search/BooleanClause.cs new file mode 100644 index 0000000000..ac37b80527 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/BooleanClause.cs @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// A clause in a BooleanQuery. + [Serializable] + public class BooleanClause + { + private Occur occur; + + /// Constructs a BooleanClause. + public BooleanClause(Query query, Occur occur) + { + this._query = query; + this.occur = occur; + } + + public virtual Occur Occur + { + get { return occur; } + set { this.occur = value; } + } + + private Query _query; + + /// The query whose matching documents are combined by the boolean query. + public virtual Query Query + { + get { return _query; } + set { this._query = value; } + } + + public virtual bool IsProhibited + { + get { return Occur.MUST_NOT.Equals(occur); } + } + + public virtual bool IsRequired + { + get { return Occur.MUST.Equals(occur); } + } + + + /// Returns true if o is equal to this. + public override bool Equals(System.Object o) + { + if (o == null || !(o is BooleanClause)) + return false; + BooleanClause other = (BooleanClause) o; + return this.Query.Equals(other.Query) && this.occur.Equals(other.occur); + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + return Query.GetHashCode() ^ (Occur.MUST.Equals(occur)?1:0) ^ (Occur.MUST_NOT.Equals(occur)?2:0); + } + + + public override System.String ToString() + { + return OccurExtensions.ToString(occur) + Query; + } + } + + public enum Occur + { + MUST, + SHOULD, + MUST_NOT + } + + public static class OccurExtensions + { + public static System.String ToString(this Occur occur) + { + if (occur == Occur.MUST) + return "+"; + if (occur == Occur.MUST_NOT) + return "-"; + return ""; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/BooleanQuery.cs b/external/Lucene.Net.Light/src/core/Search/BooleanQuery.cs new file mode 100644 index 0000000000..22b6371389 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/BooleanQuery.cs @@ -0,0 +1,599 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using Lucene.Net.Index; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Occur = Lucene.Net.Search.Occur; + +namespace Lucene.Net.Search +{ + + /// A Query that matches documents matching boolean combinations of other + /// queries, e.g. s, s or other + /// BooleanQuerys. + /// + [Serializable] + public class BooleanQuery : Query, System.Collections.Generic.IEnumerable, System.ICloneable + { + [Serializable] + private class AnonymousClassSimilarityDelegator:SimilarityDelegator + { + private void InitBlock(BooleanQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private BooleanQuery enclosingInstance; + public BooleanQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassSimilarityDelegator(BooleanQuery enclosingInstance, Lucene.Net.Search.Similarity Param1):base(Param1) + { + InitBlock(enclosingInstance); + } + public override float Coord(int overlap, int maxOverlap) + { + return 1.0f; + } + } + + private static int _maxClauses = 1024; + + /// Thrown when an attempt is made to add more than + /// clauses. This typically happens if + /// a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery + /// is expanded to many terms during search. + /// + [Serializable] + public class TooManyClauses:System.SystemException + { + public override System.String Message + { + get + { + return "maxClauseCount is set to " + Lucene.Net.Search.BooleanQuery._maxClauses; + } + + } + } + + /// Gets or sets the maximum number of clauses permitted, 1024 by default. + /// Attempts to add more than the permitted number of clauses cause + /// to be thrown. + /// + public static int MaxClauseCount + { + get { return _maxClauses; } + set + { + if (value < 1) + throw new ArgumentException("maxClauseCount must be >= 1"); + _maxClauses = value; + } + } + + private EquatableList clauses = new EquatableList(); + private bool disableCoord; + + /// Constructs an empty boolean query. + public BooleanQuery() + { + } + + /// Constructs an empty boolean query. + /// + /// may be disabled in scoring, as + /// appropriate. For example, this score factor does not make sense for most + /// automatically generated queries, like and + ///. + /// + /// + /// disables in scoring. + /// + public BooleanQuery(bool disableCoord) + { + this.disableCoord = disableCoord; + } + + /// Returns true iff is disabled in + /// scoring for this query instance. + /// + /// + /// + public virtual bool IsCoordDisabled() + { + return disableCoord; + } + + // Implement coord disabling. + // Inherit javadoc. 
+ public override Similarity GetSimilarity(Searcher searcher) + { + Similarity result = base.GetSimilarity(searcher); + if (disableCoord) + { + // disable coord as requested + result = new AnonymousClassSimilarityDelegator(this, result); + } + return result; + } + + protected internal int minNrShouldMatch = 0; + + /// + /// Specifies a minimum number of the optional BooleanClauses + /// which must be satisfied. + /// + /// By default no optional clauses are necessary for a match + /// (unless there are no required clauses). If this method is used, + /// then the specified number of clauses is required. + /// + /// + /// Use of this method is totally independent of specifying that + /// any specific clauses are required (or prohibited). This number will + /// only be compared against the number of matching optional clauses. + /// + /// + public virtual int MinimumNumberShouldMatch + { + set { this.minNrShouldMatch = value; } + get { return minNrShouldMatch; } + } + + /// Adds a clause to a boolean query. + /// + /// + /// TooManyClauses if the new number of clauses exceeds the maximum clause number + /// + /// + public virtual void Add(Query query, Occur occur) + { + Add(new BooleanClause(query, occur)); + } + + /// Adds a clause to a boolean query. + /// TooManyClauses if the new number of clauses exceeds the maximum clause number + /// + /// + public virtual void Add(BooleanClause clause) + { + if (clauses.Count >= _maxClauses) + throw new TooManyClauses(); + + clauses.Add(clause); + } + + /// Returns the set of clauses in this query. + public virtual BooleanClause[] GetClauses() + { + return clauses.ToArray(); + } + + /// Returns the list of clauses in this query. + public virtual System.Collections.Generic.List Clauses + { + get { return clauses; } + } + + /// + /// Returns an iterator on the clauses in this query. + /// + /// + public System.Collections.Generic.IEnumerator GetEnumerator() + { + return clauses.GetEnumerator(); + } + /// Expert: the Weight for BooleanQuery, used to + /// normalize, score and explain these queries. + /// + ///

NOTE: this API and implementation is subject to + /// change suddenly in the next release. + ///
+ [Serializable] + protected internal class BooleanWeight:Weight + { + private void InitBlock(BooleanQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private BooleanQuery enclosingInstance; + public BooleanQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /// The Similarity implementation. + protected internal Similarity similarity; + protected internal System.Collections.Generic.List weights; + + public BooleanWeight(BooleanQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + weights = new System.Collections.Generic.List(Enclosing_Instance.clauses.Count); + for (int i = 0; i < Enclosing_Instance.clauses.Count; i++) + { + weights.Add(Enclosing_Instance.clauses[i].Query.CreateWeight(searcher)); + } + } + + public override Query Query + { + get { return Enclosing_Instance; } + } + + public override float Value + { + get { return Enclosing_Instance.Boost; } + } + + public override float GetSumOfSquaredWeights() + { + float sum = 0.0f; + for (int i = 0; i < weights.Count; i++) + { + // call sumOfSquaredWeights for all clauses in case of side effects + float s = weights[i].GetSumOfSquaredWeights(); // sum sub weights + if (!Enclosing_Instance.clauses[i].IsProhibited) + // only add to sum for non-prohibited clauses + sum += s; + } + + sum *= Enclosing_Instance.Boost*Enclosing_Instance.Boost; // boost each sub-weight + + return sum; + } + + + public override void Normalize(float norm) + { + norm *= Enclosing_Instance.Boost; // incorporate boost + foreach (Weight w in weights) + { + // normalize all clauses, (even if prohibited in case of side affects) + w.Normalize(norm); + } + } + + public override Explanation Explain(IndexReader reader, int doc) + { + int minShouldMatch = Enclosing_Instance.MinimumNumberShouldMatch; + ComplexExplanation sumExpl = new ComplexExplanation(); + sumExpl.Description = "sum of:"; + int coord = 0; + int maxCoord = 0; + float sum = 0.0f; + bool fail = false; + int shouldMatchCount = 0; + System.Collections.Generic.IEnumerator cIter = Enclosing_Instance.clauses.GetEnumerator(); + for (System.Collections.Generic.IEnumerator wIter = weights.GetEnumerator(); wIter.MoveNext(); ) + { + cIter.MoveNext(); + Weight w = wIter.Current; + BooleanClause c = cIter.Current; + if (w.Scorer(reader, true, true) == null) + { + continue; + } + Explanation e = w.Explain(reader, doc); + if (!c.IsProhibited) + maxCoord++; + if (e.IsMatch) + { + if (!c.IsProhibited) + { + sumExpl.AddDetail(e); + sum += e.Value; + coord++; + } + else + { + Explanation r = new Explanation(0.0f, "match on prohibited clause (" + c.Query.ToString() + ")"); + r.AddDetail(e); + sumExpl.AddDetail(r); + fail = true; + } + if (c.Occur == Occur.SHOULD) + shouldMatchCount++; + } + else if (c.IsRequired) + { + Explanation r = new Explanation(0.0f, "no match on required clause (" + c.Query.ToString() + ")"); + r.AddDetail(e); + sumExpl.AddDetail(r); + fail = true; + } + } + if (fail) + { + System.Boolean tempAux = false; + sumExpl.Match = tempAux; + sumExpl.Value = 0.0f; + sumExpl.Description = "Failure to meet condition(s) of required/prohibited clause(s)"; + return sumExpl; + } + else if (shouldMatchCount < minShouldMatch) + { + System.Boolean tempAux2 = false; + sumExpl.Match = tempAux2; + sumExpl.Value = 0.0f; + sumExpl.Description = "Failure to match minimum number " + "of optional clauses: " + minShouldMatch; + return sumExpl; + } + + sumExpl.Match = 0 < 
coord?true:false; + sumExpl.Value = sum; + + float coordFactor = similarity.Coord(coord, maxCoord); + if (coordFactor == 1.0f) + // coord is no-op + return sumExpl; + // eliminate wrapper + else + { + ComplexExplanation result = new ComplexExplanation(sumExpl.IsMatch, sum * coordFactor, "product of:"); + result.AddDetail(sumExpl); + result.AddDetail(new Explanation(coordFactor, "coord(" + coord + "/" + maxCoord + ")")); + return result; + } + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + var required = new System.Collections.Generic.List(); + var prohibited = new System.Collections.Generic.List(); + var optional = new System.Collections.Generic.List(); + + System.Collections.Generic.IEnumerator cIter = Enclosing_Instance.clauses.GetEnumerator(); + foreach (Weight w in weights) + { + cIter.MoveNext(); + BooleanClause c = (BooleanClause) cIter.Current; + Scorer subScorer = w.Scorer(reader, true, false); + if (subScorer == null) + { + if (c.IsRequired) + { + return null; + } + } + else if (c.IsRequired) + { + required.Add(subScorer); + } + else if (c.IsProhibited) + { + prohibited.Add(subScorer); + } + else + { + optional.Add(subScorer); + } + } + + // Check if we can return a BooleanScorer + if (!scoreDocsInOrder && topScorer && required.Count == 0 && prohibited.Count < 32) + { + return new BooleanScorer(similarity, Enclosing_Instance.minNrShouldMatch, optional, prohibited); + } + + if (required.Count == 0 && optional.Count == 0) + { + // no required and optional clauses. + return null; + } + else if (optional.Count < Enclosing_Instance.minNrShouldMatch) + { + // either >1 req scorer, or there are 0 req scorers and at least 1 + // optional scorer. Therefore if there are not enough optional scorers + // no documents will be matched by the query + return null; + } + + // Return a BooleanScorer2 + return new BooleanScorer2(similarity, Enclosing_Instance.minNrShouldMatch, required, prohibited, optional); + } + + public override bool GetScoresDocsOutOfOrder() + { + int numProhibited = 0; + foreach (BooleanClause c in Enclosing_Instance.clauses) + { + if (c.IsRequired) + { + return false; // BS2 (in-order) will be used by scorer() + } + else if (c.IsProhibited) + { + ++numProhibited; + } + } + + if (numProhibited > 32) + { + // cannot use BS + return false; + } + + // scorer() will return an out-of-order scorer if requested. 
+ return true; + } + } + + public override Weight CreateWeight(Searcher searcher) + { + return new BooleanWeight(this, searcher); + } + + public override Query Rewrite(IndexReader reader) + { + if (minNrShouldMatch == 0 && clauses.Count == 1) + { + // optimize 1-clause queries + BooleanClause c = clauses[0]; + if (!c.IsProhibited) + { + // just return clause + + Query query = c.Query.Rewrite(reader); // rewrite first + + if (Boost != 1.0f) + { + // incorporate boost + if (query == c.Query) + // if rewrite was no-op + query = (Query) query.Clone(); // then clone before boost + query.Boost = Boost * query.Boost; + } + + return query; + } + } + + BooleanQuery clone = null; // recursively rewrite + for (int i = 0; i < clauses.Count; i++) + { + BooleanClause c = clauses[i]; + Query query = c.Query.Rewrite(reader); + if (query != c.Query) + { + // clause rewrote: must clone + if (clone == null) + clone = (BooleanQuery) this.Clone(); + clone.clauses[i] = new BooleanClause(query, c.Occur); + } + } + if (clone != null) + { + return clone; // some clauses rewrote + } + else + return this; // no clauses rewrote + } + + // inherit javadoc + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + foreach(BooleanClause clause in clauses) + { + clause.Query.ExtractTerms(terms); + } + } + + public override System.Object Clone() + { + BooleanQuery clone = (BooleanQuery) base.Clone(); + clone.clauses = (EquatableList) this.clauses.Clone(); + return clone; + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + bool needParens = (Boost != 1.0) || (MinimumNumberShouldMatch > 0); + if (needParens) + { + buffer.Append("("); + } + + for (int i = 0; i < clauses.Count; i++) + { + BooleanClause c = clauses[i]; + if (c.IsProhibited) + buffer.Append("-"); + else if (c.IsRequired) + buffer.Append("+"); + + Query subQuery = c.Query; + if (subQuery != null) + { + if (subQuery is BooleanQuery) + { + // wrap sub-bools in parens + buffer.Append("("); + buffer.Append(subQuery.ToString(field)); + buffer.Append(")"); + } + else + { + buffer.Append(subQuery.ToString(field)); + } + } + else + { + buffer.Append("null"); + } + + if (i != clauses.Count - 1) + buffer.Append(" "); + } + + if (needParens) + { + buffer.Append(")"); + } + + if (MinimumNumberShouldMatch > 0) + { + buffer.Append('~'); + buffer.Append(MinimumNumberShouldMatch); + } + + if (Boost != 1.0f) + { + buffer.Append(ToStringUtils.Boost(Boost)); + } + + return buffer.ToString(); + } + + /// Returns true iff o is equal to this. + public override bool Equals(System.Object o) + { + if (!(o is BooleanQuery)) + return false; + BooleanQuery other = (BooleanQuery)o; + return (this.Boost == other.Boost) + && this.clauses.Equals(other.clauses) + && this.MinimumNumberShouldMatch == other.MinimumNumberShouldMatch + && this.disableCoord == other.disableCoord; + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ clauses.GetHashCode() + MinimumNumberShouldMatch + (disableCoord ? 
17 : 0); + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/BooleanScorer.cs b/external/Lucene.Net.Light/src/core/Search/BooleanScorer.cs new file mode 100644 index 0000000000..1a4be8d99e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/BooleanScorer.cs @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /* Description from Doug Cutting (excerpted from + * LUCENE-1483): + * + * BooleanScorer uses a ~16k array to score windows of + * docs. So it scores docs 0-16k first, then docs 16-32k, + * etc. For each window it iterates through all query terms + * and accumulates a score in table[doc%16k]. It also stores + * in the table a bitmask representing which terms + * contributed to the score. Non-zero scores are chained in + * a linked list. At the end of scoring each window it then + * iterates through the linked list and, if the bitmask + * matches the boolean constraints, collects a hit. For + * boolean queries with lots of frequent terms this can be + * much faster, since it does not need to update a priority + * queue for each posting, instead performing constant-time + * operations per posting. The only downside is that it + * results in hits being delivered out-of-order within the + * window, which means it cannot be nested within other + * scorers. But it works well as a top-level scorer. + * + * The new BooleanScorer2 implementation instead works by + * merging priority queues of postings, albeit with some + * clever tricks. For example, a pure conjunction (all terms + * required) does not require a priority queue. Instead it + * sorts the posting streams at the start, then repeatedly + * skips the first to to the last. If the first ever equals + * the last, then there's a hit. When some terms are + * required and some terms are optional, the conjunction can + * be evaluated first, then the optional terms can all skip + * to the match and be added to the score. Thus the + * conjunction can reduce the number of priority queue + * updates for the optional terms. 
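The window/bucket scheme described in the comment above boils down to: give each optional or prohibited clause one bit, index a fixed-size table by doc & MASK, sum scores per slot, and keep only slots whose bitmask satisfies the required/prohibited constraints. A minimal standalone sketch of that idea follows; the class and parameter names are invented for illustration and are not part of the imported sources:

    using System;
    using System.Collections.Generic;

    static class BucketWindowSketch
    {
        const int Size = 1 << 11;      // one scoring window (2048 doc slots)
        const int Mask = Size - 1;

        // postings: (clauseBit, doc, score) triples that fall inside one window
        public static IEnumerable<(int doc, float score)> ScoreWindow(
            IEnumerable<(int bit, int doc, float score)> postings,
            int prohibitedMask, int requiredMask)
        {
            var docs = new int[Size];
            var scores = new float[Size];
            var bits = new int[Size];
            for (int i = 0; i < Size; i++) docs[i] = -1;

            foreach (var (bit, doc, score) in postings)
            {
                int slot = doc & Mask;                // constant-time slot lookup
                if (docs[slot] != doc) { docs[slot] = doc; scores[slot] = 0f; bits[slot] = 0; }
                scores[slot] += score;                // accumulate this clause's score
                bits[slot] |= bit;                    // record which clause matched
            }

            for (int slot = 0; slot < Size; slot++)   // emit hits, out of doc order
            {
                if (docs[slot] < 0) continue;
                if ((bits[slot] & prohibitedMask) == 0 &&
                    (bits[slot] & requiredMask) == requiredMask)
                    yield return (docs[slot], scores[slot]);
            }
        }
    }

The real BooleanScorer additionally chains non-empty slots into a linked list and applies the coord factor, but the constant-time per-posting work is the same.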
*/ + + public sealed class BooleanScorer:Scorer + { + private void InitBlock() + { + bucketTable = new BucketTable(); + } + + private sealed class BooleanScorerCollector:Collector + { + private BucketTable bucketTable; + private int mask; + private Scorer scorer; + + public BooleanScorerCollector(int mask, BucketTable bucketTable) + { + this.mask = mask; + this.bucketTable = bucketTable; + } + public override void Collect(int doc) + { + BucketTable table = bucketTable; + int i = doc & Lucene.Net.Search.BooleanScorer.BucketTable.MASK; + Bucket bucket = table.buckets[i]; + if (bucket == null) + table.buckets[i] = bucket = new Bucket(); + + if (bucket.doc != doc) + { + // invalid bucket + bucket.doc = doc; // set doc + bucket.score = scorer.Score(); // initialize score + bucket.bits = mask; // initialize mask + bucket.coord = 1; // initialize coord + + bucket.next = table.first; // push onto valid list + table.first = bucket; + } + else + { + // valid bucket + bucket.score += scorer.Score(); // increment score + bucket.bits |= mask; // add bits in mask + bucket.coord++; // increment coord + } + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + // not needed by this implementation + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + // An internal class which is used in score(Collector, int) for setting the + // current score. This is required since Collector exposes a setScorer method + // and implementations that need the score will call scorer.score(). + // Therefore the only methods that are implemented are score() and doc(). + private sealed class BucketScorer:Scorer + { + + internal float score; + internal int doc = NO_MORE_DOCS; + + public BucketScorer():base(null) + { + } + + public override int Advance(int target) + { + return NO_MORE_DOCS; + } + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + return NO_MORE_DOCS; + } + + public override float Score() + { + return score; + } + } + + internal sealed class Bucket + { + internal int doc = - 1; // tells if bucket is valid + internal float score; // incremental score + internal int bits; // used for bool constraints + internal int coord; // count of terms in score + internal Bucket next; // next valid bucket + } + + /// A simple hash table of document scores within a range. 
+ internal sealed class BucketTable + { + private void InitBlock() + { + buckets = new Bucket[SIZE]; + } + public const int SIZE = 1 << 11; + public static readonly int MASK; + + internal Bucket[] buckets; + internal Bucket first = null; // head of valid list + + public BucketTable() + { + InitBlock(); + } + + public Collector NewCollector(int mask) + { + return new BooleanScorerCollector(mask, this); + } + + public int Size() + { + return SIZE; + } + static BucketTable() + { + MASK = SIZE - 1; + } + } + + internal sealed class SubScorer + { + public Scorer scorer; + public bool required = false; + public bool prohibited = false; + public Collector collector; + public SubScorer next; + + public SubScorer(Scorer scorer, bool required, bool prohibited, Collector collector, SubScorer next) + { + this.scorer = scorer; + this.required = required; + this.prohibited = prohibited; + this.collector = collector; + this.next = next; + } + } + + private SubScorer scorers = null; + private BucketTable bucketTable; + private int maxCoord = 1; + private float[] coordFactors; + private int requiredMask = 0; + private int prohibitedMask = 0; + private int nextMask = 1; + private int minNrShouldMatch; + private int end; + private Bucket current; + private int doc = - 1; + + public /*internal*/ BooleanScorer(Similarity similarity, int minNrShouldMatch, + System.Collections.Generic.List optionalScorers, System.Collections.Generic.List prohibitedScorers) + : base(similarity) + { + InitBlock(); + this.minNrShouldMatch = minNrShouldMatch; + + if (optionalScorers != null && optionalScorers.Count > 0) + { + foreach (Scorer scorer in optionalScorers) + { + maxCoord++; + if (scorer.NextDoc() != NO_MORE_DOCS) + { + scorers = new SubScorer(scorer, false, false, bucketTable.NewCollector(0), scorers); + } + } + } + + if (prohibitedScorers != null && prohibitedScorers.Count > 0) + { + foreach(Scorer scorer in prohibitedScorers) + { + int mask = nextMask; + nextMask = nextMask << 1; + prohibitedMask |= mask; // update prohibited mask + if (scorer.NextDoc() != NO_MORE_DOCS) + { + scorers = new SubScorer(scorer, false, true, bucketTable.NewCollector(mask), scorers); + } + } + } + + coordFactors = new float[maxCoord]; + Similarity sim = Similarity; + for (int i = 0; i < maxCoord; i++) + { + coordFactors[i] = sim.Coord(i, maxCoord - 1); + } + } + + // firstDocID is ignored since nextDoc() initializes 'current' + public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID) + { + bool more; + Bucket tmp; + BucketScorer bs = new BucketScorer(); + // The internal loop will set the score and doc before calling collect. 
+ collector.SetScorer(bs); + do + { + bucketTable.first = null; + + while (current != null) + { + // more queued + + // check prohibited & required + if ((current.bits & prohibitedMask) == 0 && (current.bits & requiredMask) == requiredMask) + { + + if (current.doc >= max) + { + tmp = current; + current = current.next; + tmp.next = bucketTable.first; + bucketTable.first = tmp; + continue; + } + + if (current.coord >= minNrShouldMatch) + { + bs.score = current.score * coordFactors[current.coord]; + bs.doc = current.doc; + collector.Collect(current.doc); + } + } + + current = current.next; // pop the queue + } + + if (bucketTable.first != null) + { + current = bucketTable.first; + bucketTable.first = current.next; + return true; + } + + // refill the queue + more = false; + end += BucketTable.SIZE; + for (SubScorer sub = scorers; sub != null; sub = sub.next) + { + int subScorerDocID = sub.scorer.DocID(); + if (subScorerDocID != NO_MORE_DOCS) + { + more |= sub.scorer.Score(sub.collector, end, subScorerDocID); + } + } + current = bucketTable.first; + } + while (current != null || more); + + return false; + } + + public override int Advance(int target) + { + throw new System.NotSupportedException(); + } + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + bool more; + do + { + while (bucketTable.first != null) + { + // more queued + current = bucketTable.first; + bucketTable.first = current.next; // pop the queue + + // check prohibited & required, and minNrShouldMatch + if ((current.bits & prohibitedMask) == 0 && (current.bits & requiredMask) == requiredMask && current.coord >= minNrShouldMatch) + { + return doc = current.doc; + } + } + + // refill the queue + more = false; + end += BucketTable.SIZE; + for (SubScorer sub = scorers; sub != null; sub = sub.next) + { + Scorer scorer = sub.scorer; + sub.collector.SetScorer(scorer); + int doc = scorer.DocID(); + while (doc < end) + { + sub.collector.Collect(doc); + doc = scorer.NextDoc(); + } + more |= (doc != NO_MORE_DOCS); + } + } + while (bucketTable.first != null || more); + + return this.doc = NO_MORE_DOCS; + } + + public override float Score() + { + return current.score * coordFactors[current.coord]; + } + + public override void Score(Collector collector) + { + Score(collector, System.Int32.MaxValue, NextDoc()); + } + + public override System.String ToString() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("boolean("); + for (SubScorer sub = scorers; sub != null; sub = sub.next) + { + buffer.Append(sub.scorer.ToString()); + buffer.Append(" "); + } + buffer.Append(")"); + return buffer.ToString(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/BooleanScorer2.cs b/external/Lucene.Net.Light/src/core/Search/BooleanScorer2.cs new file mode 100644 index 0000000000..3c8c611529 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/BooleanScorer2.cs @@ -0,0 +1,417 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /* See the description in BooleanScorer.java, comparing + * BooleanScorer & BooleanScorer2 */ + + /// An alternative to BooleanScorer that also allows a minimum number + /// of optional scorers that should match. + ///
Implements skipTo(), and has no limitations on the numbers of added scorers. + ///
Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. + ///
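At the query level, the three scorer lists handed to BooleanScorer2 come straight from the clause types on the BooleanQuery, and minNrShouldMatch is the query's MinimumNumberShouldMatch. A small usage sketch, assuming TermQuery and Term from the rest of the Lucene.Net API are available in this subset (field and term values are placeholders):

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    static class BooleanQueryUsageSketch
    {
        public static BooleanQuery Build()
        {
            var query = new BooleanQuery();
            query.Add(new TermQuery(new Term("body", "lucene")), Occur.MUST);     // -> required scorer
            query.Add(new TermQuery(new Term("body", "mono")), Occur.SHOULD);     // -> optional scorer
            query.Add(new TermQuery(new Term("body", "monodoc")), Occur.SHOULD);  // -> optional scorer
            query.Add(new TermQuery(new Term("body", "java")), Occur.MUST_NOT);   // -> prohibited scorer
            query.MinimumNumberShouldMatch = 1;  // becomes minNrShouldMatch in the scorer
            return query;
        }
    }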
+ class BooleanScorer2 : Scorer + { + private class AnonymousClassDisjunctionSumScorer:DisjunctionSumScorer + { + private void InitBlock(BooleanScorer2 enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private BooleanScorer2 enclosingInstance; + public BooleanScorer2 Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassDisjunctionSumScorer(BooleanScorer2 enclosingInstance, System.Collections.Generic.IList scorers, int minNrShouldMatch) + : base(scorers, minNrShouldMatch) + { + InitBlock(enclosingInstance); + } + private int lastScoredDoc = - 1; + // Save the score of lastScoredDoc, so that we don't compute it more than + // once in score(). + private float lastDocScore = System.Single.NaN; + public override float Score() + { + int doc = DocID(); + if (doc >= lastScoredDoc) + { + if (doc > lastScoredDoc) + { + lastDocScore = base.Score(); + lastScoredDoc = doc; + } + Enclosing_Instance.coordinator.nrMatchers += base.nrMatchers; + } + return lastDocScore; + } + } + private class AnonymousClassConjunctionScorer:ConjunctionScorer + { + private void InitBlock(int requiredNrMatchers, BooleanScorer2 enclosingInstance) + { + this.requiredNrMatchers = requiredNrMatchers; + this.enclosingInstance = enclosingInstance; + } + private int requiredNrMatchers; + private BooleanScorer2 enclosingInstance; + public BooleanScorer2 Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassConjunctionScorer(int requiredNrMatchers, BooleanScorer2 enclosingInstance, Lucene.Net.Search.Similarity defaultSimilarity, System.Collections.Generic.IList requiredScorers) + : base(defaultSimilarity, requiredScorers) + { + InitBlock(requiredNrMatchers, enclosingInstance); + } + private int lastScoredDoc = - 1; + // Save the score of lastScoredDoc, so that we don't compute it more than + // once in score(). + private float lastDocScore = System.Single.NaN; + public override float Score() + { + int doc = DocID(); + if (doc >= lastScoredDoc) + { + if (doc > lastScoredDoc) + { + lastDocScore = base.Score(); + lastScoredDoc = doc; + } + Enclosing_Instance.coordinator.nrMatchers += requiredNrMatchers; + } + // All scorers match, so defaultSimilarity super.score() always has 1 as + // the coordination factor. + // Therefore the sum of the scores of the requiredScorers + // is used as score. + return lastDocScore; + } + } + + private System.Collections.Generic.List requiredScorers; + private System.Collections.Generic.List optionalScorers; + private System.Collections.Generic.List prohibitedScorers; + + private class Coordinator + { + public Coordinator(BooleanScorer2 enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(BooleanScorer2 enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private BooleanScorer2 enclosingInstance; + public BooleanScorer2 Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal float[] coordFactors = null; + internal int maxCoord = 0; // to be increased for each non prohibited scorer + internal int nrMatchers; // to be increased by score() of match counting scorers. + + internal virtual void Init() + { + // use after all scorers have been added. 
+ coordFactors = new float[maxCoord + 1]; + Similarity sim = Enclosing_Instance.Similarity; + for (int i = 0; i <= maxCoord; i++) + { + coordFactors[i] = sim.Coord(i, maxCoord); + } + } + } + + private Coordinator coordinator; + + /// The scorer to which all scoring will be delegated, + /// except for computing and using the coordination factor. + /// + private Scorer countingSumScorer; + + /// The number of optionalScorers that need to match (if there are any) + private int minNrShouldMatch; + + private int doc = - 1; + + /// Creates a with the given similarity and lists of required, + /// prohibited and optional scorers. In no required scorers are added, at least + /// one of the optional scorers will have to match during the search. + /// + /// + /// The similarity to be used. + /// + /// The minimum number of optional added scorers that should match + /// during the search. In case no required scorers are added, at least + /// one of the optional scorers will have to match during the search. + /// + /// the list of required scorers. + /// + /// the list of prohibited scorers. + /// + /// the list of optional scorers. + /// + public BooleanScorer2(Similarity similarity, int minNrShouldMatch, + System.Collections.Generic.List required, + System.Collections.Generic.List prohibited, + System.Collections.Generic.List optional) + : base(similarity) + { + if (minNrShouldMatch < 0) + { + throw new System.ArgumentException("Minimum number of optional scorers should not be negative"); + } + coordinator = new Coordinator(this); + this.minNrShouldMatch = minNrShouldMatch; + + optionalScorers = optional; + coordinator.maxCoord += optional.Count; + + requiredScorers = required; + coordinator.maxCoord += required.Count; + + prohibitedScorers = prohibited; + + coordinator.Init(); + countingSumScorer = MakeCountingSumScorer(); + } + + /// Count a scorer as a single match. + private class SingleMatchScorer:Scorer + { + private void InitBlock(BooleanScorer2 enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private BooleanScorer2 enclosingInstance; + public BooleanScorer2 Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Scorer scorer; + private int lastScoredDoc = - 1; + // Save the score of lastScoredDoc, so that we don't compute it more than + // once in score(). 
+ private float lastDocScore = System.Single.NaN; + + internal SingleMatchScorer(BooleanScorer2 enclosingInstance, Scorer scorer):base(scorer.Similarity) + { + InitBlock(enclosingInstance); + this.scorer = scorer; + } + public override float Score() + { + int doc = DocID(); + if (doc >= lastScoredDoc) + { + if (doc > lastScoredDoc) + { + lastDocScore = scorer.Score(); + lastScoredDoc = doc; + } + Enclosing_Instance.coordinator.nrMatchers++; + } + return lastDocScore; + } + + public override int DocID() + { + return scorer.DocID(); + } + + public override int NextDoc() + { + return scorer.NextDoc(); + } + + public override int Advance(int target) + { + return scorer.Advance(target); + } + } + + private Scorer CountingDisjunctionSumScorer(System.Collections.Generic.List scorers, int minNrShouldMatch) + { + // each scorer from the list counted as a single matcher + return new AnonymousClassDisjunctionSumScorer(this, scorers, minNrShouldMatch); + } + + private static readonly Similarity defaultSimilarity; + + private Scorer CountingConjunctionSumScorer(System.Collections.Generic.List requiredScorers) + { + // each scorer from the list counted as a single matcher + int requiredNrMatchers = requiredScorers.Count; + return new AnonymousClassConjunctionScorer(requiredNrMatchers, this, defaultSimilarity, requiredScorers); + } + + private Scorer DualConjunctionSumScorer(Scorer req1, Scorer req2) + { + // non counting. + return new ConjunctionScorer(defaultSimilarity, new Scorer[]{req1, req2}); + // All scorers match, so defaultSimilarity always has 1 as + // the coordination factor. + // Therefore the sum of the scores of two scorers + // is used as score. + } + + /// Returns the scorer to be used for match counting and score summing. + /// Uses requiredScorers, optionalScorers and prohibitedScorers. + /// + private Scorer MakeCountingSumScorer() + { + // each scorer counted as a single matcher + return (requiredScorers.Count == 0)?MakeCountingSumScorerNoReq():MakeCountingSumScorerSomeReq(); + } + + private Scorer MakeCountingSumScorerNoReq() + { + // No required scorers + // minNrShouldMatch optional scorers are required, but at least 1 + int nrOptRequired = (minNrShouldMatch < 1)?1:minNrShouldMatch; + Scorer requiredCountingSumScorer; + if (optionalScorers.Count > nrOptRequired) + requiredCountingSumScorer = CountingDisjunctionSumScorer(optionalScorers, nrOptRequired); + else if (optionalScorers.Count == 1) + requiredCountingSumScorer = new SingleMatchScorer(this, optionalScorers[0]); + else + requiredCountingSumScorer = CountingConjunctionSumScorer(optionalScorers); + return AddProhibitedScorers(requiredCountingSumScorer); + } + + private Scorer MakeCountingSumScorerSomeReq() + { + // At least one required scorer. + if (optionalScorers.Count == minNrShouldMatch) + { + // all optional scorers also required. + var allReq = new System.Collections.Generic.List(requiredScorers); + allReq.AddRange(optionalScorers); + return AddProhibitedScorers(CountingConjunctionSumScorer(allReq)); + } + else + { + // optionalScorers.size() > minNrShouldMatch, and at least one required scorer + Scorer requiredCountingSumScorer = + requiredScorers.Count == 1 + ? 
new SingleMatchScorer(this, requiredScorers[0]) + : CountingConjunctionSumScorer(requiredScorers); + if (minNrShouldMatch > 0) + { + // use a required disjunction scorer over the optional scorers + return AddProhibitedScorers(DualConjunctionSumScorer(requiredCountingSumScorer, CountingDisjunctionSumScorer(optionalScorers, minNrShouldMatch))); + } + else + { + // minNrShouldMatch == 0 + return new ReqOptSumScorer(AddProhibitedScorers(requiredCountingSumScorer), + optionalScorers.Count == 1 + ? new SingleMatchScorer(this, optionalScorers[0]) + : CountingDisjunctionSumScorer(optionalScorers, 1)); + } + } + } + + /// Returns the scorer to be used for match counting and score summing. + /// Uses the given required scorer and the prohibitedScorers. + /// + /// A required scorer already built. + /// + private Scorer AddProhibitedScorers(Scorer requiredCountingSumScorer) + { + return (prohibitedScorers.Count == 0) + ? requiredCountingSumScorer + : new ReqExclScorer(requiredCountingSumScorer, + ((prohibitedScorers.Count == 1) + ? prohibitedScorers[0] + : new DisjunctionSumScorer(prohibitedScorers))); + } + + /// Scores and collects all matching documents. + /// The collector to which all matching documents are passed through. + /// + public override void Score(Collector collector) + { + collector.SetScorer(this); + while ((doc = countingSumScorer.NextDoc()) != NO_MORE_DOCS) + { + collector.Collect(doc); + } + } + + public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID) + { + doc = firstDocID; + collector.SetScorer(this); + while (doc < max) + { + collector.Collect(doc); + doc = countingSumScorer.NextDoc(); + } + return doc != NO_MORE_DOCS; + } + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + return doc = countingSumScorer.NextDoc(); + } + + public override float Score() + { + coordinator.nrMatchers = 0; + float sum = countingSumScorer.Score(); + return sum * coordinator.coordFactors[coordinator.nrMatchers]; + } + + public override int Advance(int target) + { + return doc = countingSumScorer.Advance(target); + } + + static BooleanScorer2() + { + defaultSimilarity = Search.Similarity.Default; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/CachingSpanFilter.cs b/external/Lucene.Net.Light/src/core/Search/CachingSpanFilter.cs new file mode 100644 index 0000000000..89a6203de7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/CachingSpanFilter.cs @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using System.Runtime.InteropServices; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// Wraps another SpanFilter's result and caches it. 
The purpose is to allow + /// filters to simply filter, and then wrap with this class to add caching. + /// + [Serializable] + public class CachingSpanFilter:SpanFilter + { + private SpanFilter filter; + + /// A transient Filter cache (internal because of test) + [NonSerialized] + internal CachingWrapperFilter.FilterCache cache; + + /// + /// New deletions always result in a cache miss, by default + /// (. + /// Filter to cache results of + /// + /// + public CachingSpanFilter(SpanFilter filter): this(filter, CachingWrapperFilter.DeletesMode.RECACHE) + { + + } + + /// New deletions always result in a cache miss, specify the + /// Filter to cache results of + /// See + public CachingSpanFilter(SpanFilter filter, CachingWrapperFilter.DeletesMode deletesMode) + { + this.filter = filter; + if (deletesMode == CachingWrapperFilter.DeletesMode.DYNAMIC) + { + throw new System.ArgumentException("DeletesMode.DYNAMIC is not supported"); + } + this.cache = new AnonymousFilterCache(deletesMode); + } + + class AnonymousFilterCache : CachingWrapperFilter.FilterCache + { + public AnonymousFilterCache(CachingWrapperFilter.DeletesMode deletesMode) : base(deletesMode) + { + } + + protected override SpanFilterResult MergeDeletes(IndexReader reader, SpanFilterResult docIdSet) + { + throw new System.ArgumentException("DeletesMode.DYNAMIC is not supported"); + } + } + + public override DocIdSet GetDocIdSet(IndexReader reader) + { + SpanFilterResult result = GetCachedResult(reader); + return result != null?result.DocIdSet:null; + } + + // for testing + public int hitCount, missCount; + + private SpanFilterResult GetCachedResult(IndexReader reader) + { + object coreKey = reader.FieldCacheKey; + object delCoreKey = reader.HasDeletions ? reader.DeletesCacheKey : coreKey; + + SpanFilterResult result = cache.Get(reader, coreKey, delCoreKey); + if (result != null) { + hitCount++; + return result; + } + + missCount++; + result = filter.BitSpans(reader); + + cache.Put(coreKey, delCoreKey, result); + return result; + } + + + public override SpanFilterResult BitSpans(IndexReader reader) + { + return GetCachedResult(reader); + } + + public override System.String ToString() + { + return "CachingSpanFilter(" + filter + ")"; + } + + public override bool Equals(System.Object o) + { + if (!(o is CachingSpanFilter)) + return false; + return this.filter.Equals(((CachingSpanFilter) o).filter); + } + + public override int GetHashCode() + { + return filter.GetHashCode() ^ 0x1117BF25; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/CachingWrapperFilter.cs b/external/Lucene.Net.Light/src/core/Search/CachingWrapperFilter.cs new file mode 100644 index 0000000000..4e8023a095 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/CachingWrapperFilter.cs @@ -0,0 +1,279 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using OpenBitSetDISI = Lucene.Net.Util.OpenBitSetDISI; +using Lucene.Net.Util; + +namespace Lucene.Net.Search +{ + + /// Wraps another filter's result and caches it. The purpose is to allow + /// filters to simply filter, and then wrap with this class to add caching. + /// + [Serializable] + public class CachingWrapperFilter:Filter + { + protected internal Filter filter; + + /// + /// Expert: Specifies how new deletions against a reopened + /// reader should be handled. + /// + /// The default is IGNORE, which means the cache entry + /// will be re-used for a given segment, even when that + /// segment has been reopened due to changes in deletions. + /// This is a big performance gain, especially with + /// near-real-timer readers, since you don't hit a cache + /// miss on every reopened reader for prior segments. + /// + /// However, in some cases this can cause invalid query + /// results, allowing deleted documents to be returned. + /// This only happens if the main query does not rule out + /// deleted documents on its own, such as a toplevel + /// ConstantScoreQuery. To fix this, use RECACHE to + /// re-create the cached filter (at a higher per-reopen + /// cost, but at faster subsequent search performance), or + /// use DYNAMIC to dynamically intersect deleted docs (fast + /// reopen time but some hit to search performance). + /// + public enum DeletesMode { IGNORE, RECACHE, DYNAMIC } + + internal FilterCache cache; + + [Serializable] + abstract internal class FilterCache where T : class + { + /* + * A transient Filter cache (package private because of test) + */ + // NOTE: not final so that we can dynamically re-init + // after de-serialize + volatile IDictionary cache; + + private DeletesMode deletesMode; + + public FilterCache(DeletesMode deletesMode) + { + this.deletesMode = deletesMode; + } + + public T Get(IndexReader reader, object coreKey, object delCoreKey) + { + lock (this) + { + T value; + + if (cache == null) + { + cache = new WeakDictionary(); + } + + if (deletesMode == DeletesMode.IGNORE) + { + // key on core + value = cache[coreKey]; + } + else if (deletesMode == DeletesMode.RECACHE) + { + // key on deletes, if any, else core + value = cache[delCoreKey]; + } + else + { + + System.Diagnostics.Debug.Assert(deletesMode == DeletesMode.DYNAMIC); + + // first try for exact match + value = cache[delCoreKey]; + + if (value == null) + { + // now for core match, but dynamically AND NOT + // deletions + value = cache[coreKey]; + if (value != null && reader.HasDeletions) + { + value = MergeDeletes(reader, value); + } + } + } + return value; + } + + } + + protected abstract T MergeDeletes(IndexReader reader, T value); + + public void Put(object coreKey, object delCoreKey, T value) + { + lock (this) + { + if (deletesMode == DeletesMode.IGNORE) + { + cache[coreKey] = value; + } + else if (deletesMode == DeletesMode.RECACHE) + { + cache[delCoreKey] = value; + } + else + { + cache[coreKey] = value; + cache[delCoreKey] = value; + } + } + } + } + + /// + /// New deletes are ignored by default, which gives higher + /// cache hit rate on reopened readers. Most of the time + /// this is safe, because the filter will be AND'd with a + /// Query that fully enforces deletions. 
If instead you + /// need this filter to always enforce deletions, pass + /// either or + /// . + /// + /// Filter to cache results of + /// + public CachingWrapperFilter(Filter filter) : this(filter, DeletesMode.IGNORE) + { + } + + /// + /// Expert: by default, the cached filter will be shared + /// across reopened segments that only had changes to their + /// deletions. + /// + /// Filter to cache results of + /// See + /// + public CachingWrapperFilter(Filter filter, DeletesMode deletesMode) + { + this.filter = filter; + cache = new AnonymousFilterCache(deletesMode); + + //cache = new FilterCache(deletesMode) + // { + // public Object mergeDeletes(final IndexReader r, final Object docIdSet) { + // return new FilteredDocIdSet((DocIdSet) docIdSet) { + // protected boolean match(int docID) { + // return !r.isDeleted(docID); + // } + // }; + // } + //}; + } + + class AnonymousFilterCache : FilterCache + { + class AnonymousFilteredDocIdSet : FilteredDocIdSet + { + IndexReader r; + public AnonymousFilteredDocIdSet(DocIdSet innerSet, IndexReader r) : base(innerSet) + { + this.r = r; + } + public override bool Match(int docid) + { + return !r.IsDeleted(docid); + } + } + + public AnonymousFilterCache(DeletesMode deletesMode) : base(deletesMode) + { } + + protected override DocIdSet MergeDeletes(IndexReader reader, DocIdSet docIdSet) + { + return new AnonymousFilteredDocIdSet(docIdSet, reader); + } + } + + /// Provide the DocIdSet to be cached, using the DocIdSet provided + /// by the wrapped Filter. + /// This implementation returns the given DocIdSet. + /// + protected internal virtual DocIdSet DocIdSetToCache(DocIdSet docIdSet, IndexReader reader) + { + if (docIdSet == null) + { + // this is better than returning null, as the nonnull result can be cached + return DocIdSet.EMPTY_DOCIDSET; + } + else if (docIdSet.IsCacheable) { + return docIdSet; + } + else + { + DocIdSetIterator it = docIdSet.Iterator(); + // null is allowed to be returned by iterator(), + // in this case we wrap with the empty set, + // which is cacheable. + return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.MaxDoc); + } + } + + // for testing + public int hitCount, missCount; + + public override DocIdSet GetDocIdSet(IndexReader reader) + { + object coreKey = reader.FieldCacheKey; + object delCoreKey = reader.HasDeletions ? reader.DeletesCacheKey : coreKey; + + DocIdSet docIdSet = cache.Get(reader, coreKey, delCoreKey); + + if (docIdSet != null) + { + hitCount++; + return docIdSet; + } + missCount++; + // cache miss + docIdSet = DocIdSetToCache(filter.GetDocIdSet(reader), reader); + + if (docIdSet != null) + { + cache.Put(coreKey, delCoreKey, docIdSet); + } + + return docIdSet; + } + + public override System.String ToString() + { + return "CachingWrapperFilter(" + filter + ")"; + } + + public override bool Equals(System.Object o) + { + if (!(o is CachingWrapperFilter)) + return false; + return this.filter.Equals(((CachingWrapperFilter) o).filter); + } + + public override int GetHashCode() + { + return filter.GetHashCode() ^ 0x1117BF25; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Collector.cs b/external/Lucene.Net.Light/src/core/Search/Collector.cs new file mode 100644 index 0000000000..e1b02feb32 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Collector.cs @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + ///

Expert: Collectors are primarily meant to be used to + /// gather raw results from a search, and implement sorting + /// or custom result filtering, collation, etc.

+ /// + ///

Lucene's core collectors are derived from Collector. + /// Likely your application can use one of these classes, or + /// subclass TopDocsCollector, instead of + /// implementing Collector directly: + /// + /// + /// + /// TopDocsCollector is an abstract base class + /// that assumes you will retrieve the top N docs, + /// according to some criteria, after collection is + /// done. + /// + /// TopScoreDocCollector is a concrete subclass of TopDocsCollector + /// and sorts according to score + + /// docID. This is used internally by the IndexSearcher + /// search methods that do not take an + /// explicit Sort. It is likely the most frequently + /// used collector. + /// + /// TopFieldCollector subclasses TopDocsCollector + /// and sorts according to a specified Sort + /// object (sort by field). This is used + /// internally by the IndexSearcher search methods + /// that take an explicit Sort. + /// + /// TimeLimitingCollector, which wraps any other + /// Collector and aborts the search if it's taken too much + /// time. + /// + /// PositiveScoresOnlyCollector wraps any other + /// Collector and prevents collection of hits whose score + /// is <= 0.0 + /// + /// + ///

Collector decouples the score from the collected doc: + /// the score computation is skipped entirely if it's not + /// needed. Collectors that do need the score should + /// implement the SetScorer method, to hold onto the + /// passed Scorer instance, and call + /// Scorer.Score() within the Collect method to compute the + /// current hit's score. If your collector may request the + /// score for a single hit multiple times, you should use + /// ScoreCachingWrappingScorer.

+ /// + ///

NOTE: The doc that is passed to the collect + /// method is relative to the current reader. If your + /// collector needs to resolve this to the docID space of the + /// Multi*Reader, you must re-base it by recording the + /// docBase from the most recent setNextReader call. Here's + /// a simple example showing how to collect docIDs into a + /// BitSet:

+ /// + /// + /// Searcher searcher = new IndexSearcher(indexReader); + /// final BitSet bits = new BitSet(indexReader.MaxDoc); + /// searcher.search(query, new Collector() { + /// private int docBase; + /// + /// // ignore scorer + /// public void setScorer(Scorer scorer) { + /// } + /// + /// // accept docs out of order (for a BitSet it doesn't matter) + /// public boolean acceptsDocsOutOfOrder() { + /// return true; + /// } + /// + /// public void collect(int doc) { + /// bits.set(doc + docBase); + /// } + /// + /// public void setNextReader(IndexReader reader, int docBase) { + /// this.docBase = docBase; + /// } + /// }); + /// + /// + ///

Not all collectors will need to rebase the docID. For + /// example, a collector that simply counts the total number + /// of hits would skip it.
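In this C# port the snippet above maps onto the members declared further down in this file (SetScorer, Collect, SetNextReader and the AcceptsDocsOutOfOrder property). A minimal sketch, assuming the caller allocates a System.Collections.BitArray sized to the top-level reader's MaxDoc and the usual Lucene.Net.Search / Lucene.Net.Index usings:

    // Collects matching doc ids into a BitArray; scores are ignored.
    public class BitArrayCollector : Collector
    {
        private readonly System.Collections.BitArray bits;
        private int docBase;

        public BitArrayCollector(System.Collections.BitArray bits)
        {
            this.bits = bits;
        }

        // The score is not needed, so the passed Scorer is ignored.
        public override void SetScorer(Scorer scorer)
        {
        }

        // Re-base the per-segment doc id into the top-level id space.
        public override void Collect(int doc)
        {
            bits.Set(docBase + doc, true);
        }

        public override void SetNextReader(IndexReader reader, int docBase)
        {
            this.docBase = docBase;
        }

        // Out-of-order delivery is fine when only setting bits.
        public override bool AcceptsDocsOutOfOrder
        {
            get { return true; }
        }
    }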

+ /// + ///

NOTE: Prior to 2.9, Lucene silently filtered + /// out hits with score <= 0. As of 2.9, the core Collectors + /// no longer do that. It's very unusual to have such hits + /// (a negative query boost, or function query returning + /// negative custom scores, could cause it to happen). If + /// you need that behavior, use + /// PositiveScoresOnlyCollector.
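A sketch of how that wrapper is applied (this assumes PositiveScoresOnlyCollector and TopScoreDocCollector are available in this subset, and a searcher and query from elsewhere):

    // Drop hits whose score is <= 0, restoring the pre-2.9 behaviour.
    TopScoreDocCollector top = TopScoreDocCollector.Create(10, true);
    searcher.Search(query, new PositiveScoresOnlyCollector(top));
    TopDocs hits = top.TopDocs();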

+ /// + ///

NOTE: This API is experimental and might change + /// in incompatible ways in the next release.

+ /// + ///

+ /// 2.9 + /// + public abstract class Collector + { + + /// Called before successive calls to Collect(int). Implementations + /// that need the score of the current document (passed-in to + /// Collect(int)), should save the passed-in Scorer and call + /// scorer.Score() when needed. + /// + public abstract void SetScorer(Scorer scorer); + + /// Called once for every document matching a query, with the unbased document + /// number. + /// + ///

+ /// Note: This is called in an inner search loop. For good search performance, + /// implementations of this method should not call Searcher.Doc(int) or + /// IndexReader.Document(int) on every hit. + /// Doing so can slow searches by an order of magnitude or more. + ///

+ public abstract void Collect(int doc); + + /// Called before collecting from each IndexReader. All doc ids in + /// Collect(int) will correspond to reader. + /// + /// Add docBase to the current IndexReader's internal document id to re-base ids + /// in Collect(int). + /// + /// + /// next IndexReader + /// + /// + /// + public abstract void SetNextReader(IndexReader reader, int docBase); + + /// + /// Return true if this collector does not + /// require the matching docIDs to be delivered in int sort + /// order (smallest to largest) to Collect(int). + ///

Most Lucene Query implementations will visit + /// matching docIDs in order. However, some queries + /// (currently limited to certain cases of BooleanQuery) + /// can achieve faster searching if the + /// Collector allows them to deliver the + /// docIDs out of order. + ///

Many collectors don't mind getting docIDs out of + /// order, so it's important to return true + /// here. + ///

+ /// + public abstract bool AcceptsDocsOutOfOrder { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ComplexExplanation.cs b/external/Lucene.Net.Light/src/core/Search/ComplexExplanation.cs new file mode 100644 index 0000000000..c794f1846d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ComplexExplanation.cs @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Expert: Describes the score computation for document and query, and + /// can distinguish a match independent of a positive value. + /// + [Serializable] + public class ComplexExplanation:Explanation + { + private System.Boolean? match; + + public ComplexExplanation():base() + { + } + + public ComplexExplanation(bool match, float value_Renamed, System.String description):base(value_Renamed, description) + { + this.match = match; + } + + /// The match status of this explanation node. + /// May be null if match status is unknown + /// + public virtual bool? Match + { + get { return match; } + set { match = value; } + } + + /// Indicates whether or not this Explanation models a good match. + /// + ///

+ /// If the match status is explicitly set (i.e.: not null) this method + /// uses it; otherwise it defers to the superclass. + ///
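A small illustration of the difference from the base class, using only the constructors that appear in this file and in Explanation.cs:

    // With the match flag set, a zero-valued node still reports a match.
    var complex = new ComplexExplanation(true, 0.0f, "filter clause");
    bool a = complex.IsMatch;                       // true: the explicit flag wins
    bool b = new Explanation(0.0f, "x").IsMatch;    // false: falls back to Value > 0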

+ ///

+ public override bool IsMatch + { + get + { + System.Boolean? m = Match; + return m ?? base.IsMatch; + } + } + + protected internal override string Summary + { + get + { + if (!match.HasValue) + return base.Summary; + + return Value + " = " + (IsMatch ? "(MATCH) " : "(NON-MATCH) ") + Description; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ConjunctionScorer.cs b/external/Lucene.Net.Light/src/core/Search/ConjunctionScorer.cs new file mode 100644 index 0000000000..6befe06d86 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ConjunctionScorer.cs @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; + +namespace Lucene.Net.Search +{ + + /// Scorer for conjunctions, sets of queries, all of which are required. + class ConjunctionScorer:Scorer + { + private Scorer[] scorers; + private float coord; + private int lastDoc = - 1; + + public ConjunctionScorer(Similarity similarity, System.Collections.Generic.ICollection scorers) + : this(similarity, scorers.ToArray()) + { + } + + public ConjunctionScorer(Similarity similarity, params Scorer[] scorers):base(similarity) + { + this.scorers = scorers; + coord = similarity.Coord(scorers.Length, scorers.Length); + + for (int i = 0; i < scorers.Length; i++) + { + if (scorers[i].NextDoc() == NO_MORE_DOCS) + { + // If even one of the sub-scorers does not have any documents, this + // scorer should not attempt to do any more work. + lastDoc = NO_MORE_DOCS; + return ; + } + } + + // Sort the array the first time... + // We don't need to sort the array in any future calls because we know + // it will already start off sorted (all scorers on same doc). + + // note that this comparator is not consistent with equals! + System.Array.Sort(scorers, (a, b) => a.DocID() - b.DocID()); + + // NOTE: doNext() must be called before the re-sorting of the array later on. + // The reason is this: assume there are 5 scorers, whose first docs are 1, + // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling + // doNext() here advances all the first scorers to 5 (or a larger doc ID + // they all agree on). + // However, if we re-sort before doNext() is called, the order will be 5, 3, + // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's + // docs equals the last one. So the invariant that after calling doNext() + // all scorers are on the same doc ID is broken.); + if (DoNext() == NO_MORE_DOCS) + { + // The scorers did not agree on any document. + lastDoc = NO_MORE_DOCS; + return ; + } + + // If first-time skip distance is any predictor of + // scorer sparseness, then we should always try to skip first on + // those scorers. 
+ // Keep last scorer in it's last place (it will be the first + // to be skipped on), but reverse all of the others so that + // they will be skipped on in order of original high skip. + int end = scorers.Length - 1; + int max = end >> 1; + for (int i = 0; i < max; i++) + { + Scorer tmp = scorers[i]; + int idx = end - i - 1; + scorers[i] = scorers[idx]; + scorers[idx] = tmp; + } + } + + private int DoNext() + { + int first = 0; + int doc = scorers[scorers.Length - 1].DocID(); + Scorer firstScorer; + while ((firstScorer = scorers[first]).DocID() < doc) + { + doc = firstScorer.Advance(doc); + first = first == scorers.Length - 1?0:first + 1; + } + return doc; + } + + public override int Advance(int target) + { + if (lastDoc == NO_MORE_DOCS) + { + return lastDoc; + } + else if (scorers[(scorers.Length - 1)].DocID() < target) + { + scorers[(scorers.Length - 1)].Advance(target); + } + return lastDoc = DoNext(); + } + + public override int DocID() + { + return lastDoc; + } + + public override int NextDoc() + { + if (lastDoc == NO_MORE_DOCS) + { + return lastDoc; + } + else if (lastDoc == - 1) + { + return lastDoc = scorers[scorers.Length - 1].DocID(); + } + scorers[(scorers.Length - 1)].NextDoc(); + return lastDoc = DoNext(); + } + + public override float Score() + { + float sum = 0.0f; + for (int i = 0; i < scorers.Length; i++) + { + sum += scorers[i].Score(); + } + return sum * coord; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ConstantScoreQuery.cs b/external/Lucene.Net.Light/src/core/Search/ConstantScoreQuery.cs new file mode 100644 index 0000000000..ff29023e12 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ConstantScoreQuery.cs @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// A query that wraps a filter and simply returns a constant score equal to the + /// query boost for every document in the filter. 
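A usage sketch for the class below (searcher is assumed, and TermRangeFilter is assumed to be part of this subset):

    // Every document matching the filter gets the same score: the boost.
    var query = new ConstantScoreQuery(
        new TermRangeFilter("date", "20020101", "20021231", true, true));
    query.Boost = 2.5f;
    TopDocs hits = searcher.Search(query, 10);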
+ /// + [Serializable] + public class ConstantScoreQuery:Query + { + protected internal Filter internalFilter; + + public ConstantScoreQuery(Filter filter) + { + this.internalFilter = filter; + } + + /// Returns the encapsulated filter + public virtual Filter Filter + { + get { return internalFilter; } + } + + public override Query Rewrite(IndexReader reader) + { + return this; + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + // OK to not add any terms when used for MultiSearcher, + // but may not be OK for highlighting + } + + [Serializable] + protected internal class ConstantWeight:Weight + { + private void InitBlock(ConstantScoreQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ConstantScoreQuery enclosingInstance; + public ConstantScoreQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private readonly Similarity similarity; + private float queryNorm; + private float queryWeight; + + public ConstantWeight(ConstantScoreQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + } + + public override Query Query + { + get { return Enclosing_Instance; } + } + + public override float Value + { + get { return queryWeight; } + } + + public override float GetSumOfSquaredWeights() + { + queryWeight = Enclosing_Instance.Boost; + return queryWeight*queryWeight; + } + + public override void Normalize(float norm) + { + this.queryNorm = norm; + queryWeight *= this.queryNorm; + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + return new ConstantScorer(enclosingInstance, similarity, reader, this); + } + + public override Explanation Explain(IndexReader reader, int doc) + { + + var cs = new ConstantScorer(enclosingInstance, similarity, reader, this); + bool exists = cs.docIdSetIterator.Advance(doc) == doc; + + var result = new ComplexExplanation(); + + if (exists) + { + result.Description = "ConstantScoreQuery(" + Enclosing_Instance.internalFilter + "), product of:"; + result.Value = queryWeight; + System.Boolean tempAux = true; + result.Match = tempAux; + result.AddDetail(new Explanation(Enclosing_Instance.Boost, "boost")); + result.AddDetail(new Explanation(queryNorm, "queryNorm")); + } + else + { + result.Description = "ConstantScoreQuery(" + Enclosing_Instance.internalFilter + ") doesn't match id " + doc; + result.Value = 0; + System.Boolean tempAux2 = false; + result.Match = tempAux2; + } + return result; + } + } + + protected internal class ConstantScorer : Scorer + { + private void InitBlock(ConstantScoreQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ConstantScoreQuery enclosingInstance; + public ConstantScoreQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal DocIdSetIterator docIdSetIterator; + internal float theScore; + internal int doc = - 1; + + public ConstantScorer(ConstantScoreQuery enclosingInstance, Similarity similarity, IndexReader reader, Weight w):base(similarity) + { + InitBlock(enclosingInstance); + theScore = w.Value; + DocIdSet docIdSet = Enclosing_Instance.internalFilter.GetDocIdSet(reader); + if (docIdSet == null) + { + docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.Iterator(); + } + else + { + DocIdSetIterator iter = docIdSet.Iterator(); + if (iter == null) + { + docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.Iterator(); + } + else + { + docIdSetIterator = iter; + } 
+ } + } + + public override int NextDoc() + { + return docIdSetIterator.NextDoc(); + } + + public override int DocID() + { + return docIdSetIterator.DocID(); + } + + public override float Score() + { + return theScore; + } + + public override int Advance(int target) + { + return docIdSetIterator.Advance(target); + } + } + + public override Weight CreateWeight(Searcher searcher) + { + return new ConstantScoreQuery.ConstantWeight(this, searcher); + } + + /// Prints a user-readable version of this query. + public override System.String ToString(string field) + { + return "ConstantScore(" + internalFilter + (Boost == 1.0?")":"^" + Boost); + } + + /// Returns true if o is equal to this. + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is ConstantScoreQuery)) + return false; + ConstantScoreQuery other = (ConstantScoreQuery) o; + return this.Boost == other.Boost && internalFilter.Equals(other.internalFilter); + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + // Simple add is OK since no existing filter hashcode has a float component. + return internalFilter.GetHashCode() + BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0); + } + + override public System.Object Clone() + { + // {{Aroush-1.9}} is this all that we need to clone?! + ConstantScoreQuery clone = (ConstantScoreQuery)base.Clone(); + clone.internalFilter = (Filter)this.internalFilter; + return clone; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/DefaultSimilarity.cs b/external/Lucene.Net.Light/src/core/Search/DefaultSimilarity.cs new file mode 100644 index 0000000000..6acbbeba5f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/DefaultSimilarity.cs @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using FieldInvertState = Lucene.Net.Index.FieldInvertState; + +namespace Lucene.Net.Search +{ + + /// Expert: Default scoring implementation. + [Serializable] + public class DefaultSimilarity:Similarity + { + + /// Implemented as + /// state.getBoost()*lengthNorm(numTerms), where + /// numTerms is if + /// is false, else it's + /// - + ///. + /// + ///

WARNING: This API is new and experimental, and may suddenly + /// change.
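For example, with DiscountOverlaps left at its default of false, a four-term field with a field boost of 1.0 gets ComputeNorm = 1.0 * (1 / sqrt(4)) = 0.5.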

+ ///

+ public override float ComputeNorm(System.String field, FieldInvertState state) + { + int numTerms; + if (internalDiscountOverlaps) + numTerms = state.Length - state.NumOverlap; + else + numTerms = state.Length; + return (state.Boost * LengthNorm(field, numTerms)); + } + + /// Implemented as 1/sqrt(numTerms). + public override float LengthNorm(System.String fieldName, int numTerms) + { + return (float) (1.0 / System.Math.Sqrt(numTerms)); + } + + /// Implemented as 1/sqrt(sumOfSquaredWeights). + public override float QueryNorm(float sumOfSquaredWeights) + { + return (float) (1.0 / System.Math.Sqrt(sumOfSquaredWeights)); + } + + /// Implemented as sqrt(freq). + public override float Tf(float freq) + { + return (float) System.Math.Sqrt(freq); + } + + /// Implemented as 1 / (distance + 1). + public override float SloppyFreq(int distance) + { + return 1.0f / (distance + 1); + } + + /// Implemented as log(numDocs/(docFreq+1)) + 1. + public override float Idf(int docFreq, int numDocs) + { + return (float) (System.Math.Log(numDocs / (double) (docFreq + 1)) + 1.0); + } + + /// Implemented as overlap / maxOverlap. + public override float Coord(int overlap, int maxOverlap) + { + return overlap / (float) maxOverlap; + } + + /// + /// + // Default false + protected internal bool internalDiscountOverlaps; + + /// Determines whether overlap tokens (Tokens with + /// 0 position increment) are ignored when computing + /// norm. By default this is false, meaning overlap + /// tokens are counted just like non-overlap tokens. + /// + ///

WARNING: This API is new and experimental, and may suddenly + /// change.

+ /// + ///

+ /// + /// + public virtual bool DiscountOverlaps + { + get { return internalDiscountOverlaps; } + set { internalDiscountOverlaps = value; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/DisjunctionMaxQuery.cs b/external/Lucene.Net.Light/src/core/Search/DisjunctionMaxQuery.cs new file mode 100644 index 0000000000..f59e3dcf6e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/DisjunctionMaxQuery.cs @@ -0,0 +1,344 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum + /// score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. + /// This is useful when searching for a word in multiple fields with different boost factors (so that the fields cannot be + /// combined equivalently into a single search field). We want the primary score to be the one associated with the highest boost, + /// not the sum of the field scores (as BooleanQuery would give). + /// If the query is "albino elephant" this ensures that "albino" matching one field and "elephant" matching + /// another gets a higher score than "albino" matching both fields. + /// To get this result, use both BooleanQuery and DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in + /// each field, while the set of these DisjunctionMaxQuery's is combined into a BooleanQuery. + /// The tie breaker capability allows results that include the same term in multiple fields to be judged better than results that + /// include this term in only the best of those multiple fields, without confusing this with the better case of two different terms + /// in the multiple fields. + /// + [Serializable] + public class DisjunctionMaxQuery : Query, System.Collections.Generic.IEnumerable, System.ICloneable + { + + /* The subqueries */ + private EquatableList disjuncts = new EquatableList(); + + /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */ + private float tieBreakerMultiplier = 0.0f; + + /// Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. + /// the score of each non-maximum disjunct for a document is multiplied by this weight + /// and added into the final score. 
If non-zero, the value should be small, on the order of 0.1, which says that + /// 10 occurrences of word in a lower-scored field that is also in a higher scored field is just as good as a unique + /// word in the lower scored field (i.e., one that is not in any higher scored field. + /// + public DisjunctionMaxQuery(float tieBreakerMultiplier) + { + this.tieBreakerMultiplier = tieBreakerMultiplier; + } + + /// Creates a new DisjunctionMaxQuery + /// a Collection<Query> of all the disjuncts to add + /// + /// the weight to give to each matching non-maximum disjunct + /// + public DisjunctionMaxQuery(System.Collections.Generic.ICollection disjuncts, float tieBreakerMultiplier) + { + this.tieBreakerMultiplier = tieBreakerMultiplier; + Add(disjuncts); + } + + /// Add a subquery to this disjunction + /// the disjunct added + /// + public virtual void Add(Query query) + { + disjuncts.Add(query); + } + + /// Add a collection of disjuncts to this disjunction + /// via Iterable + /// + public virtual void Add(System.Collections.Generic.ICollection disjuncts) + { + this.disjuncts.AddRange(disjuncts); + } + + /// An Iterator<Query> over the disjuncts + public virtual System.Collections.Generic.IEnumerator GetEnumerator() + { + return disjuncts.GetEnumerator(); + } + + System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + /// Expert: the Weight for DisjunctionMaxQuery, used to + /// normalize, score and explain these queries. + /// + ///

NOTE: this API and implementation are subject to + /// change suddenly in the next release.

+ ///

+ [Serializable] + protected internal class DisjunctionMaxWeight:Weight + { + private void InitBlock(DisjunctionMaxQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private DisjunctionMaxQuery enclosingInstance; + public DisjunctionMaxQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /// The Similarity implementation. + protected internal Similarity similarity; + + /// The Weights for our subqueries, in 1-1 correspondence with disjuncts + protected internal System.Collections.Generic.List weights = new System.Collections.Generic.List(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts + + /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ + public DisjunctionMaxWeight(DisjunctionMaxQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = searcher.Similarity; + foreach(Query disjunctQuery in enclosingInstance.disjuncts) + { + weights.Add(disjunctQuery.CreateWeight(searcher)); + } + } + + /* Return our associated DisjunctionMaxQuery */ + + public override Query Query + { + get { return Enclosing_Instance; } + } + + /* Return our boost */ + + public override float Value + { + get { return Enclosing_Instance.Boost; } + } + + /* Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */ + + public override float GetSumOfSquaredWeights() + { + float max = 0.0f, sum = 0.0f; + foreach (Weight currentWeight in weights) + { + float sub = currentWeight.GetSumOfSquaredWeights(); + sum += sub; + max = System.Math.Max(max, sub); + } + float boost = Enclosing_Instance.Boost; + return (((sum - max) * Enclosing_Instance.tieBreakerMultiplier * Enclosing_Instance.tieBreakerMultiplier) + max) * + boost * boost; + } + + /* Apply the computed normalization factor to our subqueries */ + public override void Normalize(float norm) + { + norm *= Enclosing_Instance.Boost; // Incorporate our boost + foreach(Weight wt in weights) + { + wt.Normalize(norm); + } + } + + /* Create the scorer used to score our associated DisjunctionMaxQuery */ + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + Scorer[] scorers = new Scorer[weights.Count]; + int idx = 0; + foreach(Weight w in weights) + { + Scorer subScorer = w.Scorer(reader, true, false); + if (subScorer != null && subScorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) + { + scorers[idx++] = subScorer; + } + } + if (idx == 0) + return null; // all scorers did not have documents + DisjunctionMaxScorer result = new DisjunctionMaxScorer(Enclosing_Instance.tieBreakerMultiplier, similarity, scorers, idx); + return result; + } + + /* Explain the score we computed for doc */ + public override Explanation Explain(IndexReader reader, int doc) + { + if (Enclosing_Instance.disjuncts.Count == 1) + return weights[0].Explain(reader, doc); + ComplexExplanation result = new ComplexExplanation(); + float max = 0.0f, sum = 0.0f; + result.Description = Enclosing_Instance.tieBreakerMultiplier == 0.0f?"max of:":"max plus " + Enclosing_Instance.tieBreakerMultiplier + " times others of:"; + foreach(Weight wt in weights) + { + Explanation e = wt.Explain(reader, doc); + if (e.IsMatch) + { + System.Boolean tempAux = true; + result.Match = tempAux; + result.AddDetail(e); + sum += e.Value; + max = System.Math.Max(max, e.Value); + } + } + result.Value = max + (sum - max) * Enclosing_Instance.tieBreakerMultiplier; + return result; + } + } // end 
of DisjunctionMaxWeight inner class + + /* Create the Weight used to score us */ + public override Weight CreateWeight(Searcher searcher) + { + return new DisjunctionMaxWeight(this, searcher); + } + + /// Optimize our representation and our subqueries representations + /// the IndexReader we query + /// + /// an optimized copy of us (which may not be a copy if there is nothing to optimize) + /// + public override Query Rewrite(IndexReader reader) + { + int numDisjunctions = disjuncts.Count; + if (numDisjunctions == 1) + { + Query singleton = disjuncts[0]; + Query result = singleton.Rewrite(reader); + if (Boost != 1.0f) + { + if (result == singleton) + result = (Query) result.Clone(); + result.Boost = Boost * result.Boost; + } + return result; + } + DisjunctionMaxQuery clone = null; + for (int i = 0; i < numDisjunctions; i++) + { + Query clause = disjuncts[i]; + Query rewrite = clause.Rewrite(reader); + if (rewrite != clause) + { + if (clone == null) + clone = (DisjunctionMaxQuery) this.Clone(); + clone.disjuncts[i] = rewrite; + } + } + if (clone != null) + return clone; + else + return this; + } + + /// Create a shallow copy of us -- used in rewriting if necessary + /// a copy of us (but reuse, don't copy, our subqueries) + /// + public override System.Object Clone() + { + DisjunctionMaxQuery clone = (DisjunctionMaxQuery) base.Clone(); + clone.disjuncts = (EquatableList) this.disjuncts.Clone(); + return clone; + } + + // inherit javadoc + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + foreach(Query query in disjuncts) + { + query.ExtractTerms(terms); + } + } + + /// Prettyprint us. + /// the field to which we are applied + /// + /// a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" + /// + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("("); + int numDisjunctions = disjuncts.Count; + for (int i = 0; i < numDisjunctions; i++) + { + Query subquery = disjuncts[i]; + if (subquery is BooleanQuery) + { + // wrap sub-bools in parens + buffer.Append("("); + buffer.Append(subquery.ToString(field)); + buffer.Append(")"); + } + else + buffer.Append(subquery.ToString(field)); + if (i != numDisjunctions - 1) + buffer.Append(" | "); + } + buffer.Append(")"); + if (tieBreakerMultiplier != 0.0f) + { + buffer.Append("~"); + buffer.Append(tieBreakerMultiplier); + } + if (Boost != 1.0) + { + buffer.Append("^"); + buffer.Append(Boost); + } + return buffer.ToString(); + } + + /// Return true iff we represent the same query as o + /// another object + /// + /// true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us + /// + public override bool Equals(System.Object o) + { + if (!(o is DisjunctionMaxQuery)) + return false; + DisjunctionMaxQuery other = (DisjunctionMaxQuery) o; + return this.Boost == other.Boost && this.tieBreakerMultiplier == other.tieBreakerMultiplier && this.disjuncts.Equals(other.disjuncts); + } + + /// Compute a hash code for hashing us + /// the hash code + /// + public override int GetHashCode() + { + return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) + BitConverter.ToInt32(BitConverter.GetBytes(tieBreakerMultiplier), 0) + disjuncts.GetHashCode(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/DisjunctionMaxScorer.cs b/external/Lucene.Net.Light/src/core/Search/DisjunctionMaxScorer.cs new file mode 100644 
index 0000000000..a476b6be06 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/DisjunctionMaxScorer.cs @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the the subquery scorers + /// is generated in document number order. The score for each document is the maximum of the scores computed + /// by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores + /// for the other subqueries that generate the document. + /// + class DisjunctionMaxScorer:Scorer + { + + /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */ + private Scorer[] subScorers; + private int numScorers; + /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */ + private float tieBreakerMultiplier; + private int doc = - 1; + + /// Creates a new instance of DisjunctionMaxScorer + /// + /// + /// Multiplier applied to non-maximum-scoring subqueries for a + /// document as they are summed into the result. + /// + /// -- not used since our definition involves neither coord nor terms + /// directly + /// + /// The sub scorers this Scorer should iterate on + /// + /// The actual number of scorers to iterate on. Note that the array's + /// length may be larger than the actual number of scorers. + /// + public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity, Scorer[] subScorers, int numScorers):base(similarity) + { + + this.tieBreakerMultiplier = tieBreakerMultiplier; + // The passed subScorers array includes only scorers which have documents + // (DisjunctionMaxQuery takes care of that), and their nextDoc() was already + // called. + this.subScorers = subScorers; + this.numScorers = numScorers; + + Heapify(); + } + + public override int NextDoc() + { + if (numScorers == 0) + return doc = NO_MORE_DOCS; + while (subScorers[0].DocID() == doc) + { + if (subScorers[0].NextDoc() != NO_MORE_DOCS) + { + HeapAdjust(0); + } + else + { + HeapRemoveRoot(); + if (numScorers == 0) + { + return doc = NO_MORE_DOCS; + } + } + } + + return doc = subScorers[0].DocID(); + } + + public override int DocID() + { + return doc; + } + + /// Determine the current document score. Initially invalid, until is called the first time. 
+ /// the score of the current generated document + /// + public override float Score() + { + int doc = subScorers[0].DocID(); + float[] sum = new float[]{subScorers[0].Score()}, max = new float[]{sum[0]}; + int size = numScorers; + ScoreAll(1, size, doc, sum, max); + ScoreAll(2, size, doc, sum, max); + return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier; + } + + // Recursively iterate all subScorers that generated last doc computing sum and max + private void ScoreAll(int root, int size, int doc, float[] sum, float[] max) + { + if (root < size && subScorers[root].DocID() == doc) + { + float sub = subScorers[root].Score(); + sum[0] += sub; + max[0] = System.Math.Max(max[0], sub); + ScoreAll((root << 1) + 1, size, doc, sum, max); + ScoreAll((root << 1) + 2, size, doc, sum, max); + } + } + + public override int Advance(int target) + { + if (numScorers == 0) + return doc = NO_MORE_DOCS; + while (subScorers[0].DocID() < target) + { + if (subScorers[0].Advance(target) != NO_MORE_DOCS) + { + HeapAdjust(0); + } + else + { + HeapRemoveRoot(); + if (numScorers == 0) + { + return doc = NO_MORE_DOCS; + } + } + } + return doc = subScorers[0].DocID(); + } + + // Organize subScorers into a min heap with scorers generating the earliest document on top. + private void Heapify() + { + for (int i = (numScorers >> 1) - 1; i >= 0; i--) + { + HeapAdjust(i); + } + } + + /* The subtree of subScorers at root is a min heap except possibly for its root element. + * Bubble the root down as required to make the subtree a heap. + */ + private void HeapAdjust(int root) + { + Scorer scorer = subScorers[root]; + int doc = scorer.DocID(); + int i = root; + while (i <= (numScorers >> 1) - 1) + { + int lchild = (i << 1) + 1; + Scorer lscorer = subScorers[lchild]; + int ldoc = lscorer.DocID(); + int rdoc = System.Int32.MaxValue, rchild = (i << 1) + 2; + Scorer rscorer = null; + if (rchild < numScorers) + { + rscorer = subScorers[rchild]; + rdoc = rscorer.DocID(); + } + if (ldoc < doc) + { + if (rdoc < ldoc) + { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } + else + { + subScorers[i] = lscorer; + subScorers[lchild] = scorer; + i = lchild; + } + } + else if (rdoc < doc) + { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } + else + { + return ; + } + } + } + + // Remove the root Scorer from subScorers and re-establish it as a heap + private void HeapRemoveRoot() + { + if (numScorers == 1) + { + subScorers[0] = null; + numScorers = 0; + } + else + { + subScorers[0] = subScorers[numScorers - 1]; + subScorers[numScorers - 1] = null; + --numScorers; + HeapAdjust(0); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/DisjunctionSumScorer.cs b/external/Lucene.Net.Light/src/core/Search/DisjunctionSumScorer.cs new file mode 100644 index 0000000000..8d65ab898e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/DisjunctionSumScorer.cs @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using ScorerDocQueue = Lucene.Net.Util.ScorerDocQueue; + +namespace Lucene.Net.Search +{ + + /// A Scorer for OR like queries, counterpart of ConjunctionScorer. + /// This Scorer implements and uses skipTo() on the given Scorers. + /// + class DisjunctionSumScorer:Scorer + { + /// The number of subscorers. + private int nrScorers; + + /// The subscorers. + protected internal System.Collections.Generic.IList subScorers; + + /// The minimum number of scorers that should match. + private int minimumNrMatchers; + + /// The scorerDocQueue contains all subscorers ordered by their current doc(), + /// with the minimum at the top. + ///
The scorerDocQueue is initialized the first time next() or skipTo() is called. + ///
An exhausted scorer is immediately removed from the scorerDocQueue. + ///
If fewer than minimumNrMatchers scorers + /// remain in the scorerDocQueue, next() and skipTo() return false. + ///

+ /// After each call to next() or skipTo(), + /// currentSumScore is the total score of the current matching doc, + /// nrMatchers is the number of matching scorers, + /// and all scorers are after the matching doc, or are exhausted. + ///

+ private ScorerDocQueue scorerDocQueue; + + /// The document number of the current match. + private int currentDoc = - 1; + + /// The number of subscorers that provide the current match. + protected internal int nrMatchers = - 1; + + private float currentScore = System.Single.NaN; + + /// Construct a DisjunctionScorer. + /// A collection of at least two subscorers. + /// + /// The positive minimum number of subscorers that should + /// match to match this query. + ///
When minimumNrMatchers is bigger than + /// the number of subScorers, + /// no matches will be produced. + ///
When minimumNrMatchers equals the number of subScorers, + /// it more efficient to use ConjunctionScorer. + /// + public DisjunctionSumScorer(System.Collections.Generic.IList subScorers, int minimumNrMatchers):base(null) + { + + nrScorers = subScorers.Count; + + if (minimumNrMatchers <= 0) + { + throw new System.ArgumentException("Minimum nr of matchers must be positive"); + } + if (nrScorers <= 1) + { + throw new System.ArgumentException("There must be at least 2 subScorers"); + } + + this.minimumNrMatchers = minimumNrMatchers; + this.subScorers = subScorers; + + InitScorerDocQueue(); + } + + /// Construct a DisjunctionScorer, using one as the minimum number + /// of matching subscorers. + /// + public DisjunctionSumScorer(System.Collections.Generic.IList subScorers) + : this(subScorers, 1) + { + } + + /// Called the first time next() or skipTo() is called to + /// initialize scorerDocQueue. + /// + private void InitScorerDocQueue() + { + scorerDocQueue = new ScorerDocQueue(nrScorers); + foreach(Scorer se in subScorers) + { + if (se.NextDoc() != NO_MORE_DOCS) + { + // doc() method will be used in scorerDocQueue. + scorerDocQueue.Insert(se); + } + } + } + + /// Scores and collects all matching documents. + /// The collector to which all matching documents are passed through. + public override void Score(Collector collector) + { + collector.SetScorer(this); + while (NextDoc() != NO_MORE_DOCS) + { + collector.Collect(currentDoc); + } + } + + /// Expert: Collects matching documents in a range. Hook for optimization. + /// Note that must be called once before this method is called + /// for the first time. + /// + /// The collector to which all matching documents are passed through. + /// + /// Do not score documents past this. + /// + /// + /// true if more matching documents may remain. + /// + public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID) + { + // firstDocID is ignored since nextDoc() sets 'currentDoc' + collector.SetScorer(this); + while (currentDoc < max) + { + collector.Collect(currentDoc); + if (NextDoc() == NO_MORE_DOCS) + { + return false; + } + } + return true; + } + + public override int NextDoc() + { + if (scorerDocQueue.Size() < minimumNrMatchers || !AdvanceAfterCurrent()) + { + currentDoc = NO_MORE_DOCS; + } + return currentDoc; + } + + /// Advance all subscorers after the current document determined by the + /// top of the scorerDocQueue. + /// Repeat until at least the minimum number of subscorers match on the same + /// document and all subscorers are after that document or are exhausted. + ///
On entry the scorerDocQueue has at least minimumNrMatchers + /// available. At least the scorer with the minimum document number will be advanced. + ///
+ /// true iff there is a match. + ///
In case there is a match, currentDoc, currentSumScore, + /// and nrMatchers describe the match. + /// + /// TODO: Investigate whether it is possible to use skipTo() when + /// the minimum number of matchers is bigger than one, ie. try and use the + /// character of ConjunctionScorer for the minimum number of matchers. + /// Also delay calling score() on the sub scorers until the minimum number of + /// matchers is reached. + ///
For this, a Scorer array with minimumNrMatchers elements might + /// hold Scorers at currentDoc that are temporarily popped from scorerQueue. + ///
+ protected internal virtual bool AdvanceAfterCurrent() + { + do + { + // repeat until minimum nr of matchers + currentDoc = scorerDocQueue.TopDoc(); + currentScore = scorerDocQueue.TopScore(); + nrMatchers = 1; + do + { + // Until all subscorers are after currentDoc + if (!scorerDocQueue.TopNextAndAdjustElsePop()) + { + if (scorerDocQueue.Size() == 0) + { + break; // nothing more to advance, check for last match. + } + } + if (scorerDocQueue.TopDoc() != currentDoc) + { + break; // All remaining subscorers are after currentDoc. + } + currentScore += scorerDocQueue.TopScore(); + nrMatchers++; + } + while (true); + + if (nrMatchers >= minimumNrMatchers) + { + return true; + } + else if (scorerDocQueue.Size() < minimumNrMatchers) + { + return false; + } + } + while (true); + } + + /// Returns the score of the current document matching the query. + /// Initially invalid, until is called the first time. + /// + public override float Score() + { + return currentScore; + } + + public override int DocID() + { + return currentDoc; + } + + /// Returns the number of subscorers matching the current document. + /// Initially invalid, until is called the first time. + /// + public virtual int NrMatchers() + { + return nrMatchers; + } + + /// Advances to the first match beyond the current whose document number is + /// greater than or equal to a given target.
+ /// The implementation uses the skipTo() method on the subscorers. + /// + ///
+ /// The target document number. + /// + /// the document whose number is greater than or equal to the given + /// target, or -1 if none exist. + /// + public override int Advance(int target) + { + if (scorerDocQueue.Size() < minimumNrMatchers) + { + return currentDoc = NO_MORE_DOCS; + } + if (target <= currentDoc) + { + return currentDoc; + } + do + { + if (scorerDocQueue.TopDoc() >= target) + { + return AdvanceAfterCurrent()?currentDoc:(currentDoc = NO_MORE_DOCS); + } + else if (!scorerDocQueue.TopSkipToAndAdjustElsePop(target)) + { + if (scorerDocQueue.Size() < minimumNrMatchers) + { + return currentDoc = NO_MORE_DOCS; + } + } + } + while (true); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/DocIdSet.cs b/external/Lucene.Net.Light/src/core/Search/DocIdSet.cs new file mode 100644 index 0000000000..6c49dcd1ac --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/DocIdSet.cs @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// A DocIdSet contains a set of doc ids. Implementing classes must + /// only implement to provide access to the set. + /// + [Serializable] + public abstract class DocIdSet + { + public class AnonymousClassDocIdSet:DocIdSet + { + public AnonymousClassDocIdSet() + { + InitBlock(); + } + public class AnonymousClassDocIdSetIterator:DocIdSetIterator + { + public AnonymousClassDocIdSetIterator(AnonymousClassDocIdSet enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(AnonymousClassDocIdSet enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private AnonymousClassDocIdSet enclosingInstance; + public AnonymousClassDocIdSet Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public override int Advance(int target) + { + return NO_MORE_DOCS; + } + public override int DocID() + { + return NO_MORE_DOCS; + } + public override int NextDoc() + { + return NO_MORE_DOCS; + } + } + private void InitBlock() + { + iterator = new AnonymousClassDocIdSetIterator(this); + } + + private DocIdSetIterator iterator; + + public override DocIdSetIterator Iterator() + { + return iterator; + } + + public override bool IsCacheable + { + get { return true; } + } + } + + /// An empty instance for easy use, e.g. in Filters that hit no documents. + [NonSerialized] + public static readonly DocIdSet EMPTY_DOCIDSET; + + /// Provides a to access the set. + /// This implementation can return null or + /// EMPTY_DOCIDSET.Iterator() if there + /// are no docs that match. + /// + public abstract DocIdSetIterator Iterator(); + + /// This method is a hint for , if this DocIdSet + /// should be cached without copying it into a BitSet. 
The default is to return + /// false. If you have an own DocIdSet implementation + /// that does its iteration very effective and fast without doing disk I/O, + /// override this method and return true. + /// + public virtual bool IsCacheable + { + get { return false; } + } + + static DocIdSet() + { + EMPTY_DOCIDSET = new AnonymousClassDocIdSet(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/DocIdSetIterator.cs b/external/Lucene.Net.Light/src/core/Search/DocIdSetIterator.cs new file mode 100644 index 0000000000..1c7be51d19 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/DocIdSetIterator.cs @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +namespace Lucene.Net.Search +{ + + /// This abstract class defines methods to iterate over a set of non-decreasing + /// doc ids. Note that this class assumes it iterates on doc Ids, and therefore + /// is set to Int32.MaxValue in order to be used as + /// a sentinel object. Implementations of this class are expected to consider + /// as an invalid value. + /// + public abstract class DocIdSetIterator + { + private int doc = - 1; + + /// When returned by , and + /// it means there are no more docs in the iterator. + /// + public static readonly int NO_MORE_DOCS = System.Int32.MaxValue; + + /// Returns the following: + /// + /// -1 or if or + /// were not called yet. + /// if the iterator has exhausted. + /// Otherwise it should return the doc ID it is currently on. + /// + ///

+ ///

+ public abstract int DocID(); + + /// Advances to the next document in the set and returns the doc it is + /// currently on, or NO_MORE_DOCS if there are no more docs in the + /// set.
+ /// + /// NOTE: after the iterator has exhausted you should not call this + /// method, as it may result in unpredicted behavior. + /// + ///
+ public abstract int NextDoc(); + + /// Advances to the first beyond the current whose document number is greater + /// than or equal to target. Returns the current document number or + /// NO_MORE_DOCS if there are no more docs in the set. + ///

+ /// Behaves as if written: + /// + /// + /// int advance(int target) { + /// int doc; + /// while ((doc = nextDoc()) < target) { + /// } + /// return doc; + /// } + /// + /// + /// Some implementations are considerably more efficient than that. + ///
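In C#, the sentinel contract is typically consumed with a loop of this shape (a sketch; docIdSet stands for any DocIdSet obtained from a filter):

    DocIdSetIterator it = docIdSet.Iterator();
    int doc;
    while ((doc = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        // doc is the id of the current matching document
    }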

+ /// NOTE: certain implementations may return a different value (each + /// time) if called several times in a row with the same target. + ///

+ /// NOTE: this method may be called with NO_MORE_DOCS for + /// efficiency by some Scorers. If your implementation cannot efficiently + /// determine that it should exhaust, it is recommended that you check for that + /// value in each call to this method. + ///

+ /// NOTE: after the iterator has exhausted you should not call this + /// method, as it may result in unpredicted behavior. + ///

+ /// + ///

+ /// 2.9 + public abstract int Advance(int target); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ExactPhraseScorer.cs b/external/Lucene.Net.Light/src/core/Search/ExactPhraseScorer.cs new file mode 100644 index 0000000000..481ee5436f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ExactPhraseScorer.cs @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Lucene.Net.Index; + +namespace Lucene.Net.Search +{ + + sealed class ExactPhraseScorer:PhraseScorer + { + + internal ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(weight, tps, offsets, similarity, norms) + { + } + + protected internal override float PhraseFreq() + { + // sort list with pq + pq.Clear(); + for (PhrasePositions pp = first; pp != null; pp = pp.next) + { + pp.FirstPosition(); + pq.Add(pp); // build pq from list + } + PqToList(); // rebuild list from pq + + // for counting how many times the exact phrase is found in current document, + // just count how many times all PhrasePosition's have exactly the same position. + int freq = 0; + do + { + // find position w/ all terms + while (first.position < last.position) + { + // scan forward in first + do + { + if (!first.NextPosition()) + return freq; + } + while (first.position < last.position); + FirstToLast(); + } + freq++; // all equal: a match + } + while (last.NextPosition()); + + return freq; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Explanation.cs b/external/Lucene.Net.Light/src/core/Search/Explanation.cs new file mode 100644 index 0000000000..0136db2773 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Explanation.cs @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Search +{ + + /// Expert: Describes the score computation for document and query. 
+ [Serializable] + public class Explanation + { + private float value; // the value of this node + private System.String description; // what it represents + private List details; // sub-explanations + + public Explanation() + { + } + + public Explanation(float value, System.String description) + { + this.value = value; + this.description = description; + } + + /// Indicates whether or not this Explanation models a good match. + /// + ///

+ /// By default, an Explanation represents a "match" if the value is positive. + ///

+ ///

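A small sketch of how an Explanation tree can be assembled and rendered with the members this file declares (the two-argument constructor, AddDetail, IsMatch and ToString); the scores and descriptions are illustrative only.

    using System;
    using Lucene.Net.Search;

    static class ExplanationSketch
    {
        public static void Main()
        {
            // Root node: the overall score; children: the factors that produced it.
            var root = new Explanation(2.5f, "sum of:");
            root.AddDetail(new Explanation(1.5f, "weight(title:lucene)"));
            root.AddDetail(new Explanation(1.0f, "weight(body:lucene)"));

            Console.WriteLine(root.IsMatch);    // true, because the value is positive
            Console.WriteLine(root.ToString()); // indented text rendering of the tree
        }
    }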
+ /// + /// + public virtual bool IsMatch + { + get { return (0.0f < Value); } + } + + + /// The value assigned to this explanation node. + public virtual float Value + { + get { return value; } + set { this.value = value; } + } + + /// A description of this explanation node. + public virtual string Description + { + get { return description; } + set { this.description = value; } + } + + /// A short one line summary which should contain all high level + /// information about this Explanation, without the "Details" + /// + protected internal virtual string Summary + { + get { return Value + " = " + Description; } + } + + /// The sub-nodes of this explanation node. + public virtual Explanation[] GetDetails() + { + if (details == null) + return null; + return details.ToArray(); + } + + /// Adds a sub-node to this explanation node. + public virtual void AddDetail(Explanation detail) + { + if (details == null) + details = new List(); + details.Add(detail); + } + + /// Render an explanation as text. + public override System.String ToString() + { + return ToString(0); + } + + protected internal virtual System.String ToString(int depth) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + for (int i = 0; i < depth; i++) + { + buffer.Append(" "); + } + buffer.Append(Summary); + buffer.Append("\n"); + + Explanation[] details = GetDetails(); + if (details != null) + { + for (int i = 0; i < details.Length; i++) + { + buffer.Append(details[i].ToString(depth + 1)); + } + } + + return buffer.ToString(); + } + + /// Render an explanation as HTML. + public virtual System.String ToHtml() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("\n"); + + buffer.Append(""); + buffer.Append(Summary); + buffer.Append("
\n"); + + Explanation[] details = GetDetails(); + if (details != null) + { + for (int i = 0; i < details.Length; i++) + { + buffer.Append(details[i].ToHtml()); + } + } + + buffer.Append("
\n"); + buffer.Append("
\n"); + + return buffer.ToString(); + } + + /// Small Util class used to pass both an idf factor as well as an + /// explanation for that factor. + /// + /// This class will likely be held on a , so be aware + /// before storing any large or un-serializable fields. + /// + /// + [Serializable] + public abstract class IDFExplanation + { + /// the idf factor + public abstract float Idf { get; } + + /// This should be calculated lazily if possible. + /// + /// + /// the explanation for the idf factor. + /// + public abstract System.String Explain(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldCache.cs b/external/Lucene.Net.Light/src/core/Search/FieldCache.cs new file mode 100644 index 0000000000..e0ac588b50 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldCache.cs @@ -0,0 +1,708 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; +using Lucene.Net.Support; +using Double = Lucene.Net.Support.Double; +using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream; +using NumericField = Lucene.Net.Documents.NumericField; +using IndexReader = Lucene.Net.Index.IndexReader; +using NumericUtils = Lucene.Net.Util.NumericUtils; +using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator; +using Single = Lucene.Net.Support.Single; + +namespace Lucene.Net.Search +{ + + /// Expert: Maintains caches of term values. + /// + ///

Created: May 19, 2004 11:13:14 AM + /// + ///

+ /// lucene 1.4 + /// + /// $Id: FieldCache.java 807841 2009-08-25 22:27:31Z markrmiller $ + /// + /// + /// + public sealed class CreationPlaceholder + { + internal System.Object value_Renamed; + } + /// Expert: Stores term text values and document ordering data. + public class StringIndex + { + + public virtual int BinarySearchLookup(System.String key) + { + // this special case is the reason that Arrays.binarySearch() isn't useful. + if (key == null) + return 0; + + int low = 1; + int high = lookup.Length - 1; + + while (low <= high) + { + int mid = Number.URShift((low + high), 1); + int cmp = String.CompareOrdinal(lookup[mid], key); + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. + } + + /// All the term values, in natural order. + public System.String[] lookup; + + /// For each document, an index into the lookup array. + public int[] order; + + /// Creates one of these objects + public StringIndex(int[] values, System.String[] lookup) + { + this.order = values; + this.lookup = lookup; + } + } + /// EXPERT: A unique Identifier/Description for each item in the FieldCache. + /// Can be useful for logging/debugging. + ///

+ /// EXPERIMENTAL API: This API is considered extremely advanced + /// and experimental. It may be removed or altered w/o warning in future + /// releases + /// of Lucene. + ///

+ ///

+ public abstract class CacheEntry + { + public abstract object ReaderKey { get; } + public abstract string FieldName { get; } + public abstract Type CacheType { get; } + public abstract object Custom { get; } + public abstract object Value { get; } + + /// + /// + public virtual void EstimateSize() + { + EstimateSize(new RamUsageEstimator(false)); // doesn't check for interned + } + /// Computes (and stores) the estimated size of the cache Value + /// + /// + public virtual void EstimateSize(RamUsageEstimator ramCalc) + { + long size = ramCalc.EstimateRamUsage(Value); + EstimatedSize = RamUsageEstimator.HumanReadableUnits(size, new System.Globalization.NumberFormatInfo()); // {{Aroush-2.9}} in Java, the formater is set to "0.#", so we need to do the same in C# + } + + /// The most recently estimated size of the value, null unless + /// estimateSize has been called. + /// + public string EstimatedSize { get; protected internal set; } + + + public override System.String ToString() + { + var b = new System.Text.StringBuilder(); + b.Append("'").Append(ReaderKey).Append("'=>"); + b.Append("'").Append(FieldName).Append("',"); + b.Append(CacheType).Append(",").Append(Custom); + b.Append("=>").Append(Value.GetType().FullName).Append("#"); + b.Append(Value.GetHashCode()); + + System.String s = EstimatedSize; + if (null != s) + { + b.Append(" (size =~ ").Append(s).Append(')'); + } + + return b.ToString(); + } + } + public struct FieldCache_Fields + { + /// Indicator for StringIndex values in the cache. + // NOTE: the value assigned to this constant must not be + // the same as any of those in SortField!! + public readonly static int STRING_INDEX = -1; + /// Expert: The cache used internally by sorting and range query classes. + public readonly static FieldCache DEFAULT; + /// The default parser for byte values, which are encoded by + public readonly static ByteParser DEFAULT_BYTE_PARSER; + /// The default parser for short values, which are encoded by + public readonly static ShortParser DEFAULT_SHORT_PARSER; + /// The default parser for int values, which are encoded by + public readonly static IntParser DEFAULT_INT_PARSER; + /// The default parser for float values, which are encoded by + public readonly static FloatParser DEFAULT_FLOAT_PARSER; + /// The default parser for long values, which are encoded by + public readonly static LongParser DEFAULT_LONG_PARSER; + /// The default parser for double values, which are encoded by + public readonly static DoubleParser DEFAULT_DOUBLE_PARSER; + /// A parser instance for int values encoded by , e.g. when indexed + /// via /. + /// + public readonly static IntParser NUMERIC_UTILS_INT_PARSER; + /// A parser instance for float values encoded with , e.g. when indexed + /// via /. + /// + public readonly static FloatParser NUMERIC_UTILS_FLOAT_PARSER; + /// A parser instance for long values encoded by , e.g. when indexed + /// via /. + /// + public readonly static LongParser NUMERIC_UTILS_LONG_PARSER; + /// A parser instance for double values encoded with , e.g. when indexed + /// via /. 
+ /// + public readonly static DoubleParser NUMERIC_UTILS_DOUBLE_PARSER; + static FieldCache_Fields() + { + DEFAULT = new FieldCacheImpl(); + DEFAULT_BYTE_PARSER = new AnonymousClassByteParser(); + DEFAULT_SHORT_PARSER = new AnonymousClassShortParser(); + DEFAULT_INT_PARSER = new AnonymousClassIntParser(); + DEFAULT_FLOAT_PARSER = new AnonymousClassFloatParser(); + DEFAULT_LONG_PARSER = new AnonymousClassLongParser(); + DEFAULT_DOUBLE_PARSER = new AnonymousClassDoubleParser(); + NUMERIC_UTILS_INT_PARSER = new AnonymousClassIntParser1(); + NUMERIC_UTILS_FLOAT_PARSER = new AnonymousClassFloatParser1(); + NUMERIC_UTILS_LONG_PARSER = new AnonymousClassLongParser1(); + NUMERIC_UTILS_DOUBLE_PARSER = new AnonymousClassDoubleParser1(); + } + } + + [Serializable] + class AnonymousClassByteParser : ByteParser + { + public virtual sbyte ParseByte(System.String value_Renamed) + { + return System.SByte.Parse(value_Renamed); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.DEFAULT_BYTE_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".DEFAULT_BYTE_PARSER"; + } + } + [Serializable] + class AnonymousClassShortParser : ShortParser + { + public virtual short ParseShort(System.String value_Renamed) + { + return System.Int16.Parse(value_Renamed); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".DEFAULT_SHORT_PARSER"; + } + } + [Serializable] + class AnonymousClassIntParser : IntParser + { + public virtual int ParseInt(System.String value_Renamed) + { + return System.Int32.Parse(value_Renamed); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.DEFAULT_INT_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".DEFAULT_INT_PARSER"; + } + } + [Serializable] + class AnonymousClassFloatParser : FloatParser + { + public virtual float ParseFloat(System.String value_Renamed) + { + try + { + return Single.Parse(value_Renamed); + } + catch (System.OverflowException) + { + return value_Renamed.StartsWith("-") ? 
float.PositiveInfinity : float.NegativeInfinity; + } + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".DEFAULT_FLOAT_PARSER"; + } + } + [Serializable] + class AnonymousClassLongParser : LongParser + { + public virtual long ParseLong(System.String value_Renamed) + { + return System.Int64.Parse(value_Renamed); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.DEFAULT_LONG_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".DEFAULT_LONG_PARSER"; + } + } + [Serializable] + class AnonymousClassDoubleParser : DoubleParser + { + public virtual double ParseDouble(System.String value_Renamed) + { + return Double.Parse(value_Renamed); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.DEFAULT_DOUBLE_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".DEFAULT_DOUBLE_PARSER"; + } + } + [Serializable] + class AnonymousClassIntParser1 : IntParser + { + public virtual int ParseInt(System.String val) + { + int shift = val[0] - NumericUtils.SHIFT_START_INT; + if (shift > 0 && shift <= 31) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.PrefixCodedToInt(val); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_INT_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".NUMERIC_UTILS_INT_PARSER"; + } + } + [Serializable] + class AnonymousClassFloatParser1 : FloatParser + { + public virtual float ParseFloat(System.String val) + { + int shift = val[0] - NumericUtils.SHIFT_START_INT; + if (shift > 0 && shift <= 31) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.SortableIntToFloat(NumericUtils.PrefixCodedToInt(val)); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".NUMERIC_UTILS_FLOAT_PARSER"; + } + } + [Serializable] + class AnonymousClassLongParser1 : LongParser + { + public virtual long ParseLong(System.String val) + { + int shift = val[0] - NumericUtils.SHIFT_START_LONG; + if (shift > 0 && shift <= 63) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.PrefixCodedToLong(val); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_LONG_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".NUMERIC_UTILS_LONG_PARSER"; + } + } + [Serializable] + class AnonymousClassDoubleParser1 : DoubleParser + { + public virtual double ParseDouble(System.String val) + { + int shift = val[0] - NumericUtils.SHIFT_START_LONG; + if (shift > 0 && shift <= 63) + throw new FieldCacheImpl.StopFillCacheException(); + return NumericUtils.SortableLongToDouble(NumericUtils.PrefixCodedToLong(val)); + } + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_DOUBLE_PARSER; + } + public override System.String ToString() + { + return typeof(FieldCache).FullName + ".NUMERIC_UTILS_DOUBLE_PARSER"; + } + } + + public 
interface FieldCache + { + + /// Checks the internal cache for an appropriate entry, and if none is + /// found, reads the terms in field as a single byte and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// Used to get field values. + /// + /// Which field contains the single byte values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + sbyte[] GetBytes(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if none is found, + /// reads the terms in field as bytes and returns an array of + /// size reader.MaxDoc of the value each document has in the + /// given field. + /// + /// Used to get field values. + /// + /// Which field contains the bytes. + /// + /// Computes byte for string values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + sbyte[] GetBytes(IndexReader reader, System.String field, ByteParser parser); + + /// Checks the internal cache for an appropriate entry, and if none is + /// found, reads the terms in field as shorts and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// Used to get field values. + /// + /// Which field contains the shorts. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + short[] GetShorts(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if none is found, + /// reads the terms in field as shorts and returns an array of + /// size reader.MaxDoc of the value each document has in the + /// given field. + /// + /// Used to get field values. + /// + /// Which field contains the shorts. + /// + /// Computes short for string values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + short[] GetShorts(IndexReader reader, System.String field, ShortParser parser); + + /// Checks the internal cache for an appropriate entry, and if none is + /// found, reads the terms in field as integers and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// Used to get field values. + /// + /// Which field contains the integers. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + int[] GetInts(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if none is found, + /// reads the terms in field as integers and returns an array of + /// size reader.MaxDoc of the value each document has in the + /// given field. + /// + /// Used to get field values. + /// + /// Which field contains the integers. + /// + /// Computes integer for string values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + int[] GetInts(IndexReader reader, System.String field, IntParser parser); + + /// Checks the internal cache for an appropriate entry, and if + /// none is found, reads the terms in field as floats and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// Used to get field values. + /// + /// Which field contains the floats. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. 
+ float[] GetFloats(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if + /// none is found, reads the terms in field as floats and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// Used to get field values. + /// + /// Which field contains the floats. + /// + /// Computes float for string values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + float[] GetFloats(IndexReader reader, System.String field, FloatParser parser); + + /// Checks the internal cache for an appropriate entry, and if none is + /// found, reads the terms in field as longs and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// + /// Used to get field values. + /// + /// Which field contains the longs. + /// + /// The values in the given field for each document. + /// + /// java.io.IOException If any error occurs. + long[] GetLongs(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if none is found, + /// reads the terms in field as longs and returns an array of + /// size reader.MaxDoc of the value each document has in the + /// given field. + /// + /// + /// Used to get field values. + /// + /// Which field contains the longs. + /// + /// Computes integer for string values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + long[] GetLongs(IndexReader reader, System.String field, LongParser parser); + + + /// Checks the internal cache for an appropriate entry, and if none is + /// found, reads the terms in field as integers and returns an array + /// of size reader.MaxDoc of the value each document + /// has in the given field. + /// + /// + /// Used to get field values. + /// + /// Which field contains the doubles. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + double[] GetDoubles(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if none is found, + /// reads the terms in field as doubles and returns an array of + /// size reader.MaxDoc of the value each document has in the + /// given field. + /// + /// + /// Used to get field values. + /// + /// Which field contains the doubles. + /// + /// Computes integer for string values. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + double[] GetDoubles(IndexReader reader, System.String field, DoubleParser parser); + + /// Checks the internal cache for an appropriate entry, and if none + /// is found, reads the term values in field and returns an array + /// of size reader.MaxDoc containing the value each document + /// has in the given field. + /// + /// Used to get field values. + /// + /// Which field contains the strings. + /// + /// The values in the given field for each document. + /// + /// IOException If any error occurs. + System.String[] GetStrings(IndexReader reader, System.String field); + + /// Checks the internal cache for an appropriate entry, and if none + /// is found reads the term values in field and returns + /// an array of them in natural order, along with an array telling + /// which element in the term array each document uses. + /// + /// Used to get field values. + /// + /// Which field contains the strings. 
+ /// + /// Array of terms and index into the array for each document. + /// + /// IOException If any error occurs. + StringIndex GetStringIndex(IndexReader reader, System.String field); + + /// EXPERT: Generates an array of CacheEntry objects representing all items + /// currently in the FieldCache. + ///
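A sketch of GetStringIndex, declared just above, together with the StringIndex.BinarySearchLookup semantics described earlier in this file (0 for a null key, a negative "-(insertion point) - 1" result when the key is absent). Obtaining the IndexReader and the "category" field name are assumptions for illustration.

    using System;
    using Lucene.Net.Index;
    using Lucene.Net.Search;

    static class StringIndexSketch
    {
        public static void Show(IndexReader reader)
        {
            StringIndex idx = FieldCache_Fields.DEFAULT.GetStringIndex(reader, "category");

            // order[doc] indexes into lookup[]; lookup[0] is the null entry for
            // documents that have no term in this field.
            for (int doc = 0; doc < idx.order.Length; doc++)
                Console.WriteLine("doc {0} -> {1}", doc, idx.lookup[idx.order[doc]]);

            int pos = idx.BinarySearchLookup("books");
            Console.WriteLine(pos > 0
                ? "term found at " + pos
                : "term absent, insertion point " + (-pos - 1));
        }
    }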

+ /// NOTE: These CacheEntry objects maintain a strong reference to the + /// Cached Values. Maintaining references to a CacheEntry after the IndexReader + /// associated with it has been garbage collected will prevent the Value itself + /// from being garbage collected when the Cache drops the WeakReference. + ///

+ ///

+ /// EXPERIMENTAL API: This API is considered extremely advanced + /// and experimental. It may be removed or altered w/o warning in future + /// releases + /// of Lucene. + ///

+ ///

+ CacheEntry[] GetCacheEntries(); + + ///
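A sketch of the diagnostic use described above: enumerating the live cache entries via GetCacheEntries(), declared just above, and asking each CacheEntry to estimate its footprint (EstimateSize fills in the EstimatedSize string that CacheEntry.ToString reports).

    using System;
    using Lucene.Net.Search;

    static class CacheDiagnosticsSketch
    {
        public static void Dump()
        {
            foreach (CacheEntry entry in FieldCache_Fields.DEFAULT.GetCacheEntries())
            {
                entry.EstimateSize();     // computes and stores EstimatedSize
                Console.WriteLine(entry); // reader key, field, type, value and size
            }
        }
    }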

+ /// EXPERT: Instructs the FieldCache to forcibly expunge all entries + /// from the underlying caches. This is intended only to be used for + /// test methods as a way to ensure a known base state of the Cache + /// (without needing to rely on GC to free WeakReferences). + /// It should not be relied on for "Cache maintenance" in general + /// application code. + ///

+ ///

+ /// EXPERIMENTAL API: This API is considered extremely advanced + /// and experimental. It may be removed or altered w/o warning in future + /// releases + /// of Lucene. + ///

+ ///

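A sketch of the test-isolation pattern the comment above describes, using the PurgeAllCaches() and Purge(IndexReader) members declared just below; how the reader is obtained is assumed to be handled by the surrounding test fixture.

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    static class CachePurgeSketch
    {
        // Reset the global cache before a test so its contents are known.
        public static void ResetForTest()
        {
            FieldCache_Fields.DEFAULT.PurgeAllCaches();
        }

        // Drop only the entries keyed on one (segment-level) reader.
        public static void DropReader(IndexReader reader)
        {
            FieldCache_Fields.DEFAULT.Purge(reader);
        }
    }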
+ void PurgeAllCaches(); + + /// + /// Expert: drops all cache entries associated with this + /// reader. NOTE: this reader must precisely match the + /// reader that the cache entry is keyed on. If you pass a + /// top-level reader, it usually will have no effect as + /// Lucene now caches at the segment reader level. + /// + void Purge(IndexReader r); + + /// Gets or sets the InfoStream for this FieldCache. + /// If non-null, FieldCacheImpl will warn whenever + /// entries are created that are not sane according to + /// . + /// + /// + StreamWriter InfoStream { get; set; } + } + + /// Marker interface as super-interface to all parsers. It + /// is used to specify a custom parser to . + /// + public interface Parser + { + } + + /// Interface to parse bytes from document fields. + /// + /// + public interface ByteParser : Parser + { + /// Return a single Byte representation of this field's value. + sbyte ParseByte(System.String string_Renamed); + } + + /// Interface to parse shorts from document fields. + /// + /// + public interface ShortParser : Parser + { + /// Return a short representation of this field's value. + short ParseShort(System.String string_Renamed); + } + + /// Interface to parse ints from document fields. + /// + /// + public interface IntParser : Parser + { + /// Return an integer representation of this field's value. + int ParseInt(System.String string_Renamed); + } + + /// Interface to parse floats from document fields. + /// + /// + public interface FloatParser : Parser + { + /// Return an float representation of this field's value. + float ParseFloat(System.String string_Renamed); + } + + /// Interface to parse long from document fields. + /// + /// + /// Use , this will be removed in Lucene 3.0 + /// + public interface LongParser : Parser + { + /// Return an long representation of this field's value. + long ParseLong(System.String string_Renamed); + } + + /// Interface to parse doubles from document fields. + /// + /// + /// Use , this will be removed in Lucene 3.0 + /// + public interface DoubleParser : Parser + { + /// Return an long representation of this field's value. + double ParseDouble(System.String string_Renamed); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldCacheImpl.cs b/external/Lucene.Net.Light/src/core/Search/FieldCacheImpl.cs new file mode 100644 index 0000000000..6c24dcc521 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldCacheImpl.cs @@ -0,0 +1,858 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Lucene.Net.Support; +using NumericField = Lucene.Net.Documents.NumericField; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using TermDocs = Lucene.Net.Index.TermDocs; +using TermEnum = Lucene.Net.Index.TermEnum; +using FieldCacheSanityChecker = Lucene.Net.Util.FieldCacheSanityChecker; +using Single = Lucene.Net.Support.Single; +using StringHelper = Lucene.Net.Util.StringHelper; + +namespace Lucene.Net.Search +{ + + /// Expert: The default cache implementation, storing all values in memory. + /// A WeakDictionary is used for storage. + /// + ///

Created: May 19, 2004 4:40:36 PM + /// + ///

+ /// lucene 1.4 + /// + class FieldCacheImpl : FieldCache + { + private IDictionary caches; + + internal FieldCacheImpl() + { + Init(); + } + private void Init() + { + lock (this) + { + caches = new HashMap(7); + caches[typeof(sbyte)] = new ByteCache(this); + caches[typeof(short)] = new ShortCache(this); + caches[typeof(int)] = new IntCache(this); + caches[typeof(float)] = new FloatCache(this); + caches[typeof(long)] = new LongCache(this); + caches[typeof(double)] = new DoubleCache(this); + caches[typeof(string)] = new StringCache(this); + caches[typeof(StringIndex)] = new StringIndexCache(this); + } + } + + // lucene.net: java version 3.0.3 with patch in rev. 912330 applied: + // uschindler 21/02/2010 12:16:42 LUCENE-2273: Fixed bug in FieldCacheImpl.getCacheEntries() that used + // WeakHashMap incorrectly and lead to ConcurrentModificationException + public virtual void PurgeAllCaches() + { + lock (this) + { + Init(); + } + } + + // lucene.net: java version 3.0.3 with patch in rev. 912330 applied: + // uschindler 21/02/2010 12:16:42 LUCENE-2273: Fixed bug in FieldCacheImpl.getCacheEntries() that used + // WeakHashMap incorrectly and lead to ConcurrentModificationException + public void Purge(IndexReader r) + { + lock (this) + { + foreach (Cache c in caches.Values) + { + c.Purge(r); + } + } + } + + // lucene.net: java version 3.0.3 with patch in rev. 912330 applied: + // uschindler 21/02/2010 12:16:42 LUCENE-2273: Fixed bug in FieldCacheImpl.getCacheEntries() that used + // WeakHashMap incorrectly and lead to ConcurrentModificationException + public virtual CacheEntry[] GetCacheEntries() + { + lock (this) + { + IList result = new List(17); + foreach (var cacheEntry in caches) + { + var cache = cacheEntry.Value; + var cacheType = cacheEntry.Key; + lock (cache.readerCache) + { + foreach (var readerCacheEntry in cache.readerCache) + { + var readerKey = readerCacheEntry.Key; + var innerCache = readerCacheEntry.Value; + foreach (var mapEntry in innerCache) + { + Entry entry = mapEntry.Key; + result.Add(new CacheEntryImpl(readerKey, entry.field, cacheType, entry.custom, mapEntry.Value)); + } + } + } + } + return result.ToArray(); + } + } + + private sealed class CacheEntryImpl : CacheEntry + { + private System.Object readerKey; + private System.String fieldName; + private System.Type cacheType; + private System.Object custom; + private System.Object value; + internal CacheEntryImpl(System.Object readerKey, System.String fieldName, System.Type cacheType, System.Object custom, System.Object value) + { + this.readerKey = readerKey; + this.fieldName = fieldName; + this.cacheType = cacheType; + this.custom = custom; + this.value = value; + + // :HACK: for testing. + // if (null != locale || SortField.CUSTOM != sortFieldType) { + // throw new RuntimeException("Locale/sortFieldType: " + this); + // } + } + + public override object ReaderKey + { + get { return readerKey; } + } + + public override string FieldName + { + get { return fieldName; } + } + + public override Type CacheType + { + get { return cacheType; } + } + + public override object Custom + { + get { return custom; } + } + + public override object Value + { + get { return value; } + } + } + + /// Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops + /// processing terms and returns the current FieldCache + /// array. + /// + [Serializable] + internal sealed class StopFillCacheException:System.SystemException + { + } + + /// Expert: Internal cache. 
+ internal abstract class Cache + { + internal Cache() + { + this.wrapper = null; + } + + internal Cache(FieldCache wrapper) + { + this.wrapper = wrapper; + } + + internal FieldCache wrapper; + + internal IDictionary> readerCache = new WeakDictionary>(); + + protected internal abstract System.Object CreateValue(IndexReader reader, Entry key); + + /* Remove this reader from the cache, if present. */ + public void Purge(IndexReader r) + { + object readerKey = r.FieldCacheKey; + lock (readerCache) + { + readerCache.Remove(readerKey); + } + } + + public virtual System.Object Get(IndexReader reader, Entry key) + { + IDictionary innerCache; + System.Object value; + System.Object readerKey = reader.FieldCacheKey; + lock (readerCache) + { + innerCache = readerCache[readerKey]; + if (innerCache == null) + { + innerCache = new HashMap(); + readerCache[readerKey] = innerCache; + value = null; + } + else + { + value = innerCache[key]; + } + if (value == null) + { + value = new CreationPlaceholder(); + innerCache[key] = value; + } + } + if (value is CreationPlaceholder) + { + lock (value) + { + CreationPlaceholder progress = (CreationPlaceholder) value; + if (progress.value_Renamed == null) + { + progress.value_Renamed = CreateValue(reader, key); + lock (readerCache) + { + innerCache[key] = progress.value_Renamed; + } + + // Only check if key.custom (the parser) is + // non-null; else, we check twice for a single + // call to FieldCache.getXXX + if (key.custom != null && wrapper != null) + { + System.IO.StreamWriter infoStream = wrapper.InfoStream; + if (infoStream != null) + { + PrintNewInsanity(infoStream, progress.value_Renamed); + } + } + } + return progress.value_Renamed; + } + } + return value; + } + + private void PrintNewInsanity(System.IO.StreamWriter infoStream, System.Object value_Renamed) + { + FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.CheckSanity(wrapper); + for (int i = 0; i < insanities.Length; i++) + { + FieldCacheSanityChecker.Insanity insanity = insanities[i]; + CacheEntry[] entries = insanity.GetCacheEntries(); + for (int j = 0; j < entries.Length; j++) + { + if (entries[j].Value == value_Renamed) + { + // OK this insanity involves our entry + infoStream.WriteLine("WARNING: new FieldCache insanity created\nDetails: " + insanity.ToString()); + infoStream.WriteLine("\nStack:\n"); + infoStream.WriteLine(new System.Exception()); + break; + } + } + } + } + } + + /// Expert: Every composite-key in the internal cache is of this type. + protected internal class Entry + { + internal System.String field; // which Fieldable + internal System.Object custom; // which custom comparator or parser + + /// Creates one of these objects for a custom comparator/parser. + internal Entry(System.String field, System.Object custom) + { + this.field = StringHelper.Intern(field); + this.custom = custom; + } + + /// Two of these are equal iff they reference the same field and type. + public override bool Equals(System.Object o) + { + if (o is Entry) + { + Entry other = (Entry) o; + if (other.field == field) + { + if (other.custom == null) + { + if (custom == null) + return true; + } + else if (other.custom.Equals(custom)) + { + return true; + } + } + } + return false; + } + + /// Composes a hashcode based on the field and type. 
+ public override int GetHashCode() + { + return field.GetHashCode() ^ (custom == null?0:custom.GetHashCode()); + } + } + + // inherit javadocs + public virtual sbyte[] GetBytes(IndexReader reader, System.String field) + { + return GetBytes(reader, field, null); + } + + // inherit javadocs + public virtual sbyte[] GetBytes(IndexReader reader, System.String field, ByteParser parser) + { + return (sbyte[]) caches[typeof(sbyte)].Get(reader, new Entry(field, parser)); + } + + internal sealed class ByteCache:Cache + { + internal ByteCache(FieldCache wrapper):base(wrapper) + { + } + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + Entry entry = entryKey; + System.String field = entry.field; + ByteParser parser = (ByteParser) entry.custom; + if (parser == null) + { + return wrapper.GetBytes(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_BYTE_PARSER); + } + sbyte[] retArray = new sbyte[reader.MaxDoc]; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + sbyte termval = parser.ParseByte(term.Text); + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + catch (StopFillCacheException) + { + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + return retArray; + } + } + + + // inherit javadocs + public virtual short[] GetShorts(IndexReader reader, System.String field) + { + return GetShorts(reader, field, null); + } + + // inherit javadocs + public virtual short[] GetShorts(IndexReader reader, System.String field, ShortParser parser) + { + return (short[]) caches[typeof(short)].Get(reader, new Entry(field, parser)); + } + + internal sealed class ShortCache:Cache + { + internal ShortCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + Entry entry = entryKey; + System.String field = entry.field; + ShortParser parser = (ShortParser) entry.custom; + if (parser == null) + { + return wrapper.GetShorts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_SHORT_PARSER); + } + short[] retArray = new short[reader.MaxDoc]; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + short termval = parser.ParseShort(term.Text); + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + catch (StopFillCacheException) + { + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + return retArray; + } + } + + + // inherit javadocs + public virtual int[] GetInts(IndexReader reader, System.String field) + { + return GetInts(reader, field, null); + } + + // inherit javadocs + public virtual int[] GetInts(IndexReader reader, System.String field, IntParser parser) + { + return (int[]) caches[typeof(int)].Get(reader, new Entry(field, parser)); + } + + internal sealed class IntCache:Cache + { + internal IntCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + Entry entry = entryKey; + System.String field = entry.field; + IntParser parser = (IntParser) entry.custom; + if 
(parser == null) + { + try + { + return wrapper.GetInts(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_INT_PARSER); + } + catch (System.FormatException) + { + return wrapper.GetInts(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_INT_PARSER); + } + } + int[] retArray = null; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + int termval = parser.ParseInt(term.Text); + if (retArray == null) + // late init + retArray = new int[reader.MaxDoc]; + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + catch (StopFillCacheException) + { + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + if (retArray == null) + // no values + retArray = new int[reader.MaxDoc]; + return retArray; + } + } + + + + // inherit javadocs + public virtual float[] GetFloats(IndexReader reader, System.String field) + { + return GetFloats(reader, field, null); + } + + // inherit javadocs + public virtual float[] GetFloats(IndexReader reader, System.String field, FloatParser parser) + { + + return (float[]) caches[typeof(float)].Get(reader, new Entry(field, parser)); + } + + internal sealed class FloatCache:Cache + { + internal FloatCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + Entry entry = entryKey; + System.String field = entry.field; + FloatParser parser = (FloatParser) entry.custom; + if (parser == null) + { + try + { + return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_FLOAT_PARSER); + } + catch (System.FormatException) + { + return wrapper.GetFloats(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_FLOAT_PARSER); + } + } + float[] retArray = null; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + float termval = parser.ParseFloat(term.Text); + if (retArray == null) + // late init + retArray = new float[reader.MaxDoc]; + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + catch (StopFillCacheException) + { + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + if (retArray == null) + // no values + retArray = new float[reader.MaxDoc]; + return retArray; + } + } + + + + public virtual long[] GetLongs(IndexReader reader, System.String field) + { + return GetLongs(reader, field, null); + } + + // inherit javadocs + public virtual long[] GetLongs(IndexReader reader, System.String field, Lucene.Net.Search.LongParser parser) + { + return (long[]) caches[typeof(long)].Get(reader, new Entry(field, parser)); + } + + internal sealed class LongCache:Cache + { + internal LongCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + Entry entry = entryKey; + System.String field = entry.field; + Lucene.Net.Search.LongParser parser = (Lucene.Net.Search.LongParser) entry.custom; + if (parser == null) + { + try + { + return wrapper.GetLongs(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_LONG_PARSER); + } + catch (System.FormatException) 
+ { + return wrapper.GetLongs(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_LONG_PARSER); + } + } + long[] retArray = null; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + long termval = parser.ParseLong(term.Text); + if (retArray == null) + // late init + retArray = new long[reader.MaxDoc]; + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + catch (StopFillCacheException) + { + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + if (retArray == null) + // no values + retArray = new long[reader.MaxDoc]; + return retArray; + } + } + + + // inherit javadocs + public virtual double[] GetDoubles(IndexReader reader, System.String field) + { + return GetDoubles(reader, field, null); + } + + // inherit javadocs + public virtual double[] GetDoubles(IndexReader reader, System.String field, Lucene.Net.Search.DoubleParser parser) + { + return (double[]) caches[typeof(double)].Get(reader, new Entry(field, parser)); + } + + internal sealed class DoubleCache:Cache + { + internal DoubleCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + Entry entry = entryKey; + System.String field = entry.field; + Lucene.Net.Search.DoubleParser parser = (Lucene.Net.Search.DoubleParser) entry.custom; + if (parser == null) + { + try + { + return wrapper.GetDoubles(reader, field, Lucene.Net.Search.FieldCache_Fields.DEFAULT_DOUBLE_PARSER); + } + catch (System.FormatException) + { + return wrapper.GetDoubles(reader, field, Lucene.Net.Search.FieldCache_Fields.NUMERIC_UTILS_DOUBLE_PARSER); + } + } + double[] retArray = null; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + double termval = parser.ParseDouble(term.Text); + if (retArray == null) + // late init + retArray = new double[reader.MaxDoc]; + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + catch (StopFillCacheException) + { + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + if (retArray == null) + // no values + retArray = new double[reader.MaxDoc]; + return retArray; + } + } + + + // inherit javadocs + public virtual System.String[] GetStrings(IndexReader reader, System.String field) + { + return (System.String[]) caches[typeof(string)].Get(reader, new Entry(field, (Parser) null)); + } + + internal sealed class StringCache:Cache + { + internal StringCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + System.String field = StringHelper.Intern(entryKey.field); + System.String[] retArray = new System.String[reader.MaxDoc]; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + try + { + do + { + Term term = termEnum.Term; + if (term == null || (System.Object) term.Field != (System.Object) field) + break; + System.String termval = term.Text; + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = termval; + } + } + while (termEnum.Next()); + } + finally + { + 
termDocs.Close(); + termEnum.Close(); + } + return retArray; + } + } + + + // inherit javadocs + public virtual StringIndex GetStringIndex(IndexReader reader, System.String field) + { + return (StringIndex) caches[typeof(StringIndex)].Get(reader, new Entry(field, (Parser) null)); + } + + internal sealed class StringIndexCache:Cache + { + internal StringIndexCache(FieldCache wrapper):base(wrapper) + { + } + + protected internal override System.Object CreateValue(IndexReader reader, Entry entryKey) + { + System.String field = StringHelper.Intern(entryKey.field); + int[] retArray = new int[reader.MaxDoc]; + System.String[] mterms = new System.String[reader.MaxDoc + 1]; + TermDocs termDocs = reader.TermDocs(); + TermEnum termEnum = reader.Terms(new Term(field)); + int t = 0; // current term number + + // an entry for documents that have no terms in this field + // should a document with no terms be at top or bottom? + // this puts them at the top - if it is changed, FieldDocSortedHitQueue + // needs to change as well. + mterms[t++] = null; + + try + { + do + { + Term term = termEnum.Term; + if (term == null || term.Field != field || t >= mterms.Length) break; + + // store term text + mterms[t] = term.Text; + + termDocs.Seek(termEnum); + while (termDocs.Next()) + { + retArray[termDocs.Doc] = t; + } + + t++; + } + while (termEnum.Next()); + } + finally + { + termDocs.Close(); + termEnum.Close(); + } + + if (t == 0) + { + // if there are no terms, make the term array + // have a single null entry + mterms = new System.String[1]; + } + else if (t < mterms.Length) + { + // if there are less terms than documents, + // trim off the dead array space + System.String[] terms = new System.String[t]; + Array.Copy(mterms, 0, terms, 0, t); + mterms = terms; + } + + StringIndex value_Renamed = new StringIndex(retArray, mterms); + return value_Renamed; + } + } + + private volatile System.IO.StreamWriter infoStream; + + public virtual StreamWriter InfoStream + { + get { return infoStream; } + set { infoStream = value; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldCacheRangeFilter.cs b/external/Lucene.Net.Light/src/core/Search/FieldCacheRangeFilter.cs new file mode 100644 index 0000000000..7fc11752e4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldCacheRangeFilter.cs @@ -0,0 +1,964 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Index; +using Lucene.Net.Support; +using NumericField = Lucene.Net.Documents.NumericField; +using IndexReader = Lucene.Net.Index.IndexReader; +using TermDocs = Lucene.Net.Index.TermDocs; +using NumericUtils = Lucene.Net.Util.NumericUtils; + +namespace Lucene.Net.Search +{ + + /// A range filter built on top of a cached single term field (in ). + /// + ///

builds a single cache for the field the first time it is used. + /// Each subsequent on the same field then reuses this cache, + /// even if the range itself changes. + /// + ///

This means that is much faster (sometimes more than 100x as fast) + /// than building a if using a . However, if the range never changes it + /// is slower (around 2x as slow) than building a CachingWrapperFilter on top of a single . + /// + /// For numeric data types, this filter may be significantly faster than . + /// Furthermore, it does not need the numeric values encoded by . But + /// it has the problem that it only works with exactly one value per document (see below). + /// + ///

As with all based functionality, is only valid for + /// fields which contain exactly one term for each document (except for + /// where 0 terms are also allowed). Due to a restriction of , for numeric ranges + /// all terms that do not have a numeric value are assumed to be 0. + /// + ///

Thus it works on dates, prices and other single value fields but will not work on + /// regular text fields. It is preferable to use a NOT_ANALYZED field to ensure that + /// there is only a single term. + /// + ///

This class does not have a constructor; use one of the static factory methods available + /// that create a correct instance for different data types supported by . + ///

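A usage sketch of the static factory methods referred to above. The factory names (NewIntRange, NewStringRange) are assumed from the public Lucene.Net API and are not visible in this hunk; the field names and bounds are illustrative only. Both filters reuse the same FieldCache arrays across calls, which is the speed-up described above.

    using Lucene.Net.Search;

    static class RangeFilterSketch
    {
        // Inclusive numeric range over the "price" field (assumed factory name).
        public static Filter PriceBetween(int low, int high)
        {
            return FieldCacheRangeFilter.NewIntRange("price", low, high, true, true);
        }

        // Term range over the "category" field, lower bound inclusive,
        // upper bound exclusive (assumed factory name).
        public static Filter CategoryRange(string from, string to)
        {
            return FieldCacheRangeFilter.NewStringRange("category", from, to, true, false);
        }
    }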
+ + public static class FieldCacheRangeFilter + { + [Serializable] + private class AnonymousClassFieldCacheRangeFilter : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(Lucene.Net.Search.StringIndex fcsi, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.fcsi = fcsi; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Search.StringIndex fcsi; + private int inclusiveLowerPoint; + private int inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(Lucene.Net.Search.StringIndex fcsi, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(fcsi, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return fcsi.order[doc] >= inclusiveLowerPoint && fcsi.order[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter(string field, Lucene.Net.Search.Parser parser, string lowerVal, string upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + Lucene.Net.Search.StringIndex fcsi = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStringIndex(reader, field); + int lowerPoint = fcsi.BinarySearchLookup(lowerVal); + int upperPoint = fcsi.BinarySearchLookup(upperVal); + + int inclusiveLowerPoint; + int inclusiveUpperPoint; + + // Hints: + // * binarySearchLookup returns 0, if value was null. 
+ // * the value is <0 if no exact hit was found, the returned value + // is (-(insertion point) - 1) + if (lowerPoint == 0) + { + System.Diagnostics.Debug.Assert(lowerVal == null); + inclusiveLowerPoint = 1; + } + else if (includeLower && lowerPoint > 0) + { + inclusiveLowerPoint = lowerPoint; + } + else if (lowerPoint > 0) + { + inclusiveLowerPoint = lowerPoint + 1; + } + else + { + inclusiveLowerPoint = System.Math.Max(1, -lowerPoint - 1); + } + + if (upperPoint == 0) + { + System.Diagnostics.Debug.Assert(upperVal == null); + inclusiveUpperPoint = System.Int32.MaxValue; + } + else if (includeUpper && upperPoint > 0) + { + inclusiveUpperPoint = upperPoint; + } + else if (upperPoint > 0) + { + inclusiveUpperPoint = upperPoint - 1; + } + else + { + inclusiveUpperPoint = -upperPoint - 2; + } + + if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + System.Diagnostics.Debug.Assert(inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0); + + // for this DocIdSet, we never need to use TermDocs, + // because deleted docs have an order of 0 (null entry in StringIndex) + return new AnonymousClassFieldCacheDocIdSet(fcsi, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, false); + } + } + [Serializable] + private class AnonymousClassFieldCacheRangeFilter1 : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(sbyte[] values, sbyte inclusiveLowerPoint, sbyte inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.values = values; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private sbyte[] values; + private sbyte inclusiveLowerPoint; + private sbyte inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(sbyte[] values, sbyte inclusiveLowerPoint, sbyte inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter1(string field, Parser parser, sbyte? lowerVal, sbyte? upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + sbyte inclusiveLowerPoint; + sbyte inclusiveUpperPoint; + if (lowerVal != null) + { + sbyte i = (sbyte)lowerVal; + if (!includeLower && i == sbyte.MaxValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveLowerPoint = (sbyte)(includeLower ? i : (i + 1)); + } + else + { + inclusiveLowerPoint = sbyte.MinValue; + } + if (upperVal != null) + { + sbyte i = (sbyte)upperVal; + if (!includeUpper && i == sbyte.MinValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveUpperPoint = (sbyte)(includeUpper ? 
i : (i - 1)); + } + else + { + inclusiveUpperPoint = sbyte.MaxValue; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + sbyte[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetBytes(reader, field, (Lucene.Net.Search.ByteParser)parser); + // we only request the usage of termDocs, if the range contains 0 + return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)); + } + } + [Serializable] + private class AnonymousClassFieldCacheRangeFilter2 : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(short[] values, short inclusiveLowerPoint, short inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.values = values; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private short[] values; + private short inclusiveLowerPoint; + private short inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(short[] values, short inclusiveLowerPoint, short inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter2(string field, Parser parser, short? lowerVal, short? upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + short inclusiveLowerPoint; + short inclusiveUpperPoint; + if (lowerVal != null) + { + short i = (short)lowerVal; + if (!includeLower && i == short.MaxValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveLowerPoint = (short)(includeLower ? i : (i + 1)); + } + else + { + inclusiveLowerPoint = short.MinValue; + } + if (upperVal != null) + { + short i = (short)upperVal; + if (!includeUpper && i == short.MinValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveUpperPoint = (short)(includeUpper ? 
i : (i - 1)); + } + else + { + inclusiveUpperPoint = short.MaxValue; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + short[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetShorts(reader, field, (Lucene.Net.Search.ShortParser)parser); + // we only request the usage of termDocs, if the range contains 0 + return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)); + } + } + [Serializable] + private class AnonymousClassFieldCacheRangeFilter3 : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(int[] values, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.values = values; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private int[] values; + private int inclusiveLowerPoint; + private int inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(int[] values, int inclusiveLowerPoint, int inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter3(string field, Lucene.Net.Search.Parser parser, int? lowerVal, int? upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + int inclusiveLowerPoint; + int inclusiveUpperPoint; + if (lowerVal != null) + { + int i = (int)lowerVal; + if (!includeLower && i == int.MaxValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveLowerPoint = includeLower ? i : (i + 1); + } + else + { + inclusiveLowerPoint = int.MinValue; + } + if (upperVal != null) + { + int i = (int)upperVal; + if (!includeUpper && i == int.MinValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveUpperPoint = includeUpper ? 
i : (i - 1); + } + else + { + inclusiveUpperPoint = int.MaxValue; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + int[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetInts(reader, field, (Lucene.Net.Search.IntParser)parser); + // we only request the usage of termDocs, if the range contains 0 + return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)); + } + } + [Serializable] + private class AnonymousClassFieldCacheRangeFilter4 : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(long[] values, long inclusiveLowerPoint, long inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.values = values; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private long[] values; + private long inclusiveLowerPoint; + private long inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(long[] values, long inclusiveLowerPoint, long inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter4(string field, Lucene.Net.Search.Parser parser, long? lowerVal, long? upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + long inclusiveLowerPoint; + long inclusiveUpperPoint; + if (lowerVal != null) + { + long i = (long)lowerVal; + if (!includeLower && i == long.MaxValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveLowerPoint = includeLower ? i : (i + 1L); + } + else + { + inclusiveLowerPoint = long.MinValue; + } + if (upperVal != null) + { + long i = (long)upperVal; + if (!includeUpper && i == long.MinValue) + return DocIdSet.EMPTY_DOCIDSET; + inclusiveUpperPoint = includeUpper ? 
i : (i - 1L); + } + else + { + inclusiveUpperPoint = long.MaxValue; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + long[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetLongs(reader, field, (Lucene.Net.Search.LongParser)parser); + // we only request the usage of termDocs, if the range contains 0 + return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)); + } + } + [Serializable] + private class AnonymousClassFieldCacheRangeFilter5 : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(float[] values, float inclusiveLowerPoint, float inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.values = values; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private float[] values; + private float inclusiveLowerPoint; + private float inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(float[] values, float inclusiveLowerPoint, float inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter5(string field, Lucene.Net.Search.Parser parser, float? lowerVal, float? upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + // we transform the floating point numbers to sortable integers + // using NumericUtils to easier find the next bigger/lower value + float inclusiveLowerPoint; + float inclusiveUpperPoint; + if (lowerVal != null) + { + float f = (float)lowerVal; + if (!includeUpper && f > 0.0f && float.IsInfinity(f)) + return DocIdSet.EMPTY_DOCIDSET; + int i = NumericUtils.FloatToSortableInt(f); + inclusiveLowerPoint = NumericUtils.SortableIntToFloat(includeLower ? i : (i + 1)); + } + else + { + inclusiveLowerPoint = float.NegativeInfinity; + } + if (upperVal != null) + { + float f = (float)upperVal; + if (!includeUpper && f < 0.0f && float.IsInfinity(f)) + return DocIdSet.EMPTY_DOCIDSET; + int i = NumericUtils.FloatToSortableInt(f); + inclusiveUpperPoint = NumericUtils.SortableIntToFloat(includeUpper ? 
i : (i - 1)); + } + else + { + inclusiveUpperPoint = float.PositiveInfinity; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + float[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetFloats(reader, field, (Lucene.Net.Search.FloatParser)parser); + // we only request the usage of termDocs, if the range contains 0 + return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)); + } + } + [Serializable] + private class AnonymousClassFieldCacheRangeFilter6 : FieldCacheRangeFilter + { + private class AnonymousClassFieldCacheDocIdSet : FieldCacheDocIdSet + { + private void InitBlock(double[] values, double inclusiveLowerPoint, double inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance) + { + this.values = values; + this.inclusiveLowerPoint = inclusiveLowerPoint; + this.inclusiveUpperPoint = inclusiveUpperPoint; + this.enclosingInstance = enclosingInstance; + } + private double[] values; + private double inclusiveLowerPoint; + private double inclusiveUpperPoint; + private FieldCacheRangeFilter enclosingInstance; + public FieldCacheRangeFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFieldCacheDocIdSet(double[] values, double inclusiveLowerPoint, double inclusiveUpperPoint, FieldCacheRangeFilter enclosingInstance, Lucene.Net.Index.IndexReader Param1, bool Param2) + : base(Param1, Param2) + { + InitBlock(values, inclusiveLowerPoint, inclusiveUpperPoint, enclosingInstance); + } + internal override bool MatchDoc(int doc) + { + return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; + } + } + internal AnonymousClassFieldCacheRangeFilter6(string field, Lucene.Net.Search.Parser parser, double? lowerVal, double? upperVal, bool includeLower, bool includeUpper) + : base(field, parser, lowerVal, upperVal, includeLower, includeUpper) + { + } + public override DocIdSet GetDocIdSet(IndexReader reader) + { + // we transform the floating point numbers to sortable integers + // using NumericUtils to easier find the next bigger/lower value + double inclusiveLowerPoint; + double inclusiveUpperPoint; + if (lowerVal != null) + { + double f = (double)lowerVal; + if (!includeUpper && f > 0.0 && double.IsInfinity(f)) + return DocIdSet.EMPTY_DOCIDSET; + long i = NumericUtils.DoubleToSortableLong(f); + inclusiveLowerPoint = NumericUtils.SortableLongToDouble(includeLower ? i : (i + 1L)); + } + else + { + inclusiveLowerPoint = double.NegativeInfinity; + } + if (upperVal != null) + { + double f = (double)upperVal; + if (!includeUpper && f < 0.0 && double.IsInfinity(f)) + return DocIdSet.EMPTY_DOCIDSET; + long i = NumericUtils.DoubleToSortableLong(f); + inclusiveUpperPoint = NumericUtils.SortableLongToDouble(includeUpper ? i : (i - 1L)); + } + else + { + inclusiveUpperPoint = double.PositiveInfinity; + } + + if (inclusiveLowerPoint > inclusiveUpperPoint) + return DocIdSet.EMPTY_DOCIDSET; + + double[] values = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetDoubles(reader, field, (Lucene.Net.Search.DoubleParser)parser); + // we only request the usage of termDocs, if the range contains 0 + return new AnonymousClassFieldCacheDocIdSet(values, inclusiveLowerPoint, inclusiveUpperPoint, this, reader, (inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)); + } + } + + /// Creates a string range filter using . This works with all + /// fields containing zero or one term in the field. 
The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewStringRange(string field, string lowerVal, string upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range filter using . This works with all + /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewByteRange(string field, sbyte? lowerVal, sbyte? upperVal, bool includeLower, bool includeUpper) + { + return NewByteRange(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range filter using . This works with all + /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewByteRange(string field, Lucene.Net.Search.ByteParser parser, sbyte? lowerVal, sbyte? upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter1(field, parser, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// short fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewShortRange(string field, short? lowerVal, short? upperVal, bool includeLower, bool includeUpper) + { + return NewShortRange(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// short fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewShortRange(string field, Lucene.Net.Search.ShortParser parser, short? lowerVal, short? upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter2(field, parser, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewIntRange(string field, int? lowerVal, int? upperVal, bool includeLower, bool includeUpper) + { + return NewIntRange(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewIntRange(string field, Lucene.Net.Search.IntParser parser, int? lowerVal, int? upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter3(field, parser, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewLongRange(string field, long? lowerVal, long? 
upperVal, bool includeLower, bool includeUpper) + { + return NewLongRange(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewLongRange(string field, Lucene.Net.Search.LongParser parser, long? lowerVal, long? upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter4(field, parser, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// float fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewFloatRange(string field, float? lowerVal, float? upperVal, bool includeLower, bool includeUpper) + { + return NewFloatRange(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// float fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewFloatRange(string field, Lucene.Net.Search.FloatParser parser, float? lowerVal, float? upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter5(field, parser, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// double fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewDoubleRange(string field, double? lowerVal, double? upperVal, bool includeLower, bool includeUpper) + { + return NewDoubleRange(field, null, lowerVal, upperVal, includeLower, includeUpper); + } + + /// Creates a numeric range query using . This works with all + /// double fields containing exactly one numeric term in the field. The range can be half-open by setting one + /// of the values to null. + /// + public static FieldCacheRangeFilter NewDoubleRange(string field, Lucene.Net.Search.DoubleParser parser, double? lowerVal, double? 
upperVal, bool includeLower, bool includeUpper) + { + return new AnonymousClassFieldCacheRangeFilter6(field, parser, lowerVal, upperVal, includeLower, includeUpper); + } + } + + [Serializable] + public abstract class FieldCacheRangeFilter : Filter + { + internal System.String field; + internal Lucene.Net.Search.Parser parser; + internal T lowerVal; + internal T upperVal; + internal bool includeLower; + internal bool includeUpper; + + protected internal FieldCacheRangeFilter(System.String field, Lucene.Net.Search.Parser parser, T lowerVal, T upperVal, bool includeLower, bool includeUpper) + { + this.field = field; + this.parser = parser; + this.lowerVal = lowerVal; + this.upperVal = upperVal; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + } + + /// This method is implemented for each data type + public abstract override DocIdSet GetDocIdSet(IndexReader reader); + + public override System.String ToString() + { + System.Text.StringBuilder sb = new System.Text.StringBuilder(field).Append(":"); + return sb.Append(includeLower?'[':'{').Append((lowerVal == null)?"*":lowerVal.ToString()).Append(" TO ").Append((upperVal == null)?"*":upperVal.ToString()).Append(includeUpper?']':'}').ToString(); + } + + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is FieldCacheRangeFilter)) + return false; + FieldCacheRangeFilter other = (FieldCacheRangeFilter) o; + + if (!this.field.Equals(other.field) || this.includeLower != other.includeLower || this.includeUpper != other.includeUpper) + { + return false; + } + if (this.lowerVal != null ?! this.lowerVal.Equals(other.lowerVal):other.lowerVal != null) + return false; + if (this.upperVal != null ?! this.upperVal.Equals(other.upperVal):other.upperVal != null) + return false; + if (this.parser != null ?! 
this.parser.Equals(other.parser):other.parser != null) + return false; + return true; + } + + public override int GetHashCode() + { + int h = field.GetHashCode(); + h ^= ((lowerVal != null)?lowerVal.GetHashCode():550356204); + h = (h << 1) | (Number.URShift(h, 31)); // rotate to distinguish lower from upper + h ^= ((upperVal != null)?upperVal.GetHashCode():- 1674416163); + h ^= ((parser != null)?parser.GetHashCode():- 1572457324); + h ^= (includeLower?1549299360:- 365038026) ^ (includeUpper?1721088258:1948649653); + return h; + } + + /// + /// Returns the field name for this filter + /// + public string GetField { get { return field; } } + + /// + /// Returns true if the lower endpoint is inclusive + /// + public bool IncludesLower { get { return includeLower; } } + + /// + /// Returns true if the upper endpoint is inclusive + /// + public bool IncludesUpper { get { return includeUpper; } } + + /// + /// Returns the lower value of the range filter + /// + public T LowerValue { get { return lowerVal; } } + + /// + /// Returns the upper value of this range filter + /// + public T UpperValue { get { return upperVal; } } + + public Parser Parser { get { return parser; } } + + internal abstract class FieldCacheDocIdSet:DocIdSet + { + private class AnonymousClassDocIdSetIterator : DocIdSetIterator + { + public AnonymousClassDocIdSetIterator(Lucene.Net.Index.TermDocs termDocs, FieldCacheDocIdSet enclosingInstance) + { + InitBlock(termDocs, enclosingInstance); + } + private void InitBlock(Lucene.Net.Index.TermDocs termDocs, FieldCacheDocIdSet enclosingInstance) + { + this.termDocs = termDocs; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Index.TermDocs termDocs; + private FieldCacheDocIdSet enclosingInstance; + public FieldCacheDocIdSet Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private int doc = - 1; + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + do + { + if (!termDocs.Next()) + return doc = NO_MORE_DOCS; + } + while (!Enclosing_Instance.MatchDoc(doc = termDocs.Doc)); + return doc; + } + + public override int Advance(int target) + { + if (!termDocs.SkipTo(target)) + return doc = NO_MORE_DOCS; + while (!Enclosing_Instance.MatchDoc(doc = termDocs.Doc)) + { + if (!termDocs.Next()) + return doc = NO_MORE_DOCS; + } + return doc; + } + } + private class AnonymousClassDocIdSetIterator1:DocIdSetIterator + { + public AnonymousClassDocIdSetIterator1(FieldCacheDocIdSet enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(FieldCacheDocIdSet enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private FieldCacheDocIdSet enclosingInstance; + public FieldCacheDocIdSet Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private int doc = - 1; + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + try + { + do + { + doc++; + } + while (!Enclosing_Instance.MatchDoc(doc)); + return doc; + } + catch (System.IndexOutOfRangeException) + { + return doc = NO_MORE_DOCS; + } + } + + public override int Advance(int target) + { + try + { + doc = target; + while (!Enclosing_Instance.MatchDoc(doc)) + { + doc++; + } + return doc; + } + catch (System.IndexOutOfRangeException) + { + return doc = NO_MORE_DOCS; + } + } + } + private IndexReader reader; + private bool mayUseTermDocs; + + internal FieldCacheDocIdSet(IndexReader reader, bool mayUseTermDocs) + { + this.reader = reader; + this.mayUseTermDocs = 
mayUseTermDocs; + } + + /// this method checks, if a doc is a hit, should throw AIOBE, when position invalid + internal abstract bool MatchDoc(int doc); + + /// this DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs + public override bool IsCacheable + { + get { return !(mayUseTermDocs && reader.HasDeletions); } + } + + public override DocIdSetIterator Iterator() + { + // Synchronization needed because deleted docs BitVector + // can change after call to hasDeletions until TermDocs creation. + // We only use an iterator with termDocs, when this was requested (e.g. range contains 0) + // and the index has deletions + TermDocs termDocs; + lock (reader) + { + termDocs = IsCacheable ? null : reader.TermDocs(null); + } + if (termDocs != null) + { + // a DocIdSetIterator using TermDocs to iterate valid docIds + return new AnonymousClassDocIdSetIterator(termDocs, this); + } + else + { + // a DocIdSetIterator generating docIds by incrementing a variable - + // this one can be used if there are no deletions are on the index + return new AnonymousClassDocIdSetIterator1(this); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldCacheTermsFilter.cs b/external/Lucene.Net.Light/src/core/Search/FieldCacheTermsFilter.cs new file mode 100644 index 0000000000..d6526f40f9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldCacheTermsFilter.cs @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using TermDocs = Lucene.Net.Index.TermDocs; +using OpenBitSet = Lucene.Net.Util.OpenBitSet; + +namespace Lucene.Net.Search +{ + + /// A that only accepts documents whose single + /// term value in the specified field is contained in the + /// provided set of allowed terms. + /// + ///

+ /// + /// This is the same functionality as TermsFilter (from + /// contrib/queries), except this filter requires that the + /// field contains only a single term for all documents. + /// Because of drastically different implementations, they + /// also have different performance characteristics, as + /// described below. + /// + ///

+ /// + /// The first invocation of this filter on a given field will + /// be slower, since a must be + /// created. Subsequent invocations using the same field + /// will re-use this cache. However, as with all + /// functionality based on , persistent RAM + /// is consumed to hold the cache, and is not freed until the + /// is closed. In contrast, TermsFilter + /// has no persistent RAM consumption. + /// + /// + ///

+ /// + /// With each search, this filter translates the specified + /// set of Terms into a private keyed by + /// term number per unique (normally one + /// reader per segment). Then, during matching, the term + /// number for each docID is retrieved from the cache and + /// then checked for inclusion using the . + /// Since all testing is done using RAM resident data + /// structures, performance should be very fast, most likely + /// fast enough to not require further caching of the + /// DocIdSet for each possible combination of terms. + /// However, because docIDs are simply scanned linearly, an + /// index with a great many small documents may find this + /// linear scan too costly. + /// + ///

+ /// + /// In contrast, TermsFilter builds up an , + /// keyed by docID, every time it's created, by enumerating + /// through all matching docs using to seek + /// and scan through each term's docID list. While there is + /// no linear scan of all docIDs, besides the allocation of + /// the underlying array in the , this + /// approach requires a number of "disk seeks" in proportion + /// to the number of terms, which can be exceptionally costly + /// when there are cache misses in the OS's IO cache. + /// + ///

+ /// + /// Generally, this filter will be slower on the first + /// invocation for a given field, but subsequent invocations, + /// even if you change the allowed set of Terms, should be + /// faster than TermsFilter, especially as the number of + /// Terms being matched increases. If you are matching only + /// a very small number of terms, and those terms in turn + /// match a very small number of documents, TermsFilter may + /// perform faster. + /// + ///

+ /// + /// Which filter is best is very application dependent. + ///
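+ /// A minimal usage sketch (hedged; the field name, the allowed values and the surrounding
+ /// IndexSearcher are illustrative assumptions, not part of this class):
+ ///
+ ///   Filter filter = new FieldCacheTermsFilter("category", "sports", "politics");
+ ///   TopDocs hits = searcher.Search(new MatchAllDocsQuery(), filter, 10);
+ ///
+ /// Only documents whose single "category" term equals one of the allowed values pass the filter.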

+ + [Serializable] + public class FieldCacheTermsFilter:Filter + { + private readonly string field; + private readonly string[] terms; + + public FieldCacheTermsFilter(string field, params string[] terms) + { + this.field = field; + this.terms = terms; + } + + public virtual FieldCache FieldCache + { + get { return FieldCache_Fields.DEFAULT; } + } + + public override DocIdSet GetDocIdSet(IndexReader reader) + { + return new FieldCacheTermsFilterDocIdSet(this, FieldCache.GetStringIndex(reader, field)); + } + + protected internal class FieldCacheTermsFilterDocIdSet:DocIdSet + { + private void InitBlock(FieldCacheTermsFilter enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private FieldCacheTermsFilter enclosingInstance; + public FieldCacheTermsFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private readonly Lucene.Net.Search.StringIndex fcsi; + + private readonly OpenBitSet openBitSet; + + public FieldCacheTermsFilterDocIdSet(FieldCacheTermsFilter enclosingInstance, StringIndex fcsi) + { + InitBlock(enclosingInstance); + this.fcsi = fcsi; + openBitSet = new OpenBitSet(this.fcsi.lookup.Length); + foreach (string t in Enclosing_Instance.terms) + { + int termNumber = this.fcsi.BinarySearchLookup(t); + if (termNumber > 0) + { + openBitSet.FastSet(termNumber); + } + } + } + + public override DocIdSetIterator Iterator() + { + return new FieldCacheTermsFilterDocIdSetIterator(this); + } + + /// This DocIdSet implementation is cacheable. + public override bool IsCacheable + { + get { return true; } + } + + protected internal class FieldCacheTermsFilterDocIdSetIterator:DocIdSetIterator + { + public FieldCacheTermsFilterDocIdSetIterator(FieldCacheTermsFilterDocIdSet enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(FieldCacheTermsFilterDocIdSet enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private FieldCacheTermsFilterDocIdSet enclosingInstance; + public FieldCacheTermsFilterDocIdSet Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private int doc = - 1; + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + try + { + while (!Enclosing_Instance.openBitSet.FastGet(Enclosing_Instance.fcsi.order[++doc])) + { + } + } + catch (IndexOutOfRangeException) + { + doc = NO_MORE_DOCS; + } + return doc; + } + + public override int Advance(int target) + { + try + { + doc = target; + while (!Enclosing_Instance.openBitSet.FastGet(Enclosing_Instance.fcsi.order[doc])) + { + doc++; + } + } + catch (IndexOutOfRangeException) + { + doc = NO_MORE_DOCS; + } + return doc; + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldComparator.cs b/external/Lucene.Net.Light/src/core/Search/FieldComparator.cs new file mode 100644 index 0000000000..0eb5f52b47 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldComparator.cs @@ -0,0 +1,1065 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using ByteParser = Lucene.Net.Search.ByteParser; +using DoubleParser = Lucene.Net.Search.DoubleParser; +using FloatParser = Lucene.Net.Search.FloatParser; +using IntParser = Lucene.Net.Search.IntParser; +using LongParser = Lucene.Net.Search.LongParser; +using ShortParser = Lucene.Net.Search.ShortParser; +using StringIndex = Lucene.Net.Search.StringIndex; + +namespace Lucene.Net.Search +{ + + /// Expert: a FieldComparator compares hits so as to determine their + /// sort order when collecting the top results with + ///. The concrete public FieldComparator + /// classes here correspond to the SortField types. + /// + ///

This API is designed to achieve high performance + /// sorting, by exposing a tight interaction with + /// as it visits hits. Whenever a hit is + /// competitive, it's enrolled into a virtual slot, which is + /// an int ranging from 0 to numHits-1. The + /// is made aware of segment transitions + /// during searching in case any internal state it's tracking + /// needs to be recomputed during these transitions.

+ /// + ///

A comparator must define these functions:

+ /// + /// + /// + /// Compare a hit at 'slot a' + /// with hit 'slot b'. + /// + /// This method is called by + /// to notify the + /// FieldComparator of the current weakest ("bottom") + /// slot. Note that this slot may not hold the weakest + /// value according to your comparator, in cases where + /// your comparator is not the primary one (ie, is only + /// used to break ties from the comparators before it). + /// + /// Compare a new hit (docID) + /// against the "weakest" (bottom) entry in the queue. + /// + /// Installs a new hit into the + /// priority queue. The + /// calls this method when a new hit is competitive. + /// + /// Invoked + /// when the search is switching to the next segment. + /// You may need to update internal state of the + /// comparator, for example retrieving new values from + /// the . + /// + /// Return the sort value stored in + /// the specified slot. This is only called at the end + /// of the search, in order to populate + /// when returning the top results. + /// + /// + /// NOTE: This API is experimental and might change in + /// incompatible ways in the next release. + ///
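+ /// A hedged sketch of the call sequence a collector typically drives; the names
+ /// "segmentReader", "docBase", "slot" and "newBottomSlot" are illustrative, not part of this API:
+ ///
+ ///   comparator.SetNextReader(segmentReader, docBase);   // once per segment
+ ///   if (comparator.CompareBottom(doc) > 0)               // doc sorts before the weakest entry
+ ///   {
+ ///       comparator.Copy(slot, doc);                      // install the competitive hit
+ ///       comparator.SetBottom(newBottomSlot);             // queue reports its new weakest slot
+ ///   }
+ ///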

+ public abstract class FieldComparator + { + /// Compare hit at slot1 with hit at slot2. + /// + /// + /// first slot to compare + /// + /// second slot to compare + /// + /// any N < 0 if slot2's value is sorted after + /// slot1, any N > 0 if the slot2's value is sorted before + /// slot1 and 0 if they are equal + /// + public abstract int Compare(int slot1, int slot2); + + /// Set the bottom slot, ie the "weakest" (sorted last) + /// entry in the queue. When is + /// called, you should compare against this slot. This + /// will always be called before . + /// + /// + /// the currently weakest (sorted last) slot in the queue + /// + public abstract void SetBottom(int slot); + + /// Compare the bottom of the queue with doc. This will + /// only invoked after setBottom has been called. This + /// should return the same result as + ///} as if bottom were slot1 and the new + /// document were slot 2. + /// + ///

For a search that hits many results, this method + /// will be the hotspot (invoked by far the most + /// frequently).

+ /// + ///

+ /// that was hit + /// + /// any N < 0 if the doc's value is sorted after + /// the bottom entry (not competitive), any N > 0 if the + /// doc's value is sorted before the bottom entry and 0 if + /// they are equal. + /// + public abstract int CompareBottom(int doc); + + /// This method is called when a new hit is competitive. + /// You should copy any state associated with this document + /// that will be required for future comparisons, into the + /// specified slot. + /// + /// + /// which slot to copy the hit to + /// + /// docID relative to current reader + /// + public abstract void Copy(int slot, int doc); + + /// Set a new Reader. All doc correspond to the current Reader. + /// + /// + /// current reader + /// + /// docBase of this reader + /// + /// IOException + /// IOException + public abstract void SetNextReader(IndexReader reader, int docBase); + + /// Sets the Scorer to use in case a document's score is + /// needed. + /// + /// + /// Scorer instance that you should use to + /// obtain the current hit's score, if necessary. + /// + public virtual void SetScorer(Scorer scorer) + { + // Empty implementation since most comparators don't need the score. This + // can be overridden by those that need it. + } + + /// Return the actual value in the slot. + /// + /// + /// the value + /// + /// value in this slot upgraded to Comparable + /// + public abstract IComparable this[int slot] { get; } + + /// Parses field's values as byte (using + /// and sorts by ascending value + /// + public sealed class ByteComparator:FieldComparator + { + private sbyte[] values; + private sbyte[] currentReaderValues; + private System.String field; + private ByteParser parser; + private sbyte bottom; + + internal ByteComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser) + { + values = new sbyte[numHits]; + this.field = field; + this.parser = (ByteParser) parser; + } + + public override int Compare(int slot1, int slot2) + { + return values[slot1] - values[slot2]; + } + + public override int CompareBottom(int doc) + { + return bottom - currentReaderValues[doc]; + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetBytes(reader, field, parser); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return (sbyte) values[slot]; } + } + } + + /// Sorts by ascending docID + public sealed class DocComparator:FieldComparator + { + private int[] docIDs; + private int docBase; + private int bottom; + + internal DocComparator(int numHits) + { + docIDs = new int[numHits]; + } + + public override int Compare(int slot1, int slot2) + { + // No overflow risk because docIDs are non-negative + return docIDs[slot1] - docIDs[slot2]; + } + + public override int CompareBottom(int doc) + { + // No overflow risk because docIDs are non-negative + return bottom - (docBase + doc); + } + + public override void Copy(int slot, int doc) + { + docIDs[slot] = docBase + doc; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + // TODO: can we "map" our docIDs to the current + // reader? 
saves having to then subtract on every + // compare call + this.docBase = docBase; + } + + public override void SetBottom(int bottom) + { + this.bottom = docIDs[bottom]; + } + + public override IComparable this[int slot] + { + get { return (System.Int32) docIDs[slot]; } + } + } + + /// Parses field's values as double (using + /// and sorts by ascending value + /// + public sealed class DoubleComparator:FieldComparator + { + private double[] values; + private double[] currentReaderValues; + private System.String field; + private DoubleParser parser; + private double bottom; + + internal DoubleComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser) + { + values = new double[numHits]; + this.field = field; + this.parser = (DoubleParser) parser; + } + + public override int Compare(int slot1, int slot2) + { + double v1 = values[slot1]; + double v2 = values[slot2]; + if (v1 > v2) + { + return 1; + } + else if (v1 < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override int CompareBottom(int doc) + { + double v2 = currentReaderValues[doc]; + if (bottom > v2) + { + return 1; + } + else if (bottom < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetDoubles(reader, field, parser); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return (double) values[slot]; } + } + } + + /// Parses field's values as float (using + /// and sorts by ascending value + /// + public sealed class FloatComparator:FieldComparator + { + private float[] values; + private float[] currentReaderValues; + private System.String field; + private FloatParser parser; + private float bottom; + + internal FloatComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser) + { + values = new float[numHits]; + this.field = field; + this.parser = (FloatParser) parser; + } + + public override int Compare(int slot1, int slot2) + { + // TODO: are there sneaky non-branch ways to compute + // sign of float? + float v1 = values[slot1]; + float v2 = values[slot2]; + if (v1 > v2) + { + return 1; + } + else if (v1 < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override int CompareBottom(int doc) + { + // TODO: are there sneaky non-branch ways to compute + // sign of float? 
+ float v2 = currentReaderValues[doc]; + if (bottom > v2) + { + return 1; + } + else if (bottom < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetFloats(reader, field, parser); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return (float) values[slot]; } + } + } + + /// Parses field's values as int (using + /// and sorts by ascending value + /// + public sealed class IntComparator:FieldComparator + { + private int[] values; + private int[] currentReaderValues; + private System.String field; + private IntParser parser; + private int bottom; // Value of bottom of queue + + internal IntComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser) + { + values = new int[numHits]; + this.field = field; + this.parser = (IntParser) parser; + } + + public override int Compare(int slot1, int slot2) + { + // TODO: there are sneaky non-branch ways to compute + // -1/+1/0 sign + // Cannot return values[slot1] - values[slot2] because that + // may overflow + int v1 = values[slot1]; + int v2 = values[slot2]; + if (v1 > v2) + { + return 1; + } + else if (v1 < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override int CompareBottom(int doc) + { + // TODO: there are sneaky non-branch ways to compute + // -1/+1/0 sign + // Cannot return bottom - values[slot2] because that + // may overflow + int v2 = currentReaderValues[doc]; + if (bottom > v2) + { + return 1; + } + else if (bottom < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetInts(reader, field, parser); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return (System.Int32) values[slot]; } + } + } + + /// Parses field's values as long (using + /// and sorts by ascending value + /// + public sealed class LongComparator:FieldComparator + { + private long[] values; + private long[] currentReaderValues; + private System.String field; + private LongParser parser; + private long bottom; + + internal LongComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser) + { + values = new long[numHits]; + this.field = field; + this.parser = (LongParser) parser; + } + + public override int Compare(int slot1, int slot2) + { + // TODO: there are sneaky non-branch ways to compute + // -1/+1/0 sign + long v1 = values[slot1]; + long v2 = values[slot2]; + if (v1 > v2) + { + return 1; + } + else if (v1 < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override int CompareBottom(int doc) + { + // TODO: there are sneaky non-branch ways to compute + // -1/+1/0 sign + long v2 = currentReaderValues[doc]; + if (bottom > v2) + { + return 1; + } + else if (bottom < v2) + { + return - 1; + } + else + { + return 0; + } + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + 
currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetLongs(reader, field, parser); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return (long) values[slot]; } + } + } + + /// Sorts by descending relevance. NOTE: if you are + /// sorting only by descending relevance and then + /// secondarily by ascending docID, peformance is faster + /// using directly (which + /// uses when no is + /// specified). + /// + public sealed class RelevanceComparator:FieldComparator + { + private float[] scores; + private float bottom; + private Scorer scorer; + + internal RelevanceComparator(int numHits) + { + scores = new float[numHits]; + } + + public override int Compare(int slot1, int slot2) + { + float score1 = scores[slot1]; + float score2 = scores[slot2]; + return score1 > score2?- 1:(score1 < score2?1:0); + } + + public override int CompareBottom(int doc) + { + float score = scorer.Score(); + return bottom > score?- 1:(bottom < score?1:0); + } + + public override void Copy(int slot, int doc) + { + scores[slot] = scorer.Score(); + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + } + + public override void SetBottom(int bottom) + { + this.bottom = scores[bottom]; + } + + public override void SetScorer(Scorer scorer) + { + // wrap with a ScoreCachingWrappingScorer so that successive calls to + // score() will not incur score computation over and over again. + this.scorer = new ScoreCachingWrappingScorer(scorer); + } + + public override IComparable this[int slot] + { + get { return (float) scores[slot]; } + } + } + + /// Parses field's values as short (using ) + /// and sorts by ascending value + /// + public sealed class ShortComparator:FieldComparator + { + private short[] values; + private short[] currentReaderValues; + private System.String field; + private ShortParser parser; + private short bottom; + + internal ShortComparator(int numHits, System.String field, Lucene.Net.Search.Parser parser) + { + values = new short[numHits]; + this.field = field; + this.parser = (ShortParser) parser; + } + + public override int Compare(int slot1, int slot2) + { + return values[slot1] - values[slot2]; + } + + public override int CompareBottom(int doc) + { + return bottom - currentReaderValues[doc]; + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetShorts(reader, field, parser); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return (short) values[slot]; } + } + } + + /// Sorts by a field's value using the Collator for a + /// given Locale. 
+ /// + public sealed class StringComparatorLocale:FieldComparator + { + + private System.String[] values; + private System.String[] currentReaderValues; + private System.String field; + internal System.Globalization.CompareInfo collator; + private System.String bottom; + + internal StringComparatorLocale(int numHits, System.String field, System.Globalization.CultureInfo locale) + { + values = new System.String[numHits]; + this.field = field; + collator = locale.CompareInfo; + } + + public override int Compare(int slot1, int slot2) + { + System.String val1 = values[slot1]; + System.String val2 = values[slot2]; + if (val1 == null) + { + if (val2 == null) + { + return 0; + } + return - 1; + } + else if (val2 == null) + { + return 1; + } + return collator.Compare(val1.ToString(), val2.ToString()); + } + + public override int CompareBottom(int doc) + { + System.String val2 = currentReaderValues[doc]; + if (bottom == null) + { + if (val2 == null) + { + return 0; + } + return - 1; + } + else if (val2 == null) + { + return 1; + } + return collator.Compare(bottom.ToString(), val2.ToString()); + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStrings(reader, field); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return values[slot]; } + } + } + + /// Sorts by field's natural String sort order, using + /// ordinals. This is functionally equivalent to + ///, but it first resolves the string + /// to their relative ordinal positions (using the index + /// returned by ), and + /// does most comparisons using the ordinals. For medium + /// to large results, this comparator will be much faster + /// than . For very small + /// result sets it may be slower. 
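+ /// A small worked illustration (values invented): if the segment's StringIndex has
+ /// lookup = { null, "apple", "kiwi", "zebra" } and order[doc] == 2, the hit is tracked by the
+ /// ordinal 2 ("kiwi"); two hits copied from the same segment compare by subtracting ordinals,
+ /// and only hits from different segments fall back to comparing the cached string values.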
+ /// + public sealed class StringOrdValComparator:FieldComparator + { + + private int[] ords; + private System.String[] values; + private int[] readerGen; + + private int currentReaderGen = - 1; + private System.String[] lookup; + private int[] order; + private System.String field; + + private int bottomSlot = - 1; + private int bottomOrd; + private System.String bottomValue; + private bool reversed; + private int sortPos; + + public StringOrdValComparator(int numHits, System.String field, int sortPos, bool reversed) + { + ords = new int[numHits]; + values = new System.String[numHits]; + readerGen = new int[numHits]; + this.sortPos = sortPos; + this.reversed = reversed; + this.field = field; + } + + public override int Compare(int slot1, int slot2) + { + if (readerGen[slot1] == readerGen[slot2]) + { + int cmp = ords[slot1] - ords[slot2]; + if (cmp != 0) + { + return cmp; + } + } + + System.String val1 = values[slot1]; + System.String val2 = values[slot2]; + if (val1 == null) + { + if (val2 == null) + { + return 0; + } + return - 1; + } + else if (val2 == null) + { + return 1; + } + return String.CompareOrdinal(val1, val2); + } + + public override int CompareBottom(int doc) + { + System.Diagnostics.Debug.Assert(bottomSlot != - 1); + int order = this.order[doc]; + int cmp = bottomOrd - order; + if (cmp != 0) + { + return cmp; + } + + System.String val2 = lookup[order]; + if (bottomValue == null) + { + if (val2 == null) + { + return 0; + } + // bottom wins + return - 1; + } + else if (val2 == null) + { + // doc wins + return 1; + } + return String.CompareOrdinal(bottomValue, val2); + } + + private void Convert(int slot) + { + readerGen[slot] = currentReaderGen; + int index = 0; + System.String value_Renamed = values[slot]; + if (value_Renamed == null) + { + ords[slot] = 0; + return ; + } + + if (sortPos == 0 && bottomSlot != - 1 && bottomSlot != slot) + { + // Since we are the primary sort, the entries in the + // queue are bounded by bottomOrd: + System.Diagnostics.Debug.Assert(bottomOrd < lookup.Length); + if (reversed) + { + index = BinarySearch(lookup, value_Renamed, bottomOrd, lookup.Length - 1); + } + else + { + index = BinarySearch(lookup, value_Renamed, 0, bottomOrd); + } + } + else + { + // Full binary search + index = BinarySearch(lookup, value_Renamed); + } + + if (index < 0) + { + index = - index - 2; + } + ords[slot] = index; + } + + public override void Copy(int slot, int doc) + { + int ord = order[doc]; + ords[slot] = ord; + System.Diagnostics.Debug.Assert(ord >= 0); + values[slot] = lookup[ord]; + readerGen[slot] = currentReaderGen; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + StringIndex currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStringIndex(reader, field); + currentReaderGen++; + order = currentReaderValues.order; + lookup = currentReaderValues.lookup; + System.Diagnostics.Debug.Assert(lookup.Length > 0); + if (bottomSlot != - 1) + { + Convert(bottomSlot); + bottomOrd = ords[bottomSlot]; + } + } + + public override void SetBottom(int bottom) + { + bottomSlot = bottom; + if (readerGen[bottom] != currentReaderGen) + { + Convert(bottomSlot); + } + bottomOrd = ords[bottom]; + System.Diagnostics.Debug.Assert(bottomOrd >= 0); + System.Diagnostics.Debug.Assert(bottomOrd < lookup.Length); + bottomValue = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return values[slot]; } + } + + public string[] GetValues() + { + return values; + } + + public int BottomSlot + { + get { return bottomSlot; 
} + } + + public string Field + { + get { return field; } + } + } + + /// Sorts by field's natural String sort order. All + /// comparisons are done using String.compareTo, which is + /// slow for medium to large result sets but possibly + /// very fast for very small results sets. + /// + public sealed class StringValComparator:FieldComparator + { + + private System.String[] values; + private System.String[] currentReaderValues; + private System.String field; + private System.String bottom; + + internal StringValComparator(int numHits, System.String field) + { + values = new System.String[numHits]; + this.field = field; + } + + public override int Compare(int slot1, int slot2) + { + System.String val1 = values[slot1]; + System.String val2 = values[slot2]; + if (val1 == null) + { + if (val2 == null) + { + return 0; + } + return - 1; + } + else if (val2 == null) + { + return 1; + } + + return String.CompareOrdinal(val1, val2); + } + + public override int CompareBottom(int doc) + { + System.String val2 = currentReaderValues[doc]; + if (bottom == null) + { + if (val2 == null) + { + return 0; + } + return - 1; + } + else if (val2 == null) + { + return 1; + } + return String.CompareOrdinal(bottom, val2); + } + + public override void Copy(int slot, int doc) + { + values[slot] = currentReaderValues[doc]; + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + currentReaderValues = Lucene.Net.Search.FieldCache_Fields.DEFAULT.GetStrings(reader, field); + } + + public override void SetBottom(int bottom) + { + this.bottom = values[bottom]; + } + + public override IComparable this[int slot] + { + get { return values[slot]; } + } + } + + protected internal static int BinarySearch(System.String[] a, System.String key) + { + return BinarySearch(a, key, 0, a.Length - 1); + } + + protected internal static int BinarySearch(System.String[] a, System.String key, int low, int high) + { + + while (low <= high) + { + int mid = Number.URShift((low + high), 1); + System.String midVal = a[mid]; + int cmp; + if (midVal != null) + { + cmp = String.CompareOrdinal(midVal, key); + } + else + { + cmp = - 1; + } + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; + } + return - (low + 1); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldComparatorSource.cs b/external/Lucene.Net.Light/src/core/Search/FieldComparatorSource.cs new file mode 100644 index 0000000000..bb02fa9b14 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldComparatorSource.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Provides a for custom field sorting. 
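+ /// A hedged usage sketch (the field name and the source class are illustrative): a custom source
+ /// is typically handed to a SortField, for example new SortField("price", new MyComparatorSource()),
+ /// and NewComparator is then asked once per search for a comparator sized to the requested hit count.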
+ /// + /// NOTE: This API is experimental and might change in + /// incompatible ways in the next release. + /// + /// + [Serializable] + public abstract class FieldComparatorSource + { + + /// Creates a comparator for the field in the given index. + /// + /// + /// Name of the field to create comparator for. + /// + /// FieldComparator. + /// + /// IOException + /// If an error occurs reading the index. + /// + public abstract FieldComparator NewComparator(System.String fieldname, int numHits, int sortPos, bool reversed); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldDoc.cs b/external/Lucene.Net.Light/src/core/Search/FieldDoc.cs new file mode 100644 index 0000000000..b2730357c2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldDoc.cs @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Expert: A ScoreDoc which also contains information about + /// how to sort the referenced document. In addition to the + /// document number and score, this object contains an array + /// of values for the document from the field(s) used to sort. + /// For example, if the sort criteria was to sort by fields + /// "a", "b" then "c", the fields object array + /// will have three elements, corresponding respectively to + /// the term values for the document in fields "a", "b" and "c". + /// The class of each element in the array will be either + /// Integer, Float or String depending on the type of values + /// in the terms of each field. + /// + ///

Created: Feb 11, 2004 1:23:38 PM + /// + ///

+ /// + /// + [Serializable] + public class FieldDoc:ScoreDoc + { + + /// Expert: The values which are used to sort the referenced document. + /// The order of these will match the original sort criteria given by a + /// Sort object. Each Object will be either an Integer, Float or String, + /// depending on the type of values in the terms of the original field. + /// + /// + /// + /// + /// + [NonSerialized] + public System.IComparable[] fields; + + /// Expert: Creates one of these objects with empty sort information. + public FieldDoc(int doc, float score):base(doc, score) + { + } + + /// Expert: Creates one of these objects with the given sort information. + public FieldDoc(int doc, float score, System.IComparable[] fields):base(doc, score) + { + this.fields = fields; + } + + // A convenience method for debugging. + public override System.String ToString() + { + // super.toString returns the doc and score information, so just add the + // fields information + System.Text.StringBuilder sb = new System.Text.StringBuilder(base.ToString()); + sb.Append("["); + for (int i = 0; i < fields.Length; i++) + { + sb.Append(fields[i]).Append(", "); + } + sb.Length -= 2; // discard last ", " + sb.Append("]"); + return sb.ToString(); + } + + #region SERIALIZATION + internal object[] fieldsClone = null; + + [System.Runtime.Serialization.OnSerializing] + void OnSerializing(System.Runtime.Serialization.StreamingContext context) + { + if (fields == null) return; + + // Copy "fields" to "fieldsClone" + fieldsClone = new object[fields.Length]; + for (int i = 0; i < fields.Length; i++) + { + fieldsClone[i] = fields[i]; + } + } + + [System.Runtime.Serialization.OnDeserialized] + void OnDeserialized(System.Runtime.Serialization.StreamingContext context) + { + if (fieldsClone == null) return; + + // Form "fields" from "fieldsClone" + fields = new IComparable[fieldsClone.Length]; + for (int i = 0; i < fields.Length; i++) + { + fields[i] = (IComparable)fieldsClone[i]; + } + } + #endregion + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldDocSortedHitQueue.cs b/external/Lucene.Net.Light/src/core/Search/FieldDocSortedHitQueue.cs new file mode 100644 index 0000000000..46a450cf6b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldDocSortedHitQueue.cs @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Search +{ + + /// Expert: Collects sorted results from Searchable's and collates them. + /// The elements put into this queue must be of type FieldDoc. + /// + ///
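FieldDoc instances such as the ones this queue collects are what a sorted search hands back to callers. A short usage sketch; the searcher, the "price" field, and the fill-fields behaviour of Search(query, filter, n, sort) are assumptions for illustration, not shown in this commit:

using System;
using Lucene.Net.Search;

static class SortedSearchExample
{
    public static void PrintSortedHits(Searcher searcher, Query query)
    {
        var sort = new Sort(new SortField("price", SortField.INT, false));
        TopFieldDocs hits = searcher.Search(query, null, 10, sort);

        foreach (ScoreDoc sd in hits.ScoreDocs)
        {
            // With a Sort supplied, each hit is a FieldDoc whose fields array holds
            // the per-field sort values in the same order as the SortFields.
            var fieldDoc = (FieldDoc) sd;
            Console.WriteLine("doc={0} price={1}", fieldDoc.Doc, fieldDoc.fields[0]);
        }
    }
}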

Created: Feb 11, 2004 2:04:21 PM + /// + ///

+ /// lucene 1.4 + /// + class FieldDocSortedHitQueue : PriorityQueue + { + internal volatile SortField[] fields = null; + + // used in the case where the fields are sorted by locale + // based strings + internal volatile System.Globalization.CompareInfo[] collators; + + + /// Creates a hit queue sorted by the given list of fields. + /// The number of hits to retain. Must be greater than zero. + internal FieldDocSortedHitQueue(int size) + { + Initialize(size); + } + + + /// Allows redefinition of sort fields if they are null. + /// This is to handle the case using ParallelMultiSearcher where the + /// original list contains AUTO and we don't know the actual sort + /// type until the values come back. The fields can only be set once. + /// This method is thread safe. + /// + /// + internal virtual void SetFields(SortField[] fields) + { + lock (this) + { + this.fields = fields; + this.collators = HasCollators(fields); + } + } + + /// Returns the fields being used to sort. + internal virtual SortField[] GetFields() + { + return fields; + } + + + /// Returns an array of collators, possibly null. The collators + /// correspond to any SortFields which were given a specific locale. + /// + /// Array of sort fields. + /// Array, possibly null. + private System.Globalization.CompareInfo[] HasCollators(SortField[] fields) + { + if (fields == null) + return null; + System.Globalization.CompareInfo[] ret = new System.Globalization.CompareInfo[fields.Length]; + for (int i = 0; i < fields.Length; ++i) + { + System.Globalization.CultureInfo locale = fields[i].Locale; + if (locale != null) + ret[i] = locale.CompareInfo; + } + return ret; + } + + + /// Returns whether a is less relevant than b. + /// ScoreDoc + /// ScoreDoc + /// true if document a should be sorted after document b. + public override bool LessThan(FieldDoc docA, FieldDoc docB) + { + int n = fields.Length; + int c = 0; + for (int i = 0; i < n && c == 0; ++i) + { + int type = fields[i].Type; + if(type == SortField.STRING) + { + string s1 = (string) docA.fields[i]; + string s2 = (string) docB.fields[i]; + // null values need to be sorted first, because of how FieldCache.getStringIndex() + // works - in that routine, any documents without a value in the given field are + // put first. If both are null, the next SortField is used + if (s1 == null) + { + c = (s2 == null) ? 0 : -1; + } + else if (s2 == null) + { + c = 1; + } + else if (fields[i].Locale == null) + { + c = s1.CompareTo(s2); + } + else + { + c = collators[i].Compare(s1, s2); + } + } + else + { + c = docA.fields[i].CompareTo(docB.fields[i]); + if (type == SortField.SCORE) + { + c = -c; + } + } + if (fields[i].Reverse) + { + c = - c; + } + } + + // avoid random sort order that could lead to duplicates (bug #31241): + if (c == 0) + return docA.Doc > docB.Doc; + + return c > 0; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FieldValueHitQueue.cs b/external/Lucene.Net.Light/src/core/Search/FieldValueHitQueue.cs new file mode 100644 index 0000000000..752a564b8f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FieldValueHitQueue.cs @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Search +{ + + /// Expert: A hit queue for sorting by hits by terms in more than one field. + /// Uses FieldCache.DEFAULT for maintaining + /// internal term lookup tables. + /// + /// NOTE: This API is experimental and might change in + /// incompatible ways in the next release. + /// + /// + /// + /// + public abstract class FieldValueHitQueue : PriorityQueue + { + // had to change from internal to public, due to public accessability of FieldValueHitQueue + public /*internal*/ sealed class Entry : ScoreDoc + { + internal int slot; + + internal Entry(int slot, int doc, float score) + : base(doc, score) + { + + this.slot = slot; + } + + public override System.String ToString() + { + return "slot:" + slot + " " + base.ToString(); + } + } + + /// An implementation of which is optimized in case + /// there is just one comparator. + /// + private sealed class OneComparatorFieldValueHitQueue : FieldValueHitQueue + { + + private FieldComparator comparator; + private int oneReverseMul; + + public OneComparatorFieldValueHitQueue(SortField[] fields, int size):base(fields) + { + if (fields.Length == 0) + { + throw new System.ArgumentException("Sort must contain at least one field"); + } + + SortField field = fields[0]; + comparator = field.GetComparator(size, 0); + oneReverseMul = field.reverse?- 1:1; + + comparators[0] = comparator; + reverseMul[0] = oneReverseMul; + + Initialize(size); + } + + /// Returns whether a is less relevant than b. + /// ScoreDoc + /// ScoreDoc + /// true if document a should be sorted after document b. + public override bool LessThan(Entry hitA, Entry hitB) + { + System.Diagnostics.Debug.Assert(hitA != hitB); + System.Diagnostics.Debug.Assert(hitA.slot != hitB.slot); + + int c = oneReverseMul * comparator.Compare(hitA.slot, hitB.slot); + if (c != 0) + { + return c > 0; + } + + // avoid random sort order that could lead to duplicates (bug #31241): + return hitA.Doc > hitB.Doc; + } + } + + /// An implementation of which is optimized in case + /// there is more than one comparator. + /// + private sealed class MultiComparatorsFieldValueHitQueue : FieldValueHitQueue + { + + public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size):base(fields) + { + + int numComparators = comparators.Length; + for (int i = 0; i < numComparators; ++i) + { + SortField field = fields[i]; + + reverseMul[i] = field.reverse?- 1:1; + comparators[i] = field.GetComparator(size, i); + } + + Initialize(size); + } + + public override bool LessThan(Entry hitA, Entry hitB) + { + System.Diagnostics.Debug.Assert(hitA != hitB); + System.Diagnostics.Debug.Assert(hitA.slot != hitB.slot); + + int numComparators = comparators.Length; + for (int i = 0; i < numComparators; ++i) + { + int c = reverseMul[i] * comparators[i].Compare(hitA.slot, hitB.slot); + if (c != 0) + { + // Short circuit + return c > 0; + } + } + + // avoid random sort order that could lead to duplicates (bug #31241): + return hitA.Doc > hitB.Doc; + } + } + + // prevent instantiation and extension. 
+ private FieldValueHitQueue(SortField[] fields) + { + // When we get here, fields.length is guaranteed to be > 0, therefore no + // need to check it again. + + // All these are required by this class's API - need to return arrays. + // Therefore even in the case of a single comparator, create an array + // anyway. + this.fields = fields; + int numComparators = fields.Length; + comparators = new FieldComparator[numComparators]; + reverseMul = new int[numComparators]; + } + + /// Creates a hit queue sorted by the given list of fields. + /// + ///

NOTE: The instances returned by this method + /// pre-allocate a full array of length numHits. + /// + ///

+ /// SortField array we are sorting by in priority order (highest + /// priority first); cannot be null or empty + /// + /// The number of hits to retain. Must be greater than zero. + /// + /// IOException + public static FieldValueHitQueue Create(SortField[] fields, int size) + { + + if (fields.Length == 0) + { + throw new System.ArgumentException("Sort must contain at least one field"); + } + + if (fields.Length == 1) + { + return new OneComparatorFieldValueHitQueue(fields, size); + } + else + { + return new MultiComparatorsFieldValueHitQueue(fields, size); + } + } + + internal virtual FieldComparator[] GetComparators() + { + return comparators; + } + + internal virtual int[] GetReverseMul() + { + return reverseMul; + } + + /// Stores the sort criteria being used. + protected internal SortField[] fields; + protected internal FieldComparator[] comparators; + protected internal int[] reverseMul; + + public abstract override bool LessThan(Entry a, Entry b); + + /// Given a queue Entry, creates a corresponding FieldDoc + /// that contains the values used to sort the given document. + /// These values are not the raw values out of the index, but the internal + /// representation of them. This is so the given search hit can be collated by + /// a MultiSearcher with other search hits. + /// + /// + /// The Entry used to create a FieldDoc + /// + /// The newly created FieldDoc + /// + /// + /// + internal virtual FieldDoc FillFields(Entry entry) + { + int n = comparators.Length; + System.IComparable[] fields = new System.IComparable[n]; + for (int i = 0; i < n; ++i) + { + fields[i] = comparators[i][entry.slot]; + } + //if (maxscore > 1.0f) doc.score /= maxscore; // normalize scores + return new FieldDoc(entry.Doc, entry.Score, fields); + } + + /// Returns the SortFields being used by this hit queue. + internal virtual SortField[] GetFields() + { + return fields; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Filter.cs b/external/Lucene.Net.Light/src/core/Search/Filter.cs new file mode 100644 index 0000000000..f4f1f24bf3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Filter.cs @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using DocIdBitSet = Lucene.Net.Util.DocIdBitSet; + +namespace Lucene.Net.Search +{ + + /// Abstract base class for restricting which documents may be returned during searching. + [Serializable] + public abstract class Filter + { + /// + /// Creates a enumerating the documents that should be + /// permitted in search results. NOTE: null can be + /// returned if no documents are accepted by this Filter. + ///

+ /// Note: This method will be called once per segment in + /// the index during searching. The returned + /// must refer to document IDs for that segment, not for + /// the top-level reader. + ///
+ /// Note: This method will be called once per segment in + /// the index during searching. The returned DocIdSet + /// must refer to document IDs for that segment, not for + /// the top-level reader.

+ /// a DocIdSet that provides the documents which should be permitted or + /// prohibited in search results. NOTE: null can be returned if + /// no documents will be accepted by this Filter. + /// + /// + /// A instance opened on the index currently + /// searched on. Note, it is likely that the provided reader does not + /// represent the whole underlying index i.e. if the index has more than + /// one segment the given reader only represents a single segment. + /// + /// + /// + public abstract DocIdSet GetDocIdSet(IndexReader reader); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FilterManager.cs b/external/Lucene.Net.Light/src/core/Search/FilterManager.cs new file mode 100644 index 0000000000..1afdc0b924 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FilterManager.cs @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Support; + +namespace Lucene.Net.Search +{ + + /// Filter caching singleton. It can be used + /// to save filters locally for reuse. + /// This class makes it possble to cache Filters even when using RMI, as it + /// keeps the cache on the seaercher side of the RMI connection. + /// + /// Also could be used as a persistent storage for any filter as long as the + /// filter provides a proper hashCode(), as that is used as the key in the cache. + /// + /// The cache is periodically cleaned up from a separate thread to ensure the + /// cache doesn't exceed the maximum size. + /// + public class FilterManager + { + + protected internal static FilterManager manager; + + /// The default maximum number of Filters in the cache + protected internal const int DEFAULT_CACHE_CLEAN_SIZE = 100; + /// The default frequency of cache clenup + protected internal const long DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10; + + /// The cache itself + protected internal IDictionary cache; + /// Maximum allowed cache size + protected internal int cacheCleanSize; + /// Cache cleaning frequency + protected internal long cleanSleepTime; + /// Cache cleaner that runs in a separate thread + protected internal FilterCleaner internalFilterCleaner; + + private static readonly object _staticSyncObj = new object(); + public static FilterManager Instance + { + get + { + lock (_staticSyncObj) + { + return manager ?? (manager = new FilterManager()); + } + } + } + + /// Sets up the FilterManager singleton. 
+ protected internal FilterManager() + { + cache = new HashMap(); + cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items + cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings + + internalFilterCleaner = new FilterCleaner(this); + ThreadClass fcThread = new ThreadClass(new System.Threading.ThreadStart(internalFilterCleaner.Run)); + // setto be a Daemon so it doesn't have to be stopped + fcThread.IsBackground = true; + fcThread.Start(); + } + + /// Sets the max size that cache should reach before it is cleaned up + /// maximum allowed cache size + public virtual void SetCacheSize(int value) + { + this.cacheCleanSize = value; + } + + /// Sets the cache cleaning frequency in milliseconds. + /// cleaning frequency in millioseconds + public virtual void SetCleanThreadSleepTime(long value) + { + this.cleanSleepTime = value; + } + + /// Returns the cached version of the filter. Allows the caller to pass up + /// a small filter but this will keep a persistent version around and allow + /// the caching filter to do its job. + /// + /// + /// The input filter + /// + /// The cached version of the filter + /// + public virtual Filter GetFilter(Filter filter) + { + lock (cache) + { + FilterItem fi = null; + fi = cache[filter.GetHashCode()]; + if (fi != null) + { + fi.timestamp = System.DateTime.UtcNow.Ticks; + return fi.filter; + } + cache[filter.GetHashCode()] = new FilterItem(filter); + return filter; + } + } + + /// Holds the filter and the last time the filter was used, to make LRU-based + /// cache cleaning possible. + /// TODO: Clean this up when we switch to Java 1.5 + /// + protected internal class FilterItem + { + public Filter filter; + public long timestamp; + + public FilterItem(Filter filter) + { + this.filter = filter; + this.timestamp = System.DateTime.UtcNow.Ticks; + } + } + + + /// Keeps the cache from getting too big. + /// If we were using Java 1.5, we could use LinkedHashMap and we would not need this thread + /// to clean out the cache. + /// + /// The SortedSet sortedFilterItems is used only to sort the items from the cache, + /// so when it's time to clean up we have the TreeSet sort the FilterItems by + /// timestamp. + /// + /// Removes 1.5 * the numbers of items to make the cache smaller. + /// For example: + /// If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 round up to 8. + /// This way we clean the cache a bit more, and avoid having the cache cleaner having to do it frequently. 
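Pulling this together, a concrete Filter (the abstract class shown a little earlier) can be quite small, and giving it a stable GetHashCode is what lets FilterManager.Instance.GetFilter hand back a shared instance, since the cache above is keyed on the filter's hash code. A hedged sketch; the field and value names are illustrative, and API names follow the 3.0-style properties used elsewhere in this diff:

using System.Collections;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Util;

[System.Serializable]
public class SingleTermFilter : Filter
{
    private readonly Term term;

    public SingleTermFilter(Term term) { this.term = term; }

    // Called once per segment; the returned set uses that segment's doc IDs.
    public override DocIdSet GetDocIdSet(IndexReader reader)
    {
        var bits = new BitArray(reader.MaxDoc);
        TermDocs termDocs = reader.TermDocs(term);
        try
        {
            while (termDocs.Next())
                bits.Set(termDocs.Doc, true);
        }
        finally
        {
            termDocs.Close();
        }
        return new DocIdBitSet(bits);
    }

    // FilterManager keys its cache on GetHashCode(), so equal filters must hash alike.
    public override int GetHashCode() { return term.GetHashCode(); }

    public override bool Equals(object other)
    {
        var o = other as SingleTermFilter;
        return o != null && term.Equals(o.term);
    }
}

// Usage sketch: Filter shared = FilterManager.Instance.GetFilter(new SingleTermFilter(new Term("user", "alice")));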
+ /// + protected internal class FilterCleaner : IThreadRunnable + { + private class FilterItemComparer : IComparer> + { + #region IComparer Members + + public int Compare(KeyValuePair x, KeyValuePair y) + { + return x.Value.timestamp.CompareTo(y.Value.timestamp); + } + + #endregion + } + + private bool running = true; + private FilterManager manager; + private ISet> sortedFilterItems; + + public FilterCleaner(FilterManager enclosingInstance) + { + this.manager = enclosingInstance; + sortedFilterItems = new SortedSet>(new FilterItemComparer()); + } + + public virtual void Run() + { + while (running) + { + // sort items from oldest to newest + // we delete the oldest filters + if (this.manager.cache.Count > this.manager.cacheCleanSize) + { + // empty the temporary set + sortedFilterItems.Clear(); + lock (this.manager.cache) + { + sortedFilterItems.UnionWith(this.manager.cache); + int numToDelete = (int)((this.manager.cache.Count - this.manager.cacheCleanSize) * 1.5); + + //delete all of the cache entries not used in a while + sortedFilterItems.ExceptWith(sortedFilterItems.Take(numToDelete).ToArray()); + } + // empty the set so we don't tie up the memory + sortedFilterItems.Clear(); + } + // take a nap + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64)10000 * this.manager.cleanSleepTime)); + + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FilteredDocIdSet.cs b/external/Lucene.Net.Light/src/core/Search/FilteredDocIdSet.cs new file mode 100644 index 0000000000..cd590d371e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FilteredDocIdSet.cs @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Abstract decorator class for a DocIdSet implementation + /// that provides on-demand filtering/validation + /// mechanism on a given DocIdSet. + /// + ///

+ /// + /// Technically, this same functionality could be achieved + /// with ChainedFilter (under contrib/misc), however the + /// benefit of this class is it never materializes the full + /// bitset for the filter. Instead, the + /// method is invoked on-demand, per docID visited during + /// searching. If you know few docIDs will be visited, and + /// the logic behind is relatively costly, + /// this may be a better way to filter than ChainedFilter. + /// + ///
+ /// + /// Technically, this same functionality could be achieved + /// with ChainedFilter (under contrib/misc), however the + /// benefit of this class is it never materializes the full + /// bitset for the filter. Instead, the Match(int) + /// method is invoked on-demand, per docID visited during + /// searching. If you know few docIDs will be visited, and + /// the logic behind Match(int) is relatively costly, + /// this may be a better way to filter than ChainedFilter.
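A hedged sketch of the pattern just described: the decorator defers to Match per visited docID, so no full bitset is built up front. The allow-list predicate here is purely illustrative.

using System.Collections.Generic;
using Lucene.Net.Search;

public class AllowListDocIdSet : FilteredDocIdSet
{
    private readonly ISet<int> allowed;

    public AllowListDocIdSet(DocIdSet inner, ISet<int> allowed) : base(inner)
    {
        this.allowed = allowed;
    }

    // Invoked on demand for each docID the iterator visits.
    public override bool Match(int docid)
    {
        return allowed.Contains(docid);
    }
}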

+ /// + /// + + public abstract class FilteredDocIdSet:DocIdSet + { + private class AnonymousClassFilteredDocIdSetIterator:FilteredDocIdSetIterator + { + public AnonymousClassFilteredDocIdSetIterator(FilteredDocIdSet enclosingInstance) : base(null) + { + System.Diagnostics.Debug.Fail("Port issue:", "Lets see if we need this"); // {{Aroush-2.9}} + InitBlock(enclosingInstance); + } + private void InitBlock(FilteredDocIdSet enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private FilteredDocIdSet enclosingInstance; + public FilteredDocIdSet Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassFilteredDocIdSetIterator(FilteredDocIdSet enclosingInstance, Lucene.Net.Search.DocIdSetIterator Param1):base(Param1) + { + InitBlock(enclosingInstance); + } + public /*protected internal*/ override bool Match(int docid) + { + return Enclosing_Instance.Match(docid); + } + } + private DocIdSet _innerSet; + + /// Constructor. + /// Underlying DocIdSet + /// + protected FilteredDocIdSet(DocIdSet innerSet) + { + _innerSet = innerSet; + } + + /// This DocIdSet implementation is cacheable if the inner set is cacheable. + public override bool IsCacheable + { + get { return _innerSet.IsCacheable; } + } + + /// Validation method to determine whether a docid should be in the result set. + /// docid to be tested + /// + /// true if input docid should be in the result set, false otherwise. + /// + public /*protected internal*/ abstract bool Match(int docid); + + /// Implementation of the contract to build a DocIdSetIterator. + /// + /// + /// + /// + // @Override + public override DocIdSetIterator Iterator() + { + return new AnonymousClassFilteredDocIdSetIterator(this, _innerSet.Iterator()); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FilteredDocIdSetIterator.cs b/external/Lucene.Net.Light/src/core/Search/FilteredDocIdSetIterator.cs new file mode 100644 index 0000000000..29e93b51b3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FilteredDocIdSetIterator.cs @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Abstract decorator class of a DocIdSetIterator + /// implementation that provides on-demand filter/validation + /// mechanism on an underlying DocIdSetIterator. See + ///. + /// + public abstract class FilteredDocIdSetIterator:DocIdSetIterator + { + protected internal DocIdSetIterator internalInnerIter; + private int doc; + + /// Constructor. + /// Underlying DocIdSetIterator. 
+ /// + protected FilteredDocIdSetIterator(DocIdSetIterator innerIter) + { + if (innerIter == null) + { + throw new System.ArgumentException("null iterator"); + } + internalInnerIter = innerIter; + doc = - 1; + } + + /// Validation method to determine whether a docid should be in the result set. + /// docid to be tested + /// + /// true if input docid should be in the result set, false otherwise. + /// + /// + /// + public abstract /*protected internal*/ bool Match(int doc); + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + while ((doc = internalInnerIter.NextDoc()) != NO_MORE_DOCS) + { + if (Match(doc)) + { + return doc; + } + } + return doc; + } + + public override int Advance(int target) + { + doc = internalInnerIter.Advance(target); + if (doc != NO_MORE_DOCS) + { + if (Match(doc)) + { + return doc; + } + else + { + while ((doc = internalInnerIter.NextDoc()) != NO_MORE_DOCS) + { + if (Match(doc)) + { + return doc; + } + } + return doc; + } + } + return doc; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FilteredQuery.cs b/external/Lucene.Net.Light/src/core/Search/FilteredQuery.cs new file mode 100644 index 0000000000..d60a75b24b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FilteredQuery.cs @@ -0,0 +1,293 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + + /// A query that applies a filter to the results of another query. + /// + ///

Note: the bits are retrieved from the filter each time this + /// query is used in a search - use a CachingWrapperFilter to avoid + /// regenerating the bits every time. + /// + ///
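A typical pairing, following the advice above, wraps the filter in a CachingWrapperFilter so its bits survive across searches. The searcher and field names below are assumed for illustration:

using Lucene.Net.Index;
using Lucene.Net.Search;

static class FilteredQueryExample
{
    public static TopDocs SearchLuceneByAlice(Searcher searcher)
    {
        Query text = new TermQuery(new Term("body", "lucene"));
        Filter byUser = new QueryWrapperFilter(new TermQuery(new Term("user", "alice")));

        // CachingWrapperFilter keeps the per-segment bits so repeated searches
        // do not recompute the filter each time.
        Filter cached = new CachingWrapperFilter(byUser);
        return searcher.Search(new FilteredQuery(text, cached), 10);
    }
}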

Created: Apr 20, 2004 8:58:29 AM + /// + ///

+ /// 1.4 + /// + [Serializable] + public class FilteredQuery:Query + { + [Serializable] + private class AnonymousClassWeight:Weight + { + public AnonymousClassWeight(Lucene.Net.Search.Weight weight, Lucene.Net.Search.Similarity similarity, FilteredQuery enclosingInstance) + { + InitBlock(weight, similarity, enclosingInstance); + } + private class AnonymousClassScorer:Scorer + { + private void InitBlock(Lucene.Net.Search.Scorer scorer, Lucene.Net.Search.DocIdSetIterator docIdSetIterator, AnonymousClassWeight enclosingInstance) + { + this.scorer = scorer; + this.docIdSetIterator = docIdSetIterator; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Search.Scorer scorer; + private Lucene.Net.Search.DocIdSetIterator docIdSetIterator; + private AnonymousClassWeight enclosingInstance; + public AnonymousClassWeight Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal AnonymousClassScorer(Lucene.Net.Search.Scorer scorer, Lucene.Net.Search.DocIdSetIterator docIdSetIterator, AnonymousClassWeight enclosingInstance, Lucene.Net.Search.Similarity Param1):base(Param1) + { + InitBlock(scorer, docIdSetIterator, enclosingInstance); + } + + private int doc = - 1; + + private int AdvanceToCommon(int scorerDoc, int disiDoc) + { + while (scorerDoc != disiDoc) + { + if (scorerDoc < disiDoc) + { + scorerDoc = scorer.Advance(disiDoc); + } + else + { + disiDoc = docIdSetIterator.Advance(scorerDoc); + } + } + return scorerDoc; + } + + public override int NextDoc() + { + int scorerDoc, disiDoc; + return doc = (disiDoc = docIdSetIterator.NextDoc()) != NO_MORE_DOCS && (scorerDoc = scorer.NextDoc()) != NO_MORE_DOCS && AdvanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS?scorer.DocID():NO_MORE_DOCS; + } + public override int DocID() + { + return doc; + } + + public override int Advance(int target) + { + int disiDoc, scorerDoc; + return doc = (disiDoc = docIdSetIterator.Advance(target)) != NO_MORE_DOCS && (scorerDoc = scorer.Advance(disiDoc)) != NO_MORE_DOCS && AdvanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS?scorer.DocID():NO_MORE_DOCS; + } + + public override float Score() + { + return Enclosing_Instance.Enclosing_Instance.Boost * scorer.Score(); + } + } + private void InitBlock(Lucene.Net.Search.Weight weight, Lucene.Net.Search.Similarity similarity, FilteredQuery enclosingInstance) + { + this.weight = weight; + this.similarity = similarity; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Search.Weight weight; + private Lucene.Net.Search.Similarity similarity; + private FilteredQuery enclosingInstance; + public FilteredQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private float value_Renamed; + + // pass these methods through to enclosed query's weight + + public override float Value + { + get { return value_Renamed; } + } + + public override float GetSumOfSquaredWeights() + { + return weight.GetSumOfSquaredWeights()*Enclosing_Instance.Boost*Enclosing_Instance.Boost; + } + + public override void Normalize(float v) + { + weight.Normalize(v); + value_Renamed = weight.Value * Enclosing_Instance.Boost; + } + public override Explanation Explain(IndexReader ir, int i) + { + Explanation inner = weight.Explain(ir, i); + if (Enclosing_Instance.Boost != 1) + { + Explanation preBoost = inner; + inner = new Explanation(inner.Value * Enclosing_Instance.Boost, "product of:"); + inner.AddDetail(new Explanation(Enclosing_Instance.Boost, "boost")); + inner.AddDetail(preBoost); + } + Filter f = Enclosing_Instance.filter; + 
DocIdSet docIdSet = f.GetDocIdSet(ir); + DocIdSetIterator docIdSetIterator = docIdSet == null?DocIdSet.EMPTY_DOCIDSET.Iterator():docIdSet.Iterator(); + if (docIdSetIterator == null) + { + docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.Iterator(); + } + if (docIdSetIterator.Advance(i) == i) + { + return inner; + } + else + { + Explanation result = new Explanation(0.0f, "failure to match filter: " + f.ToString()); + result.AddDetail(inner); + return result; + } + } + + // return this query + + public override Query Query + { + get { return Enclosing_Instance; } + } + + // return a filtering scorer + public override Scorer Scorer(IndexReader indexReader, bool scoreDocsInOrder, bool topScorer) + { + Scorer scorer = weight.Scorer(indexReader, true, false); + if (scorer == null) + { + return null; + } + DocIdSet docIdSet = Enclosing_Instance.filter.GetDocIdSet(indexReader); + if (docIdSet == null) + { + return null; + } + DocIdSetIterator docIdSetIterator = docIdSet.Iterator(); + if (docIdSetIterator == null) + { + return null; + } + + return new AnonymousClassScorer(scorer, docIdSetIterator, this, similarity); + } + } + + internal Query query; + internal Filter filter; + + /// Constructs a new query which applies a filter to the results of the original query. + /// Filter.getDocIdSet() will be called every time this query is used in a search. + /// + /// Query to be filtered, cannot be null. + /// + /// Filter to apply to query results, cannot be null. + /// + public FilteredQuery(Query query, Filter filter) + { + this.query = query; + this.filter = filter; + } + + /// Returns a Weight that applies the filter to the enclosed query's Weight. + /// This is accomplished by overriding the Scorer returned by the Weight. + /// + public override Weight CreateWeight(Searcher searcher) + { + Weight weight = query.CreateWeight(searcher); + Similarity similarity = query.GetSimilarity(searcher); + return new AnonymousClassWeight(weight, similarity, this); + } + + /// Rewrites the wrapped query. + public override Query Rewrite(IndexReader reader) + { + Query rewritten = query.Rewrite(reader); + if (rewritten != query) + { + FilteredQuery clone = (FilteredQuery) this.Clone(); + clone.query = rewritten; + return clone; + } + else + { + return this; + } + } + + public virtual Query Query + { + get { return query; } + } + + public virtual Filter Filter + { + get { return filter; } + } + + // inherit javadoc + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + Query.ExtractTerms(terms); + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String s) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("filtered("); + buffer.Append(query.ToString(s)); + buffer.Append(")->"); + buffer.Append(filter); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + /// Returns true iff o is equal to this. + public override bool Equals(System.Object o) + { + if (o is FilteredQuery) + { + FilteredQuery fq = (FilteredQuery) o; + return (query.Equals(fq.query) && filter.Equals(fq.filter) && Boost == fq.Boost); + } + return false; + } + + /// Returns a hash code value for this object. 
+ public override int GetHashCode() + { + return query.GetHashCode() ^ filter.GetHashCode() + System.Convert.ToInt32(Boost); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FilteredTermEnum.cs b/external/Lucene.Net.Light/src/core/Search/FilteredTermEnum.cs new file mode 100644 index 0000000000..8c6e4283dc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FilteredTermEnum.cs @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Term = Lucene.Net.Index.Term; +using TermEnum = Lucene.Net.Index.TermEnum; + +namespace Lucene.Net.Search +{ + + /// Abstract class for enumerating a subset of all terms. + ///

Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + ///
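Because terms arrive in Term.compareTo() order, a subclass can stop at the first non-matching term. A minimal sketch in the spirit of PrefixTermEnum; the class name is illustrative and the Term.Field/Term.Text property names follow the style used elsewhere in this diff:

using Lucene.Net.Index;
using Lucene.Net.Search;

public class SimplePrefixTermEnum : FilteredTermEnum
{
    private readonly Term prefix;
    private bool endEnum;

    public SimplePrefixTermEnum(IndexReader reader, Term prefix)
    {
        this.prefix = prefix;
        // Position the delegate enum on the first candidate term.
        SetEnum(reader.Terms(prefix));
    }

    protected override bool TermCompare(Term term)
    {
        if (term.Field == prefix.Field && term.Text.StartsWith(prefix.Text))
            return true;
        endEnum = true;   // terms are ordered, so the first miss ends the enumeration
        return false;
    }

    public override float Difference() { return 1.0f; }

    public override bool EndEnum() { return endEnum; }
}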

+ public abstract class FilteredTermEnum:TermEnum + { + /// the current term + protected internal Term currentTerm = null; + + /// the delegate enum - to set this member use + protected internal TermEnum actualEnum = null; + + protected FilteredTermEnum() + { + } + + /// Equality compare on the term + protected internal abstract bool TermCompare(Term term); + + /// Equality measure on the term + public abstract float Difference(); + + /// Indicates the end of the enumeration has been reached + public abstract bool EndEnum(); + + private bool isDisposed; + + /// use this method to set the actual TermEnum (e.g. in ctor), + /// it will be automatically positioned on the first matching term. + /// + protected internal virtual void SetEnum(TermEnum actualEnum) + { + this.actualEnum = actualEnum; + // Find the first term that matches + Term term = actualEnum.Term; + if (term != null && TermCompare(term)) + currentTerm = term; + else + Next(); + } + + /// Returns the docFreq of the current Term in the enumeration. + /// Returns -1 if no Term matches or all terms have been enumerated. + /// + public override int DocFreq() + { + if (currentTerm == null) + return - 1; + System.Diagnostics.Debug.Assert(actualEnum != null); + return actualEnum.DocFreq(); + } + + /// Increments the enumeration to the next element. True if one exists. + public override bool Next() + { + if (actualEnum == null) + return false; // the actual enumerator is not initialized! + currentTerm = null; + while (currentTerm == null) + { + if (EndEnum()) + return false; + if (actualEnum.Next()) + { + Term term = actualEnum.Term; + if (TermCompare(term)) + { + currentTerm = term; + return true; + } + } + else + return false; + } + currentTerm = null; + return false; + } + + /// Returns the current Term in the enumeration. + /// Returns null if no Term matches or all terms have been enumerated. + /// + public override Term Term + { + get { return currentTerm; } + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (actualEnum != null) + actualEnum.Close(); + currentTerm = null; + actualEnum = null; + } + + isDisposed = true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Function/ByteFieldSource.cs b/external/Lucene.Net.Light/src/core/Search/Function/ByteFieldSource.cs new file mode 100644 index 0000000000..edebbdbd64 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/ByteFieldSource.cs @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using FieldCache = Lucene.Net.Search.FieldCache; + +namespace Lucene.Net.Search.Function +{ + + /// Expert: obtains single byte field values from the + /// FieldCache + /// using getBytes() and makes those values + /// available as other numeric types, casting as needed. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + ///

+ /// for requirements" + /// on the field. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.
NOTE: with the switch in 2.9 to segment-based + /// searching, if GetValues is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.
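As a usage sketch (the "popularity" field name is illustrative), a ByteFieldSource is normally handed to a ValueSourceQuery so the cached per-document byte becomes the score, or combined with a text query via CustomScoreQuery further below:

using Lucene.Net.Search;
using Lucene.Net.Search.Function;

static class FieldScoreExample
{
    public static Query ByPopularity()
    {
        // The per-document value of the "popularity" field becomes the score.
        return new ValueSourceQuery(new ByteFieldSource("popularity"));
    }
}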

+ /// + [Serializable] + public class ByteFieldSource:FieldCacheSource + { + private class AnonymousClassDocValues:DocValues + { + public AnonymousClassDocValues(sbyte[] arr, ByteFieldSource enclosingInstance) + { + InitBlock(arr, enclosingInstance); + } + private void InitBlock(sbyte[] arr, ByteFieldSource enclosingInstance) + { + this.arr = arr; + this.enclosingInstance = enclosingInstance; + } + private sbyte[] arr; + private ByteFieldSource enclosingInstance; + public ByteFieldSource Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /*(non-Javadoc) Create a cached byte field source with default string-to-byte parser. + public ByteFieldSource(System.String field):this(field, null) + { + } + + ///

Create a cached byte field source with a specific string-to-byte parser. + public ByteFieldSource(System.String field, Lucene.Net.Search.ByteParser parser):base(field) + { + this.parser = parser; + } + + /*(non-Javadoc) + /// An instance of this subclass should be returned by + /// , if you want + /// to modify the custom score calculation of a . + /// Since Lucene 2.9, queries operate on each segment of an Index separately, + /// so overriding the similar (now deprecated) methods in + /// is no longer suitable, as the supplied doc ID is per-segment + /// and without knowledge of the IndexReader you cannot access the + /// document or . + /// + /// @lucene.experimental + /// @since 2.9.2 + /// + public class CustomScoreProvider + { + + protected IndexReader reader; + + /// + /// Creates a new instance of the provider class for the given IndexReader. + /// + public CustomScoreProvider(IndexReader reader) + { + this.reader = reader; + } + + /// + /// * Compute a custom score by the subQuery score and a number of + /// ValueSourceQuery scores. + ///

+ /// Subclasses can override this method to modify the custom score. + ///

+ /// If your custom scoring is different than the default herein you + /// should override at least one of the two customScore() methods. + /// If the number of ValueSourceQueries is always < 2 it is + /// sufficient to override the other + /// CustomScore() + /// method, which is simpler. + ///

+ /// The default computation herein is a multiplication of given scores: + ///

+        ///     ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+        /// 
+ ///
+ /// id of scored doc + /// score of that doc by the subQuery + /// scores of that doc by the ValueSourceQuery + /// custom score + public virtual float CustomScore(int doc, float subQueryScore, float[] valSrcScores) + { + if (valSrcScores.Length == 1) + { + return CustomScore(doc, subQueryScore, valSrcScores[0]); + } + if (valSrcScores.Length == 0) + { + return CustomScore(doc, subQueryScore, 1); + } + float score = subQueryScore; + for (int i = 0; i < valSrcScores.Length; i++) + { + score *= valSrcScores[i]; + } + return score; + } + + /// + /// Compute a custom score by the subQuery score and the ValueSourceQuery score. + ///

+ /// Subclasses can override this method to modify the custom score. + ///

+ /// If your custom scoring is different than the default herein you + /// should override at least one of the two customScore() methods. + /// If the number of ValueSourceQueries is always < 2 it is + /// sufficient to override this customScore() method, which is simpler. + ///

+ /// The default computation herein is a multiplication of the two scores: + ///

+        ///     ModifiedScore = subQueryScore * valSrcScore
+        /// 
+ ///
+ /// id of scored doc + /// score of that doc by the subQuery + /// score of that doc by the ValueSourceQuery + /// custom score + public virtual float CustomScore(int doc, float subQueryScore, float valSrcScore) + { + return subQueryScore * valSrcScore; + } + + /// + /// Explain the custom score. + /// Whenever overriding , + /// this method should also be overridden to provide the correct explanation + /// for the part of the custom scoring. + /// + /// doc being explained + /// explanation for the sub-query part + /// explanation for the value source part + /// an explanation for the custom score + public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls) + { + if (valSrcExpls.Length == 1) + { + return CustomExplain(doc, subQueryExpl, valSrcExpls[0]); + } + if (valSrcExpls.Length == 0) + { + return subQueryExpl; + } + float valSrcScore = 1; + for (int i = 0; i < valSrcExpls.Length; i++) + { + valSrcScore *= valSrcExpls[i].Value; + } + Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:"); + exp.AddDetail(subQueryExpl); + for (int i = 0; i < valSrcExpls.Length; i++) + { + exp.AddDetail(valSrcExpls[i]); + } + return exp; + } + + /// + /// Explain the custom score. + /// Whenever overriding , + /// this method should also be overridden to provide the correct explanation + /// for the part of the custom scoring. + /// + /// + /// doc being explained + /// explanation for the sub-query part + /// explanation for the value source part + /// an explanation for the custom score + public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) + { + float valSrcScore = 1; + if (valSrcExpl != null) + { + valSrcScore *= valSrcExpl.Value; + } + Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:"); + exp.AddDetail(subQueryExpl); + exp.AddDetail(valSrcExpl); + return exp; + } + + } +} diff --git a/external/Lucene.Net.Light/src/core/Search/Function/CustomScoreQuery.cs b/external/Lucene.Net.Light/src/core/Search/Function/CustomScoreQuery.cs new file mode 100644 index 0000000000..cd6f2b2298 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/CustomScoreQuery.cs @@ -0,0 +1,579 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Linq; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using ComplexExplanation = Lucene.Net.Search.ComplexExplanation; +using Explanation = Lucene.Net.Search.Explanation; +using Query = Lucene.Net.Search.Query; +using Scorer = Lucene.Net.Search.Scorer; +using Searcher = Lucene.Net.Search.Searcher; +using Similarity = Lucene.Net.Search.Similarity; +using Weight = Lucene.Net.Search.Weight; + +namespace Lucene.Net.Search.Function +{ + + /// Query that sets document score as a programmatic function of several (sub) scores: + /// + /// the score of its subQuery (any query) + /// (optional) the score of its ValueSourceQuery (or queries). + /// For most simple/convenient use cases this query is likely to be a + /// FieldScoreQuery + /// + /// Subclasses can modify the computation by overriding . + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + ///
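A hedged sketch of the customization path described above: subclass CustomScoreQuery, return your own CustomScoreProvider, and combine a text query with a ByteFieldSource-backed value query. The "popularity" field and the logarithmic damping are illustrative choices, not part of this commit.

using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Search.Function;

public class PopularityQuery : CustomScoreQuery
{
    public PopularityQuery(Query textQuery)
        : base(textQuery, new ValueSourceQuery(new ByteFieldSource("popularity")))
    {
    }

    protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
    {
        return new PopularityProvider(reader);
    }

    private class PopularityProvider : CustomScoreProvider
    {
        public PopularityProvider(IndexReader reader) : base(reader) { }

        // Dampen the popularity factor instead of multiplying raw values.
        public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
        {
            return subQueryScore * (1.0f + (float) System.Math.Log(1.0 + valSrcScore));
        }
    }
}

// Usage sketch: Query q = new PopularityQuery(new TermQuery(new Term("body", "lucene")));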

+ [Serializable] + public class CustomScoreQuery:Query, System.ICloneable + { + + private Query subQuery; + private ValueSourceQuery[] valSrcQueries; // never null (empty array if there are no valSrcQueries). + private bool strict = false; // if true, valueSource part of query does not take part in weights normalization. + + /// Create a CustomScoreQuery over input subQuery. + /// the sub query whose scored is being customed. Must not be null. + /// + public CustomScoreQuery(Query subQuery):this(subQuery, new ValueSourceQuery[0]) + { + } + + /// Create a CustomScoreQuery over input subQuery and a . + /// the sub query whose score is being customed. Must not be null. + /// + /// a value source query whose scores are used in the custom score + /// computation. For most simple/convineient use case this would be a + /// FieldScoreQuery. + /// This parameter is optional - it can be null or even an empty array. + /// + public CustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery):this(subQuery, valSrcQuery != null?new ValueSourceQuery[]{valSrcQuery}:new ValueSourceQuery[0]) + { + } + + /// Create a CustomScoreQuery over input subQuery and a . + /// the sub query whose score is being customized. Must not be null. + /// + /// value source queries whose scores are used in the custom score + /// computation. For most simple/convenient use case these would be + /// FieldScoreQueries. + /// This parameter is optional - it can be null or even an empty array. + /// + public CustomScoreQuery(Query subQuery, params ValueSourceQuery[] valSrcQueries) + { + this.subQuery = subQuery; + this.valSrcQueries = valSrcQueries != null?valSrcQueries:new ValueSourceQuery[0]; + if (subQuery == null) + throw new System.ArgumentException(" must not be null!"); + } + + /*(non-Javadoc) terms) + { + subQuery.ExtractTerms(terms); + for (int i = 0; i < valSrcQueries.Length; i++) + { + valSrcQueries[i].ExtractTerms(terms); + } + } + + /*(non-Javadoc) Returns true if o is equal to this. + public override bool Equals(System.Object o) + { + if (GetType() != o.GetType()) + { + return false; + } + CustomScoreQuery other = (CustomScoreQuery) o; + if (this.Boost != other.Boost || + !this.subQuery.Equals(other.subQuery) || + this.strict != other.strict || + this.valSrcQueries.Length != other.valSrcQueries.Length) + { + return false; + } + + // SequenceEqual should properly mimic java's Array.equals() + return valSrcQueries.SequenceEqual(other.valSrcQueries); + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + int valSrcHash = 0; + for (int i = 0; i < valSrcQueries.Length; i++) + { + // TODO: Simplify this hash code generation + valSrcHash += valSrcQueries[i].GetHashCode(); + } + return (GetType().GetHashCode() + subQuery.GetHashCode() + valSrcHash) ^ + BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ (strict ? 1234 : 4321); + + } + + /// + /// Returns a that calculates the custom scores + /// for the given . The default implementation returns a default + /// implementation as specified in the docs of . 
+ /// + protected virtual CustomScoreProvider GetCustomScoreProvider(IndexReader reader) + { + // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider + return new AnonymousCustomScoreProvider(this, reader); + } + + class AnonymousCustomScoreProvider : CustomScoreProvider + { + CustomScoreQuery parent; + public AnonymousCustomScoreProvider(CustomScoreQuery parent, IndexReader reader) : base(reader) + { + this.parent = parent; + } + public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores) + { + return parent.CustomScore(doc, subQueryScore, valSrcScores); + } + + public override float CustomScore(int doc, float subQueryScore, float valSrcScore) + { + return parent.CustomScore(doc, subQueryScore, valSrcScore); + } + + public override Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls) + { + return parent.CustomExplain(doc, subQueryExpl, valSrcExpls); + } + + public override Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) + { + return parent.CustomExplain(doc, subQueryExpl, valSrcExpl); + } + } + + /// + /// Compute a custom score by the subQuery score and a number of + /// ValueSourceQuery scores. + /// + /// The doc is relative to the current reader, which is + /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + /// Please override and return a subclass + /// of for the given . + /// see CustomScoreProvider#customScore(int,float,float[]) + /// + [Obsolete("Will be removed in Lucene 3.1")] + public virtual float CustomScore(int doc, float subQueryScore, float[] valSrcScores) + { + if (valSrcScores.Length == 1) + { + return CustomScore(doc, subQueryScore, valSrcScores[0]); + } + if (valSrcScores.Length == 0) + { + return CustomScore(doc, subQueryScore, 1); + } + float score = subQueryScore; + for (int i = 0; i < valSrcScores.Length; i++) + { + score *= valSrcScores[i]; + } + return score; + } + + /// Compute a custom score by the subQuery score and the ValueSourceQuery score. + /// + /// The doc is relative to the current reader, which is + /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + /// Please override and return a subclass + /// of for the given . + /// + /// + [Obsolete("Will be removed in Lucene 3.1")] + public virtual float CustomScore(int doc, float subQueryScore, float valSrcScore) + { + return subQueryScore * valSrcScore; + } + + + + /// Explain the custom score. + /// + /// The doc is relative to the current reader, which is + /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + /// Please override and return a subclass + /// of for the given . + /// + [Obsolete("Will be removed in Lucene 3.1")] + public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls) + { + if (valSrcExpls.Length == 1) + { + return CustomExplain(doc, subQueryExpl, valSrcExpls[0]); + } + if (valSrcExpls.Length == 0) + { + return subQueryExpl; + } + float valSrcScore = 1; + for (int i = 0; i < valSrcExpls.Length; i++) + { + valSrcScore *= valSrcExpls[i].Value; + } + Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:"); + exp.AddDetail(subQueryExpl); + for (int i = 0; i < valSrcExpls.Length; i++) + { + exp.AddDetail(valSrcExpls[i]); + } + return exp; + } + + /// Explain the custom score. 
+ /// The doc is relative to the current reader, which is + /// unknown to CustomScoreQuery when using per-segment search (since Lucene 2.9). + /// Please override and return a subclass + /// of for the given . + /// + [Obsolete("Will be removed in Lucene 3.1")] + public virtual Explanation CustomExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) + { + float valSrcScore = 1; + if (valSrcExpl != null) + { + valSrcScore *= valSrcExpl.Value; + } + Explanation exp = new Explanation(valSrcScore * subQueryExpl.Value, "custom score: product of:"); + exp.AddDetail(subQueryExpl); + exp.AddDetail(valSrcExpl); + return exp; + } + + //=========================== W E I G H T ============================ + + [Serializable] + private class CustomWeight:Weight + { + private void InitBlock(CustomScoreQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private CustomScoreQuery enclosingInstance; + public CustomScoreQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal Similarity similarity; + internal Weight subQueryWeight; + internal Weight[] valSrcWeights; + internal bool qStrict; + + public CustomWeight(CustomScoreQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + this.subQueryWeight = Enclosing_Instance.subQuery.Weight(searcher); + this.valSrcWeights = new Weight[Enclosing_Instance.valSrcQueries.Length]; + for (int i = 0; i < Enclosing_Instance.valSrcQueries.Length; i++) + { + this.valSrcWeights[i] = Enclosing_Instance.valSrcQueries[i].CreateWeight(searcher); + } + this.qStrict = Enclosing_Instance.strict; + } + + /*(non-Javadoc) A scorer that applies a (callback) function on scores of the subQuery. + private class CustomScorer:Scorer + { + private void InitBlock(CustomScoreQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private CustomScoreQuery enclosingInstance; + public CustomScoreQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private float qWeight; + private Scorer subQueryScorer; + private Scorer[] valSrcScorers; + private IndexReader reader; + private CustomScoreProvider provider; + private float[] vScores; // reused in score() to avoid allocating this array for each doc + + // constructor + internal CustomScorer(CustomScoreQuery enclosingInstance, Similarity similarity, IndexReader reader, CustomWeight w, Scorer subQueryScorer, Scorer[] valSrcScorers):base(similarity) + { + InitBlock(enclosingInstance); + this.qWeight = w.Value; + this.subQueryScorer = subQueryScorer; + this.valSrcScorers = valSrcScorers; + this.reader = reader; + this.vScores = new float[valSrcScorers.Length]; + this.provider = this.Enclosing_Instance.GetCustomScoreProvider(reader); + } + + public override int NextDoc() + { + int doc = subQueryScorer.NextDoc(); + if (doc != NO_MORE_DOCS) + { + for (int i = 0; i < valSrcScorers.Length; i++) + { + valSrcScorers[i].Advance(doc); + } + } + return doc; + } + + public override int DocID() + { + return subQueryScorer.DocID(); + } + + /*(non-Javadoc) The strict mode to set. + /// + /// + /// + public virtual void SetStrict(bool strict) + { + this.strict = strict; + } + + /// A short name of this query, used in . 
+ public virtual System.String Name() + { + return "custom"; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Function/DocValues.cs b/external/Lucene.Net.Light/src/core/Search/Function/DocValues.cs new file mode 100644 index 0000000000..fcb5e7daa3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/DocValues.cs @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Explanation = Lucene.Net.Search.Explanation; + +namespace Lucene.Net.Search.Function +{ + + /// Expert: represents field values as different types. + /// Normally created via a + /// ValueSuorce + /// for a particular field and reader. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + /// + ///

+ public abstract class DocValues + { + /* + * DocValues is distinct from ValueSource because + * there needs to be an object created at query evaluation time that + * is not referenced by the query itself because: + * - Query objects should be MT safe + * - For caching, Query objects are often used as keys... you don't + * want the Query carrying around big objects + */ + + /// Return doc value as a float. + ///

Mandatory: every DocValues implementation must implement at least this method. + ///

+ /// document whose float value is requested. + /// + public abstract float FloatVal(int doc); + + /// Return doc value as an int. + ///

Optional: DocValues implementations can (but do not have to) override this method. + ///

+ /// document whose int value is requested. + /// + public virtual int IntVal(int doc) + { + return (int) FloatVal(doc); + } + + /// Return doc value as a long. + ///

Optional: DocValues implementations can (but do not have to) override this method. + ///

+ /// document whose long value is requested. + /// + public virtual long LongVal(int doc) + { + return (long) FloatVal(doc); + } + + /// Return doc value as a double. + ///

Optional: DocValues implementations can (but do not have to) override this method. + ///

+ /// document whose double value is requested. + /// + public virtual double DoubleVal(int doc) + { + return (double) FloatVal(doc); + } + + /// Return doc value as a string. + ///

Optional: DocValues implementations can (but do not have to) override this method. + ///

+ /// document whose string value is requested. + /// + public virtual System.String StrVal(int doc) + { + return FloatVal(doc).ToString(); + } + + /// Return a string representation of a doc value, as required for Explanations. + public abstract System.String ToString(int doc); + + /// Explain the scoring value for the input doc. + public virtual Explanation Explain(int doc) + { + return new Explanation(FloatVal(doc), ToString(doc)); + } + + /// Expert: for test purposes only, return the inner array of values, or null if not applicable. + ///
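An illustrative sketch (not taken from the Lucene.Net sources): because FloatVal is the only mandatory accessor, and IntVal, LongVal, DoubleVal and StrVal all fall back to casting it, a minimal DocValues over an in-memory float array only needs FloatVal plus the abstract ToString(int doc). The class and field names are hypothetical:

    using Lucene.Net.Search.Function;

    internal class ArrayDocValues : DocValues
    {
        private readonly float[] values; // hypothetical per-document values

        public ArrayDocValues(float[] values)
        {
            this.values = values;
        }

        public override float FloatVal(int doc)
        {
            return values[doc];
        }

        public override string ToString(int doc)
        {
            return "array(" + FloatVal(doc) + ")";
        }
    }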

+ /// Allows tests to verify that loaded values are: + /// + /// indeed cached/reused. + /// stored in the expected size/type (byte/short/int/float). + /// + /// Note: implementations of DocValues must override this method for + /// these test elements to be tested; otherwise the test would not fail, just + /// print a warning. + ///

+ protected internal virtual object InnerArray + { + get { throw new System.NotSupportedException("this optional method is for test purposes only"); } + } + + // --- some simple statistics on values + private float minVal = System.Single.NaN; + private float maxVal = System.Single.NaN; + private float avgVal = System.Single.NaN; + private bool computed = false; + // compute optional values + private void Compute() + { + if (computed) + { + return ; + } + float sum = 0; + int n = 0; + while (true) + { + float val; + try + { + val = FloatVal(n); + } + catch (System.IndexOutOfRangeException) + { + break; + } + sum += val; + minVal = System.Single.IsNaN(minVal)?val:System.Math.Min(minVal, val); + maxVal = System.Single.IsNaN(maxVal)?val:System.Math.Max(maxVal, val); + ++n; + } + + avgVal = n == 0?System.Single.NaN:sum / n; + computed = true; + } + + /// Returns the minimum of all values or Float.NaN if this + /// DocValues instance does not contain any value. + ///
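A hypothetical usage sketch (not taken from the Lucene.Net sources) for the optional statistics GetMinValue, GetMaxValue and GetAverageValue declared below, reusing the ArrayDocValues class sketched earlier; Compute() above simply calls FloatVal with increasing indexes until it throws, so any fixed-length array works:

    var vals = new ArrayDocValues(new float[] { 1.0f, 4.0f, 7.0f });
    float min = vals.GetMinValue();     // 1.0f
    float max = vals.GetMaxValue();     // 7.0f
    float avg = vals.GetAverageValue(); // 4.0f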

+ /// This operation is optional + ///

+ /// + ///

+ /// the minimum of all values or Float.NaN if this + /// DocValues instance does not contain any value. + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual float GetMinValue() + { + Compute(); + return minVal; + } + + /// Returns the maximum of all values or Float.NaN if this + /// DocValues instance does not contain any value. + ///

+ /// This operation is optional + ///

+ /// + ///

+ /// the maximum of all values or Float.NaN if this + /// DocValues instance does not contain any value. + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual float GetMaxValue() + { + Compute(); + return maxVal; + } + + /// Returns the average of all values or Float.NaN if this + /// DocValues instance does not contain any value. * + ///

+ /// This operation is optional + ///

+ /// + ///

+ /// the average of all values or Float.NaN if this + /// DocValues instance does not contain any value + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual float GetAverageValue() + { + Compute(); + return avgVal; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Function/FieldCacheSource.cs b/external/Lucene.Net.Light/src/core/Search/Function/FieldCacheSource.cs new file mode 100644 index 0000000000..f5ccf1b0fe --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/FieldCacheSource.cs @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using FieldCache = Lucene.Net.Search.FieldCache; + +namespace Lucene.Net.Search.Function +{ + + /// Expert: A base class for ValueSource implementations that retrieve values for + /// a single field from the FieldCache. + ///

+ /// Fields used herein must be indexed (doesn't matter if these fields are stored or not). + ///

+ /// It is assumed that each such indexed field is untokenized, or at least has a single token in a document. + /// For documents with multiple tokens of the same field, behavior is undefined (It is likely that current + /// code would use the value of one of these tokens, but this is not guaranteed). + ///

+ /// Documents with no tokens in this field are assigned the Zero value. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.

+ ///

+ [Serializable] + public abstract class FieldCacheSource:ValueSource + { + private System.String field; + + /// Create a cached field source for the input field. + protected FieldCacheSource(System.String field) + { + this.field = field; + } + + /* (non-Javadoc) Return cached DocValues for input field and reader. + /// FieldCache so that values of a field are loaded once per reader (RAM allowing) + /// + /// Field for which values are required. + /// + /// + /// + public abstract DocValues GetCachedFieldValues(FieldCache cache, System.String field, IndexReader reader); + + /*(non-Javadoc) Check if equals to another , already knowing that cache and field are equal. + /// + /// + public abstract bool CachedFieldSourceEquals(FieldCacheSource other); + + /// Return a hash code of a , without the hash-codes of the field + /// and the cache (those are taken care of elsewhere). + /// + /// + /// + public abstract int CachedFieldSourceHashCode(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Function/FieldScoreQuery.cs b/external/Lucene.Net.Light/src/core/Search/Function/FieldScoreQuery.cs new file mode 100644 index 0000000000..403fe5beca --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/FieldScoreQuery.cs @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search.Function +{ + + /// A query that scores each document as the value of the numeric input field. + ///

+ /// The query matches all documents, and scores each document according to the numeric + /// value of that field. + ///

+ /// It is assumed, and expected, that: + /// + /// The field used here is indexed, and has exactly + /// one token in every scored document. + /// Best if this field is un_tokenized. + /// That token is parsable to the selected type. + /// + ///

+ /// Combining this query in a FunctionQuery allows much freedom in affecting document scores. + /// Note that with this freedom comes responsibility: it is more than likely that the + /// default Lucene scoring is superior in quality to scoring modified as explained here. + /// However, in some cases, and certainly for research experiments, this capability may prove useful. + ///

+ /// When constructing this query, select the appropriate type. That type should match the data stored in the + /// field. So in fact the "right" type should be selected before indexing. Type selection + /// affects the RAM usage: + /// + /// byte consumes 1 * maxDocs bytes. + /// short consumes 2 * maxDocs bytes. + /// int consumes 4 * maxDocs bytes. + /// float consumes 8 * maxDocs bytes. + /// + ///
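An illustrative usage sketch (not taken from the Lucene.Net sources); the "popularity" field and the searcher variable are hypothetical, and the field is assumed to be indexed, un-tokenized and to hold one numeric token per document, as required above:

    var byPopularity = new FieldScoreQuery("popularity", FieldScoreQuery.Type.FLOAT);
    TopDocs top = searcher.Search(byPopularity, 10); // matches every document; score == field value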

+ /// Caching: + /// Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader. + /// To take advantage of this, it is extremely important to reuse index-readers or index-searchers, + /// otherwise, for instance if for each query a new index reader is opened, large penalties would be + /// paid for loading the field values into memory over and over again! + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + ///

+ [Serializable] + public class FieldScoreQuery:ValueSourceQuery + { + + /// Type of score field, indicating how field values are interpreted/parsed. + ///

+ /// The type selected at search time should match the data stored in the field. + /// Different types have different RAM requirements: + /// + /// byte consumes 1 * maxDocs bytes. + /// short consumes 2 * maxDocs bytes. + /// int consumes 4 * maxDocs bytes. + /// float consumes 8 * maxDocs bytes. + /// + ///

+ public class Type + { + + /// field values are interpreted as numeric byte values. + public static readonly Type BYTE = new Type("byte"); + + /// field values are interpreted as numeric short values. + public static readonly Type SHORT = new Type("short"); + + /// field values are interpreted as numeric int values. + public static readonly Type INT = new Type("int"); + + /// field values are interpreted as numeric float values. + public static readonly Type FLOAT = new Type("float"); + + private System.String typeName; + internal Type(System.String name) + { + this.typeName = name; + } + /*(non-Javadoc) Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field. + ///

+ /// The type param tells how to parse the field string values into a numeric score value. + /// + /// the numeric field to be used. + /// + /// the type of the field: either + /// , , , or . + /// + public FieldScoreQuery(System.String field, Type type):base(GetValueSource(field, type)) + { + } + + // create the appropriate (cached) field value source. + private static ValueSource GetValueSource(System.String field, Type type) + { + if (type == Type.BYTE) + { + return new ByteFieldSource(field); + } + if (type == Type.SHORT) + { + return new ShortFieldSource(field); + } + if (type == Type.INT) + { + return new IntFieldSource(field); + } + if (type == Type.FLOAT) + { + return new FloatFieldSource(field); + } + throw new System.ArgumentException(type + " is not a known Field Score Query Type!"); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Function/FloatFieldSource.cs b/external/Lucene.Net.Light/src/core/Search/Function/FloatFieldSource.cs new file mode 100644 index 0000000000..45577acc40 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/FloatFieldSource.cs @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using FieldCache = Lucene.Net.Search.FieldCache; + +namespace Lucene.Net.Search.Function +{ + + ///

Expert: obtains float field values from the + /// FieldCache + /// using getFloats() and makes those values + /// available as other numeric types, casting as needed. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + ///

+ /// for requirements + /// on the field. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.

+ /// + [Serializable] + public class FloatFieldSource:FieldCacheSource + { + private class AnonymousClassDocValues:DocValues + { + public AnonymousClassDocValues(float[] arr, FloatFieldSource enclosingInstance) + { + InitBlock(arr, enclosingInstance); + } + private void InitBlock(float[] arr, FloatFieldSource enclosingInstance) + { + this.arr = arr; + this.enclosingInstance = enclosingInstance; + } + private float[] arr; + private FloatFieldSource enclosingInstance; + public FloatFieldSource Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /*(non-Javadoc) Expert: obtains int field values from the + /// FieldCache + /// using getInts() and makes those values + /// available as other numeric types, casting as needed. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + /// + /// for requirements + /// on the field. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.

+ /// + [Serializable] + public class IntFieldSource:FieldCacheSource + { + private class AnonymousClassDocValues:DocValues + { + public AnonymousClassDocValues(int[] arr, IntFieldSource enclosingInstance) + { + InitBlock(arr, enclosingInstance); + } + private void InitBlock(int[] arr, IntFieldSource enclosingInstance) + { + this.arr = arr; + this.enclosingInstance = enclosingInstance; + } + private int[] arr; + private IntFieldSource enclosingInstance; + public IntFieldSource Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /*(non-Javadoc) Create a cached int field source with default string-to-int parser. + public IntFieldSource(System.String field):this(field, null) + { + } + + ///

Create a cached int field source with a specific string-to-int parser. + public IntFieldSource(System.String field, Lucene.Net.Search.IntParser parser):base(field) + { + this.parser = parser; + } + + /*(non-Javadoc) Expert: obtains the ordinal of the field value from the default Lucene + /// Fieldcache using getStringIndex(). + ///

+ /// The native lucene index order is used to assign an ordinal value for each field value. + ///

+ /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. + ///

+ /// Example: + ///
If there were only three field values: "apple","banana","pear" + ///
then ord("apple")=1, ord("banana")=2, ord("pear")=3 + ///

+ /// WARNING: + /// ord() depends on the position in an index and can thus change + /// when other documents are inserted or deleted, + /// or if a MultiSearcher is used. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.

+ /// + + [Serializable] + public class OrdFieldSource:ValueSource + { + private class AnonymousClassDocValues:DocValues + { + public AnonymousClassDocValues(int[] arr, OrdFieldSource enclosingInstance) + { + InitBlock(arr, enclosingInstance); + } + private void InitBlock(int[] arr, OrdFieldSource enclosingInstance) + { + this.arr = arr; + this.enclosingInstance = enclosingInstance; + } + private int[] arr; + private OrdFieldSource enclosingInstance; + public OrdFieldSource Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /*(non-Javadoc) Constructor for a certain field. + /// field whose values order is used. + /// + public OrdFieldSource(System.String field) + { + this.field = field; + } + + /*(non-Javadoc) Expert: obtains the ordinal of the field value from the default Lucene + /// FieldCache using getStringIndex() + /// and reverses the order. + ///

+ /// The native lucene index order is used to assign an ordinal value for each field value. + ///

+ /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. + ///
+ /// Example of reverse ordinal (rord): + ///
If there were only three field values: "apple","banana","pear" + ///
then rord("apple")=3, rord("banana")=2, rord("pear")=1 + ///

+ /// WARNING: + /// rord() depends on the position in an index and can thus change + /// when other documents are inserted or deleted, + /// or if a MultiSearcher is used. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.

+ /// + + [Serializable] + public class ReverseOrdFieldSource:ValueSource + { + private class AnonymousClassDocValues:DocValues + { + public AnonymousClassDocValues(int end, int[] arr, ReverseOrdFieldSource enclosingInstance) + { + InitBlock(end, arr, enclosingInstance); + } + private void InitBlock(int end, int[] arr, ReverseOrdFieldSource enclosingInstance) + { + this.end = end; + this.arr = arr; + this.enclosingInstance = enclosingInstance; + } + private int end; + private int[] arr; + private ReverseOrdFieldSource enclosingInstance; + public ReverseOrdFieldSource Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /*(non-Javadoc) Expert: obtains short field values from the + /// FieldCache + /// using getShorts() and makes those values + /// available as other numeric types, casting as needed. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + /// + /// for requirements + /// on the field. + /// + ///

NOTE: with the switch in 2.9 to segment-based + /// searching, if is invoked with a + /// composite (multi-segment) reader, this can easily cause + /// double RAM usage for the values in the FieldCache. It's + /// best to switch your application to pass only atomic + /// (single segment) readers to this API.

+ /// + [Serializable] + public class ShortFieldSource:FieldCacheSource + { + private class AnonymousClassDocValues:DocValues + { + public AnonymousClassDocValues(short[] arr, ShortFieldSource enclosingInstance) + { + InitBlock(arr, enclosingInstance); + } + private void InitBlock(short[] arr, ShortFieldSource enclosingInstance) + { + this.arr = arr; + this.enclosingInstance = enclosingInstance; + } + private short[] arr; + private ShortFieldSource enclosingInstance; + public ShortFieldSource Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + /*(non-Javadoc) Create a cached short field source with default string-to-short parser. + public ShortFieldSource(System.String field):this(field, null) + { + } + + ///

Create a cached short field source with a specific string-to-short parser. + public ShortFieldSource(System.String field, Lucene.Net.Search.ShortParser parser):base(field) + { + this.parser = parser; + } + + /*(non-Javadoc) Expert: source of values for basic function queries. + ///

At its default/simplest form, values - one per doc - are used as the score of that doc. + ///

Values are instantiated as + /// DocValues for a particular reader. + ///

ValueSource implementations differ in RAM requirements: it would always be a factor + /// of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8. + /// + ///

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + /// + /// + [Serializable] + public abstract class ValueSource + { + + ///

Return the DocValues used by the function query. + /// the IndexReader used to read these values. + /// If any caching is involved, that caching would also be IndexReader based. + /// + /// IOException for any error. + public abstract DocValues GetValues(IndexReader reader); + + /// description of field, used in explain() + public abstract System.String Description(); + + /* (non-Javadoc) . + /// + /// + abstract public override bool Equals(System.Object o); + + /// Needed for possible caching of query results - used by . + /// + /// + abstract public override int GetHashCode(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Function/ValueSourceQuery.cs b/external/Lucene.Net.Light/src/core/Search/Function/ValueSourceQuery.cs new file mode 100644 index 0000000000..66593a454f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Function/ValueSourceQuery.cs @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; +using TermDocs = Lucene.Net.Index.TermDocs; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Lucene.Net.Search; + +namespace Lucene.Net.Search.Function +{ + + /// Expert: A Query that sets the scores of document to the + /// values obtained from a ValueSource. + ///

+ /// This query provides a score for each and every undeleted document in the index. + ///

+ /// The value source can be based on a (cached) value of an indexed field, but it + /// can also be based on an external source, e.g. values read from an external database. + ///

+ /// Score is set as: Score(doc,query) = query.getBoost()^2 * valueSource(doc). + /// + ///
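A hypothetical worked example (not taken from the Lucene.Net sources): with a query boost of 2.0f and a cached "price" field value of 3.5f for some document, the formula above yields 2.0f * 2.0f * 3.5f = 14.0f.

    var q = new ValueSourceQuery(new FloatFieldSource("price")); // "price" is a hypothetical field
    q.Boost = 2.0f;                                              // score(doc) = 2^2 * price(doc)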

+ /// WARNING: The status of the Search.Function package is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + ///

+ [Serializable] + public class ValueSourceQuery:Query + { + internal ValueSource valSrc; + + /// Create a value source query + /// provides the values defines the function to be used for scoring + /// + public ValueSourceQuery(ValueSource valSrc) + { + this.valSrc = valSrc; + } + + /*(non-Javadoc) terms) + { + // no terms involved here + } + + [Serializable] + internal class ValueSourceWeight:Weight + { + private void InitBlock(ValueSourceQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ValueSourceQuery enclosingInstance; + public ValueSourceQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal Similarity similarity; + internal float queryNorm; + internal float queryWeight; + + public ValueSourceWeight(ValueSourceQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + } + + /*(non-Javadoc) A scorer that (simply) matches all documents, and scores each document with + /// the value of the value soure in effect. As an example, if the value source + /// is a (cached) field source, then value of that field in that document will + /// be used. (assuming field is indexed for this doc, with a single token.) + /// + private class ValueSourceScorer : Scorer + { + private void InitBlock(ValueSourceQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ValueSourceQuery enclosingInstance; + public ValueSourceQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private ValueSourceWeight weight; + private float qWeight; + private DocValues vals; + private TermDocs termDocs; + private int doc = -1; + + // constructor + internal ValueSourceScorer(ValueSourceQuery enclosingInstance, Similarity similarity, IndexReader reader, ValueSourceWeight w) + : base(similarity) + { + InitBlock(enclosingInstance); + this.weight = w; + this.qWeight = w.Value; + // this is when/where the values are first created. + vals = Enclosing_Instance.valSrc.GetValues(reader); + termDocs = reader.TermDocs(null); + } + + public override int NextDoc() + { + return doc = termDocs.Next() ? termDocs.Doc : NO_MORE_DOCS; + } + + public override int DocID() + { + return doc; + } + + public override int Advance(int target) + { + return doc = termDocs.SkipTo(target) ? termDocs.Doc : NO_MORE_DOCS; + } + + /*(non-Javadoc) Implements the fuzzy search query. The similarity measurement + /// is based on the Levenshtein (edit distance) algorithm. + /// + /// Warning: this query is not very scalable with its default prefix + /// length of 0 - in this case, *every* term will be enumerated and + /// cause an edit score calculation. + /// + /// + [Serializable] + public class FuzzyQuery : MultiTermQuery + { + + public const float defaultMinSimilarity = 0.5f; + public const int defaultPrefixLength = 0; + + private float minimumSimilarity; + private int prefixLength; + private bool termLongEnough = false; + + /// Returns the pattern term. + public Term Term { get; protected internal set; } + + /// Create a new FuzzyQuery that will match terms with a similarity + /// of at least minimumSimilarity to term. + /// If a prefixLength > 0 is specified, a common prefix + /// of that length is also required. + /// + /// + /// the term to search for + /// + /// a value between 0 and 1 to set the required similarity + /// between the query term and the matching terms. 
For example, for a + /// minimumSimilarity of 0.5 a term of the same length + /// as the query term is considered similar to the query term if the edit distance + /// between both terms is less than length(term)*0.5 + /// + /// length of common (non-fuzzy) prefix + /// + /// IllegalArgumentException if minimumSimilarity is >= 1 or < 0 + /// or if prefixLength < 0 + /// + public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) + { + this.Term = term; + + if (minimumSimilarity >= 1.0f) + throw new System.ArgumentException("minimumSimilarity >= 1"); + else if (minimumSimilarity < 0.0f) + throw new System.ArgumentException("minimumSimilarity < 0"); + if (prefixLength < 0) + throw new System.ArgumentException("prefixLength < 0"); + + if (term.Text.Length > 1.0f / (1.0f - minimumSimilarity)) + { + this.termLongEnough = true; + } + + this.minimumSimilarity = minimumSimilarity; + this.prefixLength = prefixLength; + internalRewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE; + } + + /// Calls FuzzyQuery(term, minimumSimilarity, 0). + public FuzzyQuery(Term term, float minimumSimilarity):this(term, minimumSimilarity, defaultPrefixLength) + { + } + + /// Calls FuzzyQuery(term, 0.5f, 0). + public FuzzyQuery(Term term):this(term, defaultMinSimilarity, defaultPrefixLength) + { + } + + /// Returns the minimum similarity that is required for this query to match. + /// float value between 0.0 and 1.0 + public virtual float MinSimilarity + { + get { return minimumSimilarity; } + } + + /// Returns the non-fuzzy prefix length. This is the number of characters at the start + /// of a term that must be identical (not fuzzy) to the query term if the query + /// is to match that term. + /// + public virtual int PrefixLength + { + get { return prefixLength; } + } + + protected internal override FilteredTermEnum GetEnum(IndexReader reader) + { + return new FuzzyTermEnum(reader, Term, minimumSimilarity, prefixLength); + } + + public override RewriteMethod RewriteMethod + { + set { throw new System.NotSupportedException("FuzzyQuery cannot change rewrite method"); } + } + + public override Query Rewrite(IndexReader reader) + { + if (!termLongEnough) + { + // can only match if it's exact + return new TermQuery(Term); + } + + int maxSize = BooleanQuery.MaxClauseCount; + + // TODO: Java uses a PriorityQueue. Using Linq, we can emulate it, + // however it's considerable slower than the java counterpart. 
+ // this should be a temporary thing, fixed before release + SortedList stQueue = new SortedList(); + FilteredTermEnum enumerator = GetEnum(reader); + + try + { + ScoreTerm st = new ScoreTerm(); + do + { + Term t = enumerator.Term; + if (t == null) break; + float score = enumerator.Difference(); + //ignore uncompetetive hits + if (stQueue.Count >= maxSize && score <= stQueue.Keys.First().score) + continue; + // add new entry in PQ + st.term = t; + st.score = score; + stQueue.Add(st, st); + // possibly drop entries from queue + if (stQueue.Count > maxSize) + { + st = stQueue.Keys.First(); + stQueue.Remove(st); + } + else + { + st = new ScoreTerm(); + } + } + while (enumerator.Next()); + } + finally + { + enumerator.Close(); + } + + BooleanQuery query = new BooleanQuery(true); + foreach(ScoreTerm st in stQueue.Keys) + { + TermQuery tq = new TermQuery(st.term); // found a match + tq.Boost = Boost * st.score; // set the boost + query.Add(tq, Occur.SHOULD); // add to query + } + + return query; + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (!Term.Field.Equals(field)) + { + buffer.Append(Term.Field); + buffer.Append(":"); + } + buffer.Append(Term.Text); + buffer.Append('~'); + buffer.Append(Single.ToString(minimumSimilarity)); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + protected internal class ScoreTerm : IComparable + { + public Term term; + public float score; + + public int CompareTo(ScoreTerm other) + { + if (Comparer.Default.Compare(this.score, other.score) == 0) + { + return other.term.CompareTo(this.term); + } + else + { + return Comparer.Default.Compare(this.score, other.score); + } + } + } + + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0); + result = prime * result + prefixLength; + result = prime * result + ((Term == null)?0:Term.GetHashCode()); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + FuzzyQuery other = (FuzzyQuery) obj; + if (BitConverter.ToInt32(BitConverter.GetBytes(minimumSimilarity), 0) != BitConverter.ToInt32(BitConverter.GetBytes(other.minimumSimilarity), 0)) + return false; + if (prefixLength != other.prefixLength) + return false; + if (Term == null) + { + if (other.Term != null) + return false; + } + else if (!Term.Equals(other.Term)) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/FuzzyTermEnum.cs b/external/Lucene.Net.Light/src/core/Search/FuzzyTermEnum.cs new file mode 100644 index 0000000000..6e4fc7bfe5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/FuzzyTermEnum.cs @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + + /// Subclass of FilteredTermEnum for enumerating all terms that are similiar + /// to the specified filter term. + /// + ///

Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + ///

+ public sealed class FuzzyTermEnum:FilteredTermEnum + { + /* Allows us save time required to create a new array + * everytime similarity is called. + */ + private int[] p; + private int[] d; + + private float similarity; + private bool endEnum = false; + + private bool isDisposed; + + private Term searchTerm = null; + private System.String field; + private System.String text; + private System.String prefix; + + private float minimumSimilarity; + private float scale_factor; + + /// Creates a FuzzyTermEnum with an empty prefix and a minSimilarity of 0.5f. + ///

+ /// After calling the constructor the enumeration is already pointing to the first + /// valid term if such a term exists. + /// + ///

+ /// + /// + /// + /// + /// IOException + /// + /// + public FuzzyTermEnum(IndexReader reader, Term term):this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength) + { + } + + /// Creates a FuzzyTermEnum with an empty prefix. + ///

+ /// After calling the constructor the enumeration is already pointing to the first + /// valid term if such a term exists. + /// + ///

+ /// + /// + /// + /// + /// + /// + /// IOException + /// + /// + public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity):this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength) + { + } + + /// Constructor for enumeration of all terms from specified reader which share a prefix of + /// length prefixLength with term and which have a fuzzy similarity > + /// minSimilarity. + ///

+ /// After calling the constructor the enumeration is already pointing to the first + /// valid term if such a term exists. + /// + ///

+ /// Delivers terms. + /// + /// Pattern term. + /// + /// Minimum required similarity for terms from the reader. Default value is 0.5f. + /// + /// Length of required common prefix. Default value is 0. + /// + /// IOException + public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength):base() + { + + if (minSimilarity >= 1.0f) + throw new System.ArgumentException("minimumSimilarity cannot be greater than or equal to 1"); + else if (minSimilarity < 0.0f) + throw new System.ArgumentException("minimumSimilarity cannot be less than 0"); + if (prefixLength < 0) + throw new System.ArgumentException("prefixLength cannot be less than 0"); + + this.minimumSimilarity = minSimilarity; + this.scale_factor = 1.0f / (1.0f - minimumSimilarity); + this.searchTerm = term; + this.field = searchTerm.Field; + + //The prefix could be longer than the word. + //It's kind of silly though. It means we must match the entire word. + int fullSearchTermLength = searchTerm.Text.Length; + int realPrefixLength = prefixLength > fullSearchTermLength?fullSearchTermLength:prefixLength; + + this.text = searchTerm.Text.Substring(realPrefixLength); + this.prefix = searchTerm.Text.Substring(0, (realPrefixLength) - (0)); + + this.p = new int[this.text.Length + 1]; + this.d = new int[this.text.Length + 1]; + + SetEnum(reader.Terms(new Term(searchTerm.Field, prefix))); + } + + /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to + /// calculate the distance between the given term and the comparing term. + /// + protected internal override bool TermCompare(Term term) + { + if ((System.Object) field == (System.Object) term.Field && term.Text.StartsWith(prefix)) + { + System.String target = term.Text.Substring(prefix.Length); + this.similarity = Similarity(target); + return (similarity > minimumSimilarity); + } + endEnum = true; + return false; + } + + public override float Difference() + { + return ((similarity - minimumSimilarity) * scale_factor); + } + + public override bool EndEnum() + { + return endEnum; + } + + // + // *************************** + // Compute Levenshtein distance + // **************************** + // + + ///

Similarity returns a number that is 1.0f or less (including negative numbers) + /// based on how similar the Term is compared to a target term. It returns + /// exactly 0.0f when + /// + /// editDistance > maximumEditDistance + /// Otherwise it returns: + /// + /// 1 - (editDistance / length) + /// where length is the length of the shortest term (text or target) including a + /// prefix that are identical and editDistance is the Levenshtein distance for + /// the two words.

+ /// + ///

Embedded within this algorithm is a fail-fast Levenshtein distance + /// algorithm. The fail-fast algorithm differs from the standard Levenshtein + /// distance algorithm in that it is aborted if it is discovered that the + /// minimum distance between the words is greater than some threshold. + /// + ///

To calculate the maximum distance threshold we use the following formula: + /// + /// (1 - minimumSimilarity) * length + /// where length is the shortest term including any prefix that is not part of the + /// similarity comparison. This formula was derived by solving for what maximum value + /// of distance returns false for the following statements: + /// + /// similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen))); + /// return (similarity > minimumSimilarity); + /// where distance is the Levenshtein distance for the two words. + ///
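A hypothetical worked example (not taken from the Lucene.Net sources), plugging numbers into the formulas above with minimumSimilarity = 0.5f, no prefix, and two six-character terms at edit distance 1:

    float minimumSimilarity = 0.5f;
    int prefixLength = 0, textLen = 6, targetLen = 6, editDistance = 1;

    // maximum edit distance that can still match: (1 - 0.5) * (6 + 0) = 3
    int maxDistance = (int)((1 - minimumSimilarity) * (System.Math.Min(textLen, targetLen) + prefixLength));

    // similarity: 1 - 1/6 = 0.8333..., which is above 0.5, so the term is accepted
    float similarity = 1 - (float)editDistance / (prefixLength + System.Math.Min(textLen, targetLen));
    bool accepted = similarity > minimumSimilarity; // true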

+ ///

Levenshtein distance (also known as edit distance) is a measure of similarity + /// between two strings where the distance is measured as the number of character + /// deletions, insertions or substitutions required to transform one string to + /// the other string. + ///

+ /// the target word or phrase + /// + /// the similarity, 0.0 or less indicates that it matches less than the required + /// threshold and 1.0 indicates that the text and target are identical + /// + private float Similarity(System.String target) + { + + int m = target.Length; + int n = text.Length; + if (n == 0) + { + //we don't have anything to compare. That means if we just add + //the letters for m we get the new word + return prefix.Length == 0 ? 0.0f : 1.0f - ((float)m / prefix.Length); + } + if (m == 0) + { + return prefix.Length == 0 ? 0.0f : 1.0f - ((float)n / prefix.Length); + } + + int maxDistance = CalculateMaxDistance(m); + + if (maxDistance < System.Math.Abs(m - n)) + { + //just adding the characters of m to n or vice-versa results in + //too many edits + //for example "pre" length is 3 and "prefixes" length is 8. We can see that + //given this optimal circumstance, the edit distance cannot be less than 5. + //which is 8-3 or more precisesly Math.abs(3-8). + //if our maximum edit distance is 4, then we can discard this word + //without looking at it. + return 0.0f; + } + + // init matrix d + for (int i = 0; i < n; ++i) + { + p[i] = i; + } + + // start computing edit distance + for (int j = 1; j <= m; ++j) + { + int bestPossibleEditDistance = m; + char t_j = target[j - 1]; + d[0] = j; + for (int i = 1; i <= n; ++i) + { + // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1) + if (t_j != text[i - 1]) + { + d[i] = Math.Min(Math.Min(d[i - 1], p[i]), p[i - 1]) + 1; + } + else + { + d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1]); + } + bestPossibleEditDistance = System.Math.Min(bestPossibleEditDistance, d[i]); + } + + //After calculating row i, the best possible edit distance + //can be found by found by finding the smallest value in a given column. + //If the bestPossibleEditDistance is greater than the max distance, abort. + + if (j > maxDistance && bestPossibleEditDistance > maxDistance) + { + //equal is okay, but not greater + //the closest the target can be to the text is just too far away. + //this target is leaving the party early. + return 0.0f; + } + + // copy current distance counts to 'previous row' distance counts: swap p and d + int[] _d = p; + p = d; + d = _d; + } + + // our last action in the above loop was to switch d and p, so p now + // actually has the most recent cost counts + + // this will return less than 0.0 when the edit distance is + // greater than the number of characters in the shorter word. + // but this was the formula that was previously used in FuzzyTermEnum, + // so it has not been changed (even though minimumSimilarity must be + // greater than 0.0) + return 1.0f - (p[n] / (float)(prefix.Length + System.Math.Min(n, m))); + + } + + /// The max Distance is the maximum Levenshtein distance for the text + /// compared to some other value that results in score that is + /// better than the minimum similarity. + /// + /// the length of the "other value" + /// + /// the maximum levenshtein distance that we care about + /// + private int CalculateMaxDistance(int m) + { + return (int) ((1 - minimumSimilarity) * (System.Math.Min(text.Length, m) + prefix.Length)); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + p = null; + d = null; + searchTerm = null; + } + + isDisposed = true; + base.Dispose(disposing); //call super.close() and let the garbage collector do its work. 
+ } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/HitQueue.cs b/external/Lucene.Net.Light/src/core/Search/HitQueue.cs new file mode 100644 index 0000000000..925d3a614c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/HitQueue.cs @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Search +{ + + public sealed class HitQueue : PriorityQueue + { + + private bool prePopulate; + + /// Creates a new instance with size elements. If + /// prePopulate is set to true, the queue will pre-populate itself + /// with sentinel objects and set its to size. In + /// that case, you should not rely on to get the number of + /// actual elements that were added to the queue, but keep track yourself.
+ /// NOTE: in case prePopulate is true, you should pop + /// elements from the queue using the following code example: + /// + /// + /// PriorityQueue pq = new HitQueue(10, true); // pre-populate. + /// ScoreDoc top = pq.top(); + /// + /// // Add/Update one element. + /// top.score = 1.0f; + /// top.doc = 0; + /// top = (ScoreDoc) pq.updateTop(); + /// int totalHits = 1; + /// + /// // Now pop only the elements that were *truly* inserted. + /// // First, pop all the sentinel elements (there are pq.size() - totalHits). + /// for (int i = pq.size() - totalHits; i > 0; i--) pq.pop(); + /// + /// // Now pop the truly added elements. + /// ScoreDoc[] results = new ScoreDoc[totalHits]; + /// for (int i = totalHits - 1; i >= 0; i--) { + /// results[i] = (ScoreDoc) pq.pop(); + /// } + /// + /// + ///

NOTE: This class pre-allocates a full array of + /// length size. + ///

+ /// the requested size of this queue. + /// + /// specifies whether to pre-populate the queue with sentinel values. + /// + /// + /// + public /*internal*/ HitQueue(int size, bool prePopulate) + { + this.prePopulate = prePopulate; + Initialize(size); + } + + // Returns null if prePopulate is false. + + protected internal override ScoreDoc SentinelObject + { + get + { + // Always set the doc Id to MAX_VALUE so that it won't be favored by + // lessThan. This generally should not happen since if score is not NEG_INF, + // TopScoreDocCollector will always add the object to the queue. + return !prePopulate ? null : new ScoreDoc(System.Int32.MaxValue, System.Single.NegativeInfinity); + } + } + + public override bool LessThan(ScoreDoc hitA, ScoreDoc hitB) + { + if (hitA.Score == hitB.Score) + return hitA.Doc > hitB.Doc; + else + return hitA.Score < hitB.Score; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/IndexSearcher.cs b/external/Lucene.Net.Light/src/core/Search/IndexSearcher.cs new file mode 100644 index 0000000000..f77ff20612 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/IndexSearcher.cs @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Index; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using Directory = Lucene.Net.Store.Directory; +using ReaderUtil = Lucene.Net.Util.ReaderUtil; + +namespace Lucene.Net.Search +{ + + /// Implements search over a single IndexReader. + /// + ///

Applications usually need only call the inherited + /// Search methods. For performance reasons it is + /// recommended to open only one IndexSearcher and use it for all of your searches. + /// + ///
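An illustrative usage sketch (not taken from the Lucene.Net sources); the index path, field and term are hypothetical, and the searcher is opened read-only and reused, as recommended above:

    using (var dir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo("/path/to/index")))
    using (var searcher = new IndexSearcher(dir, true))
    {
        var query = new TermQuery(new Term("title", "lucene"));
        TopDocs hits = searcher.Search(query, 10);
        foreach (ScoreDoc sd in hits.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            // use the stored fields of doc here
        }
    }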

NOTE: + /// instances are completely + /// thread safe, meaning multiple threads can call any of its + /// methods, concurrently. If your application requires + /// external synchronization, you should not + /// synchronize on the IndexSearcher instance; + /// use your own (non-Lucene) objects instead.

+ ///

+ [Serializable] + public class IndexSearcher : Searcher + { + internal IndexReader reader; + private bool closeReader; + private bool isDisposed; + + // NOTE: these members might change in incompatible ways + // in the next release + private IndexReader[] subReaders; + private int[] docStarts; + + /// Creates a searcher searching the index in the named + /// directory, with readOnly=true + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + public IndexSearcher(Directory path) + : this(IndexReader.Open(path, true), true) + { + } + + /// Creates a searcher searching the index in the named + /// directory. You should pass readOnly=true, since it + /// gives much better concurrent performance, unless you + /// intend to do write operations (delete documents or + /// change norms) with the underlying IndexReader. + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + /// directory where IndexReader will be opened + /// + /// if true, the underlying IndexReader + /// will be opened readOnly + /// + public IndexSearcher(Directory path, bool readOnly):this(IndexReader.Open(path, readOnly), true) + { + } + + /// Creates a searcher searching the provided index + /// + /// Note that the underlying IndexReader is not closed, if + /// IndexSearcher was constructed with IndexSearcher(IndexReader r). + /// If the IndexReader was supplied implicitly by specifying a directory, then + /// the IndexReader gets closed. + /// + /// + public IndexSearcher(IndexReader r):this(r, false) + { + } + + /// + /// Expert: directly specify the reader, subReaders and their + /// DocID starts + ///

+ /// NOTE: This API is experimental and + /// might change in incompatible ways in the next + /// release

+ ///

+ public IndexSearcher(IndexReader reader, IndexReader[] subReaders, int[] docStarts) + { + this.reader = reader; + this.subReaders = subReaders; + this.docStarts = docStarts; + this.closeReader = false; + } + + private IndexSearcher(IndexReader r, bool closeReader) + { + reader = r; + this.closeReader = closeReader; + + System.Collections.Generic.IList subReadersList = new System.Collections.Generic.List(); + GatherSubReaders(subReadersList, reader); + subReaders = subReadersList.ToArray(); + docStarts = new int[subReaders.Length]; + int maxDoc = 0; + for (int i = 0; i < subReaders.Length; i++) + { + docStarts[i] = maxDoc; + maxDoc += subReaders[i].MaxDoc; + } + } + + protected internal virtual void GatherSubReaders(System.Collections.Generic.IList allSubReaders, IndexReader r) + { + ReaderUtil.GatherSubReaders(allSubReaders, r); + } + + /// Return the this searches. + public virtual IndexReader IndexReader + { + get { return reader; } + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (closeReader) + reader.Close(); + } + + isDisposed = true; + } + + // inherit javadoc + public override int DocFreq(Term term) + { + return reader.DocFreq(term); + } + + // inherit javadoc + public override Document Doc(int i) + { + return reader.Document(i); + } + + // inherit javadoc + public override Document Doc(int i, FieldSelector fieldSelector) + { + return reader.Document(i, fieldSelector); + } + + // inherit javadoc + public override int MaxDoc + { + get { return reader.MaxDoc; } + } + + // inherit javadoc + public override TopDocs Search(Weight weight, Filter filter, int nDocs) + { + + if (nDocs <= 0) + { + throw new System.ArgumentException("nDocs must be > 0"); + } + nDocs = Math.Min(nDocs, reader.MaxDoc); + + TopScoreDocCollector collector = TopScoreDocCollector.Create(nDocs, !weight.GetScoresDocsOutOfOrder()); + Search(weight, filter, collector); + return collector.TopDocs(); + } + + public override TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort) + { + return Search(weight, filter, nDocs, sort, true); + } + + /// Just like , but you choose + /// whether or not the fields in the returned instances + /// should be set by specifying fillFields. + ///

+ /// NOTE: this does not compute scores by default. If you need scores, create + /// a instance by calling + /// and then pass that to + /// . + ///

+ ///

+ public virtual TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort, bool fillFields) + { + nDocs = Math.Min(nDocs, reader.MaxDoc); + + TopFieldCollector collector2 = TopFieldCollector.Create(sort, nDocs, fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.GetScoresDocsOutOfOrder()); + Search(weight, filter, collector2); + return (TopFieldDocs) collector2.TopDocs(); + } + + public override void Search(Weight weight, Filter filter, Collector collector) + { + + if (filter == null) + { + for (int i = 0; i < subReaders.Length; i++) + { + // search each subreader + collector.SetNextReader(subReaders[i], docStarts[i]); + Scorer scorer = weight.Scorer(subReaders[i], !collector.AcceptsDocsOutOfOrder, true); + if (scorer != null) + { + scorer.Score(collector); + } + } + } + else + { + for (int i = 0; i < subReaders.Length; i++) + { + // search each subreader + collector.SetNextReader(subReaders[i], docStarts[i]); + SearchWithFilter(subReaders[i], weight, filter, collector); + } + } + } + + private void SearchWithFilter(IndexReader reader, Weight weight, Filter filter, Collector collector) + { + + System.Diagnostics.Debug.Assert(filter != null); + + Scorer scorer = weight.Scorer(reader, true, false); + if (scorer == null) + { + return ; + } + + int docID = scorer.DocID(); + System.Diagnostics.Debug.Assert(docID == - 1 || docID == DocIdSetIterator.NO_MORE_DOCS); + + // CHECKME: use ConjunctionScorer here? + DocIdSet filterDocIdSet = filter.GetDocIdSet(reader); + if (filterDocIdSet == null) + { + // this means the filter does not accept any documents. + return ; + } + + DocIdSetIterator filterIter = filterDocIdSet.Iterator(); + if (filterIter == null) + { + // this means the filter does not accept any documents. + return ; + } + int filterDoc = filterIter.NextDoc(); + int scorerDoc = scorer.Advance(filterDoc); + + collector.SetScorer(scorer); + while (true) + { + if (scorerDoc == filterDoc) + { + // Check if scorer has exhausted, only before collecting. + if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) + { + break; + } + collector.Collect(scorerDoc); + filterDoc = filterIter.NextDoc(); + scorerDoc = scorer.Advance(filterDoc); + } + else if (scorerDoc > filterDoc) + { + filterDoc = filterIter.Advance(scorerDoc); + } + else + { + scorerDoc = scorer.Advance(filterDoc); + } + } + } + + public override Query Rewrite(Query original) + { + Query query = original; + for (Query rewrittenQuery = query.Rewrite(reader); rewrittenQuery != query; rewrittenQuery = query.Rewrite(reader)) + { + query = rewrittenQuery; + } + return query; + } + + public override Explanation Explain(Weight weight, int doc) + { + int n = ReaderUtil.SubIndex(doc, docStarts); + int deBasedDoc = doc - docStarts[n]; + + return weight.Explain(subReaders[n], deBasedDoc); + } + + private bool fieldSortDoTrackScores; + private bool fieldSortDoMaxScore; + + /// By default, no scores are computed when sorting by field (using + /// ). You can change that, per + /// IndexSearcher instance, by calling this method. Note that this will incur + /// a CPU cost. + /// + /// + /// If true, then scores are returned for every matching document + /// in . + /// + /// + /// If true, then the max score for all matching docs is computed. 
+ /// + public virtual void SetDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore) + { + fieldSortDoTrackScores = doTrackScores; + fieldSortDoMaxScore = doMaxScore; + } + + public IndexReader reader_ForNUnit + { + get { return reader; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/MatchAllDocsQuery.cs b/external/Lucene.Net.Light/src/core/Search/MatchAllDocsQuery.cs new file mode 100644 index 0000000000..a380b3508a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/MatchAllDocsQuery.cs @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; +using TermDocs = Lucene.Net.Index.TermDocs; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + /// A query that matches all documents. + /// + /// + [Serializable] + public class MatchAllDocsQuery:Query + { + + public MatchAllDocsQuery():this(null) + { + } + + private System.String normsField; + + /// Field used for normalization factor (document boost). Null if nothing. 
+ /// + public MatchAllDocsQuery(System.String normsField) + { + this.normsField = normsField; + } + + private class MatchAllScorer:Scorer + { + private void InitBlock(MatchAllDocsQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private MatchAllDocsQuery enclosingInstance; + public MatchAllDocsQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal TermDocs termDocs; + internal float score; + internal byte[] norms; + private int doc = - 1; + + internal MatchAllScorer(MatchAllDocsQuery enclosingInstance, IndexReader reader, Similarity similarity, Weight w, byte[] norms):base(similarity) + { + InitBlock(enclosingInstance); + this.termDocs = reader.TermDocs(null); + score = w.Value; + this.norms = norms; + } + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + return doc = termDocs.Next()?termDocs.Doc:NO_MORE_DOCS; + } + + public override float Score() + { + return norms == null?score:score * Similarity.DecodeNorm(norms[DocID()]); + } + + public override int Advance(int target) + { + return doc = termDocs.SkipTo(target)?termDocs.Doc:NO_MORE_DOCS; + } + } + + [Serializable] + private class MatchAllDocsWeight:Weight + { + private void InitBlock(MatchAllDocsQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private MatchAllDocsQuery enclosingInstance; + public MatchAllDocsQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Similarity similarity; + private float queryWeight; + private float queryNorm; + + public MatchAllDocsWeight(MatchAllDocsQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = searcher.Similarity; + } + + public override System.String ToString() + { + return "weight(" + Enclosing_Instance + ")"; + } + + public override Query Query + { + get { return Enclosing_Instance; } + } + + public override float Value + { + get { return queryWeight; } + } + + public override float GetSumOfSquaredWeights() + { + queryWeight = Enclosing_Instance.Boost; + return queryWeight*queryWeight; + } + + public override void Normalize(float queryNorm) + { + this.queryNorm = queryNorm; + queryWeight *= this.queryNorm; + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + return new MatchAllScorer(enclosingInstance, reader, similarity, this, Enclosing_Instance.normsField != null?reader.Norms(Enclosing_Instance.normsField):null); + } + + public override Explanation Explain(IndexReader reader, int doc) + { + // explain query weight + Explanation queryExpl = new ComplexExplanation(true, Value, "MatchAllDocsQuery, product of:"); + if (Enclosing_Instance.Boost != 1.0f) + { + queryExpl.AddDetail(new Explanation(Enclosing_Instance.Boost, "boost")); + } + queryExpl.AddDetail(new Explanation(queryNorm, "queryNorm")); + + return queryExpl; + } + } + + public override Weight CreateWeight(Searcher searcher) + { + return new MatchAllDocsWeight(this, searcher); + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("*:*"); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override bool Equals(System.Object o) + { + if (!(o is MatchAllDocsQuery)) + return false; + MatchAllDocsQuery other = (MatchAllDocsQuery) o; + return this.Boost == 
other.Boost; + } + + public override int GetHashCode() + { + return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ 0x1AA71190; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/MultiPhraseQuery.cs b/external/Lucene.Net.Light/src/core/Search/MultiPhraseQuery.cs new file mode 100644 index 0000000000..38e98ca0da --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/MultiPhraseQuery.cs @@ -0,0 +1,496 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using MultipleTermPositions = Lucene.Net.Index.MultipleTermPositions; +using Term = Lucene.Net.Index.Term; +using TermPositions = Lucene.Net.Index.TermPositions; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + /// MultiPhraseQuery is a generalized version of PhraseQuery, with an added + /// method . + /// To use this class, to search for the phrase "Microsoft app*" first use + /// add(Term) on the term "Microsoft", then find all terms that have "app" as + /// prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[] + /// terms) to add them to the query. + /// + /// + /// 1.0 + /// + [Serializable] + public class MultiPhraseQuery:Query + { + private System.String field; + private System.Collections.Generic.List termArrays = new System.Collections.Generic.List(); + private System.Collections.Generic.List positions = new System.Collections.Generic.List(); + + private int slop = 0; + + /// Gets or sets the phrase slop for this query. + /// + /// + public virtual int Slop + { + get { return slop; } + set { slop = value; } + } + + /// Add a single term at the next position in the phrase. + /// + /// + public virtual void Add(Term term) + { + Add(new Term[]{term}); + } + + /// Add multiple terms at the next position in the phrase. Any of the terms + /// may match. + /// + /// + /// + /// + public virtual void Add(Term[] terms) + { + int position = 0; + if (positions.Count > 0) + position = positions[positions.Count - 1] + 1; + + Add(terms, position); + } + + /// Allows to specify the relative position of terms within the phrase. + /// + /// + /// + /// + /// + /// + /// + /// + public virtual void Add(Term[] terms, int position) + { + if (termArrays.Count == 0) + field = terms[0].Field; + + for (int i = 0; i < terms.Length; i++) + { + if ((System.Object) terms[i].Field != (System.Object) field) + { + throw new System.ArgumentException("All phrase terms must be in the same field (" + field + "): " + terms[i]); + } + } + + termArrays.Add(terms); + positions.Add(position); + } + + /// Returns a List<Term[]> of the terms in the multiphrase. + /// Do not modify the List or its contents. 
+ /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual System.Collections.Generic.IList GetTermArrays() + { + return termArrays.AsReadOnly(); + } + + /// Returns the relative positions of terms in this phrase. + public virtual int[] GetPositions() + { + int[] result = new int[positions.Count]; + for (int i = 0; i < positions.Count; i++) + result[i] = positions[i]; + return result; + } + + // inherit javadoc + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + foreach(Term[] arr in termArrays) + { + terms.UnionWith(arr); + } + } + + + [Serializable] + private class MultiPhraseWeight:Weight + { + private void InitBlock(MultiPhraseQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private MultiPhraseQuery enclosingInstance; + public MultiPhraseQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Similarity similarity; + private float value_Renamed; + private float idf; + private float queryNorm; + private float queryWeight; + + public MultiPhraseWeight(MultiPhraseQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + + // compute idf + int maxDoc = searcher.MaxDoc; + foreach (Term[] terms in enclosingInstance.termArrays) + { + foreach (Term term in terms) + { + idf += similarity.Idf(searcher.DocFreq(term), maxDoc); + } + } + } + + public override Query Query + { + get { return Enclosing_Instance; } + } + + public override float Value + { + get { return value_Renamed; } + } + + public override float GetSumOfSquaredWeights() + { + queryWeight = idf*Enclosing_Instance.Boost; // compute query weight + return queryWeight*queryWeight; // square it + } + + public override void Normalize(float queryNorm) + { + this.queryNorm = queryNorm; + queryWeight *= queryNorm; // normalize query weight + value_Renamed = queryWeight * idf; // idf for document + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + if (Enclosing_Instance.termArrays.Count == 0) + // optimize zero-term case + return null; + + TermPositions[] tps = new TermPositions[Enclosing_Instance.termArrays.Count]; + for (int i = 0; i < tps.Length; i++) + { + Term[] terms = Enclosing_Instance.termArrays[i]; + + TermPositions p; + if (terms.Length > 1) + p = new MultipleTermPositions(reader, terms); + else + p = reader.TermPositions(terms[0]); + + if (p == null) + return null; + + tps[i] = p; + } + + if (Enclosing_Instance.slop == 0) + return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field)); + else + return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field)); + } + + public override Explanation Explain(IndexReader reader, int doc) + { + ComplexExplanation result = new ComplexExplanation(); + result.Description = "weight(" + Query + " in " + doc + "), product of:"; + + Explanation idfExpl = new Explanation(idf, "idf(" + Query + ")"); + + // explain query weight + Explanation queryExpl = new Explanation(); + queryExpl.Description = "queryWeight(" + Query + "), product of:"; + + Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost"); + if (Enclosing_Instance.Boost != 1.0f) + queryExpl.AddDetail(boostExpl); + + queryExpl.AddDetail(idfExpl); + + Explanation 
queryNormExpl = new Explanation(queryNorm, "queryNorm"); + queryExpl.AddDetail(queryNormExpl); + + queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value; + + result.AddDetail(queryExpl); + + // explain field weight + ComplexExplanation fieldExpl = new ComplexExplanation(); + fieldExpl.Description = "fieldWeight(" + Query + " in " + doc + "), product of:"; + + PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false); + if (scorer == null) + { + return new Explanation(0.0f, "no matching docs"); + } + Explanation tfExplanation = new Explanation(); + int d = scorer.Advance(doc); + float phraseFreq = (d == doc) ? scorer.CurrentFreq() : 0.0f; + tfExplanation.Value = similarity.Tf(phraseFreq); + tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")"; + fieldExpl.AddDetail(tfExplanation); + fieldExpl.AddDetail(idfExpl); + + Explanation fieldNormExpl = new Explanation(); + byte[] fieldNorms = reader.Norms(Enclosing_Instance.field); + float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f; + fieldNormExpl.Value = fieldNorm; + fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")"; + fieldExpl.AddDetail(fieldNormExpl); + + fieldExpl.Match = tfExplanation.IsMatch; + fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value; + + result.AddDetail(fieldExpl); + System.Boolean? tempAux = fieldExpl.Match; + result.Match = tempAux; + + // combine them + result.Value = queryExpl.Value * fieldExpl.Value; + + if (queryExpl.Value == 1.0f) + return fieldExpl; + + return result; + } + } + + public override Query Rewrite(IndexReader reader) + { + if (termArrays.Count == 1) + { + // optimize one-term case + Term[] terms = termArrays[0]; + BooleanQuery boq = new BooleanQuery(true); + for (int i = 0; i < terms.Length; i++) + { + boq.Add(new TermQuery(terms[i]), Occur.SHOULD); + } + boq.Boost = Boost; + return boq; + } + else + { + return this; + } + } + + public override Weight CreateWeight(Searcher searcher) + { + return new MultiPhraseWeight(this, searcher); + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String f) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (!field.Equals(f)) + { + buffer.Append(field); + buffer.Append(":"); + } + + buffer.Append("\""); + System.Collections.Generic.IEnumerator i = termArrays.GetEnumerator(); + bool first = true; + while (i.MoveNext()) + { + if (first) + { + first = false; + } + else + { + buffer.Append(" "); + } + + Term[] terms = i.Current; + if (terms.Length > 1) + { + buffer.Append("("); + for (int j = 0; j < terms.Length; j++) + { + buffer.Append(terms[j].Text); + if (j < terms.Length - 1) + buffer.Append(" "); + } + buffer.Append(")"); + } + else + { + buffer.Append(terms[0].Text); + } + } + buffer.Append("\""); + + if (slop != 0) + { + buffer.Append("~"); + buffer.Append(slop); + } + + buffer.Append(ToStringUtils.Boost(Boost)); + + return buffer.ToString(); + } + + + /// Returns true if o is equal to this. 
+ public override bool Equals(System.Object o) + { + if (!(o is MultiPhraseQuery)) + return false; + MultiPhraseQuery other = (MultiPhraseQuery) o; + bool eq = this.Boost == other.Boost && this.slop == other.slop; + if(!eq) + { + return false; + } + eq = this.termArrays.Count.Equals(other.termArrays.Count); + if (!eq) + { + return false; + } + + for (int i = 0; i < this.termArrays.Count; i++) + { + if (!Compare.CompareTermArrays((Term[])this.termArrays[i], (Term[])other.termArrays[i])) + { + return false; + } + } + if(!eq) + { + return false; + } + eq = this.positions.Count.Equals(other.positions.Count); + if (!eq) + { + return false; + } + for (int i = 0; i < this.positions.Count; i++) + { + if (!((int)this.positions[i] == (int)other.positions[i])) + { + return false; + } + } + return true; + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + int posHash = 0; + foreach(int pos in positions) + { + posHash += pos.GetHashCode(); + } + return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ slop ^ TermArraysHashCode() ^ posHash ^ 0x4AC65113; + } + + // Breakout calculation of the termArrays hashcode + private int TermArraysHashCode() + { + int hashCode = 1; + foreach(Term[] termArray in termArrays) + { + // Java uses Arrays.hashCode(termArray) + hashCode = 31*hashCode + (termArray == null ? 0 : ArraysHashCode(termArray)); + } + return hashCode; + } + + private int ArraysHashCode(Term[] termArray) + { + if (termArray == null) + return 0; + + int result = 1; + + for (int i = 0; i < termArray.Length; i++) + { + Term term = termArray[i]; + result = 31 * result + (term == null?0:term.GetHashCode()); + } + + return result; + } + + // Breakout calculation of the termArrays equals + private bool TermArraysEquals(System.Collections.Generic.List termArrays1, System.Collections.Generic.List termArrays2) + { + if (termArrays1.Count != termArrays2.Count) + { + return false; + } + var iterator1 = termArrays1.GetEnumerator(); + var iterator2 = termArrays2.GetEnumerator(); + while (iterator1.MoveNext()) + { + Term[] termArray1 = iterator1.Current; + Term[] termArray2 = iterator2.Current; + if (!(termArray1 == null ? termArray2 == null : TermEquals(termArray1, termArray2))) + { + return false; + } + } + return true; + } + + public static bool TermEquals(System.Array array1, System.Array array2) + { + bool result = false; + if ((array1 == null) && (array2 == null)) + result = true; + else if ((array1 != null) && (array2 != null)) + { + if (array1.Length == array2.Length) + { + int length = array1.Length; + result = true; + for (int index = 0; index < length; index++) + { + if (!(array1.GetValue(index).Equals(array2.GetValue(index)))) + { + result = false; + break; + } + } + } + } + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/MultiSearcher.cs b/external/Lucene.Net.Light/src/core/Search/MultiSearcher.cs new file mode 100644 index 0000000000..3d6ef075c9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/MultiSearcher.cs @@ -0,0 +1,458 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Support; +using Lucene.Net.Util; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using ReaderUtil = Lucene.Net.Util.ReaderUtil; + +namespace Lucene.Net.Search +{ + + /// Implements search over a set of Searchables. + /// + ///

Applications usually need only call the inherited + /// Search(Query, int) or Search(Query, Filter, int) methods. + ///
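+ /// A minimal usage sketch (illustrative only; "dirA" and "dirB" are assumed to be Directory
+ /// instances holding existing indexes, and "query" an already-built Query):
+ ///
+ /// Searcher searcher = new MultiSearcher(new IndexSearcher(dirA), new IndexSearcher(dirB));
+ /// TopDocs hits = searcher.Search(query, 10);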

+ public class MultiSearcher:Searcher + { + private class AnonymousClassCollector:Collector + { + public AnonymousClassCollector(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance) + { + InitBlock(collector, start, enclosingInstance); + } + private void InitBlock(Lucene.Net.Search.Collector collector, int start, MultiSearcher enclosingInstance) + { + this.collector = collector; + this.start = start; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Search.Collector collector; + private int start; + private MultiSearcher enclosingInstance; + public MultiSearcher Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public override void SetScorer(Scorer scorer) + { + collector.SetScorer(scorer); + } + public override void Collect(int doc) + { + collector.Collect(doc); + } + public override void SetNextReader(IndexReader reader, int docBase) + { + collector.SetNextReader(reader, start + docBase); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return collector.AcceptsDocsOutOfOrder; } + } + } + + /// Document Frequency cache acting as a Dummy-Searcher. This class is no + /// full-fledged Searcher, but only supports the methods necessary to + /// initialize Weights. + /// + private class CachedDfSource:Searcher + { + private readonly Dictionary dfMap; // Map from Terms to corresponding doc freqs + private readonly int maxDoc; // document count + + public CachedDfSource(Dictionary dfMap, int maxDoc, Similarity similarity) + { + this.dfMap = dfMap; + this.maxDoc = maxDoc; + Similarity = similarity; + } + + public override int DocFreq(Term term) + { + int df; + try + { + df = dfMap[term]; + } + catch (KeyNotFoundException) // C# equiv. of java code. + { + throw new System.ArgumentException("df for term " + term.Text + " not available"); + } + return df; + } + + public override int[] DocFreqs(Term[] terms) + { + int[] result = new int[terms.Length]; + for (int i = 0; i < terms.Length; i++) + { + result[i] = DocFreq(terms[i]); + } + return result; + } + + public override int MaxDoc + { + get { return maxDoc; } + } + + public override Query Rewrite(Query query) + { + // this is a bit of a hack. We know that a query which + // creates a Weight based on this Dummy-Searcher is + // always already rewritten (see preparedWeight()). + // Therefore we just return the unmodified query here + return query; + } + + // TODO: This probably shouldn't throw an exception? + protected override void Dispose(bool disposing) + { + throw new System.NotSupportedException(); + } + + public override Document Doc(int i) + { + throw new System.NotSupportedException(); + } + + public override Document Doc(int i, FieldSelector fieldSelector) + { + throw new System.NotSupportedException(); + } + + public override Explanation Explain(Weight weight, int doc) + { + throw new System.NotSupportedException(); + } + + public override void Search(Weight weight, Filter filter, Collector results) + { + throw new System.NotSupportedException(); + } + + public override TopDocs Search(Weight weight, Filter filter, int n) + { + throw new System.NotSupportedException(); + } + + public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort) + { + throw new System.NotSupportedException(); + } + } + + private Searchable[] searchables; + private int[] starts; + private int maxDoc = 0; + + private bool isDisposed; + + /// Creates a searcher which searches searchers. 
+ public MultiSearcher(params Searchable[] searchables) + { + this.searchables = searchables; + + starts = new int[searchables.Length + 1]; // build starts array + for (int i = 0; i < searchables.Length; i++) + { + starts[i] = maxDoc; + maxDoc += searchables[i].MaxDoc; // compute maxDocs + } + starts[searchables.Length] = maxDoc; + } + + /// Return the array of s this searches. + public virtual Searchable[] GetSearchables() + { + return searchables; + } + + protected internal virtual int[] GetStarts() + { + return starts; + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + for (int i = 0; i < searchables.Length; i++) + searchables[i].Close(); + } + + isDisposed = true; + } + + public override int DocFreq(Term term) + { + int docFreq = 0; + for (int i = 0; i < searchables.Length; i++) + docFreq += searchables[i].DocFreq(term); + return docFreq; + } + + // inherit javadoc + public override Document Doc(int n) + { + int i = SubSearcher(n); // find searcher index + return searchables[i].Doc(n - starts[i]); // dispatch to searcher + } + + // inherit javadoc + public override Document Doc(int n, FieldSelector fieldSelector) + { + int i = SubSearcher(n); // find searcher index + return searchables[i].Doc(n - starts[i], fieldSelector); // dispatch to searcher + } + + /// Returns index of the searcher for document n in the array + /// used to construct this searcher. + /// + public virtual int SubSearcher(int n) + { + // find searcher for doc n: + return ReaderUtil.SubIndex(n, starts); + } + + /// Returns the document number of document n within its + /// sub-index. + /// + public virtual int SubDoc(int n) + { + return n - starts[SubSearcher(n)]; + } + + public override int MaxDoc + { + get { return maxDoc; } + } + + public override TopDocs Search(Weight weight, Filter filter, int nDocs) + { + HitQueue hq = new HitQueue(nDocs, false); + int totalHits = 0; + + var lockObj = new object(); + for (int i = 0; i < searchables.Length; i++) + { + // search each searcher + // use NullLock, we don't care about synchronization for these + TopDocs docs = MultiSearcherCallableNoSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, nDocs, hq, i, starts); + totalHits += docs.TotalHits; // update totalHits + } + + ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()]; + for (int i = hq.Size() - 1; i >= 0; i--) + // put docs in array + scoreDocs2[i] = hq.Pop(); + + float maxScore = (totalHits == 0)?System.Single.NegativeInfinity:scoreDocs2[0].Score; + + return new TopDocs(totalHits, scoreDocs2, maxScore); + } + + public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort) + { + var hq = new FieldDocSortedHitQueue(n); + int totalHits = 0; + + float maxScore = System.Single.NegativeInfinity; + + var lockObj = new object(); + for (int i = 0; i < searchables.Length; i++) + { + // search each searcher + // use NullLock, we don't care about synchronization for these + TopFieldDocs docs = MultiSearcherCallableWithSort(ThreadLock.NullLock, lockObj, searchables[i], weight, filter, n, hq, sort, + i, starts); + totalHits += docs.TotalHits; + maxScore = System.Math.Max(maxScore, docs.MaxScore); + } + + ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()]; + for (int i = hq.Size() - 1; i >= 0; i--) + // put docs in array + scoreDocs2[i] = hq.Pop(); + + return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore); + } + + /// + public override void Search(Weight weight, Filter filter, Collector collector) + { + for (int i = 0; i < 
searchables.Length; i++) + { + int start = starts[i]; + + Collector hc = new AnonymousClassCollector(collector, start, this); + searchables[i].Search(weight, filter, hc); + } + } + + public override Query Rewrite(Query original) + { + Query[] queries = new Query[searchables.Length]; + for (int i = 0; i < searchables.Length; i++) + { + queries[i] = searchables[i].Rewrite(original); + } + return queries[0].Combine(queries); + } + + public override Explanation Explain(Weight weight, int doc) + { + int i = SubSearcher(doc); // find searcher index + return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher + } + + /// Create weight in multiple index scenario. + /// + /// Distributed query processing is done in the following steps: + /// 1. rewrite query + /// 2. extract necessary terms + /// 3. collect dfs for these terms from the Searchables + /// 4. create query weight using aggregate dfs. + /// 5. distribute that weight to Searchables + /// 6. merge results + /// + /// Steps 1-4 are done here, 5+6 in the search() methods + /// + /// + /// rewritten queries + /// + public /*protected internal*/ override Weight CreateWeight(Query original) + { + // step 1 + Query rewrittenQuery = Rewrite(original); + + // step 2 + ISet terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); + rewrittenQuery.ExtractTerms(terms); + + // step3 + Term[] allTermsArray = terms.ToArray(); + int[] aggregatedDfs = new int[terms.Count]; + for (int i = 0; i < searchables.Length; i++) + { + int[] dfs = searchables[i].DocFreqs(allTermsArray); + for (int j = 0; j < aggregatedDfs.Length; j++) + { + aggregatedDfs[j] += dfs[j]; + } + } + + var dfMap = new Dictionary(); + for (int i = 0; i < allTermsArray.Length; i++) + { + dfMap[allTermsArray[i]] = aggregatedDfs[i]; + } + + // step4 + int numDocs = MaxDoc; + CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, Similarity); + + return rewrittenQuery.Weight(cacheSim); + } + + internal Func MultiSearcherCallableNoSort = + (threadLock, lockObj, searchable, weight, filter, nDocs, hq, i, starts) => + { + TopDocs docs = searchable.Search(weight, filter, nDocs); + ScoreDoc[] scoreDocs = docs.ScoreDocs; + for(int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq + { + ScoreDoc scoreDoc = scoreDocs[j]; + scoreDoc.Doc += starts[i]; //convert doc + //it would be so nice if we had a thread-safe insert + try + { + threadLock.Enter(lockObj); + if (scoreDoc == hq.InsertWithOverflow(scoreDoc)) + break; + } + finally + { + threadLock.Exit(lockObj); + } + } + return docs; + }; + + internal Func + MultiSearcherCallableWithSort = (threadLock, lockObj, searchable, weight, filter, nDocs, hq, sort, i, starts) => + { + TopFieldDocs docs = searchable.Search(weight, filter, nDocs, sort); + // if one of the Sort fields is FIELD_DOC, need to fix its values, so that + // it will break ties by doc Id properly. Otherwise, it will compare to + // 'relative' doc Ids, that belong to two different searchables. 
+ for (int j = 0; j < docs.fields.Length; j++) + { + if (docs.fields[j].Type == SortField.DOC) + { + // iterate over the score docs and change their fields value + for (int j2 = 0; j2 < docs.ScoreDocs.Length; j2++) + { + FieldDoc fd = (FieldDoc) docs.ScoreDocs[j2]; + fd.fields[j] = (int)fd.fields[j] + starts[i]; + } + break; + } + } + try + { + threadLock.Enter(lockObj); + hq.SetFields(docs.fields); + } + finally + { + threadLock.Exit(lockObj); + } + + ScoreDoc[] scoreDocs = docs.ScoreDocs; + for (int j = 0; j < scoreDocs.Length; j++) // merge scoreDocs into hq + { + FieldDoc fieldDoc = (FieldDoc) scoreDocs[j]; + fieldDoc.Doc += starts[i]; //convert doc + //it would be so nice if we had a thread-safe insert + lock (lockObj) + { + if (fieldDoc == hq.InsertWithOverflow(fieldDoc)) + break; + + } + } + return docs; + }; + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/MultiTermQuery.cs b/external/Lucene.Net.Light/src/core/Search/MultiTermQuery.cs new file mode 100644 index 0000000000..430a521808 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/MultiTermQuery.cs @@ -0,0 +1,465 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using QueryParser = Lucene.Net.QueryParsers.QueryParser; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + /// An abstract that matches documents + /// containing a subset of terms provided by a + /// enumeration. + /// + ///

This query cannot be used directly; you must subclass + /// it and define GetEnum to provide a + /// FilteredTermEnum that iterates through the terms to be + /// matched. + ///

NOTE: if RewriteMethod is either + /// SCORING_BOOLEAN_QUERY_REWRITE or + /// CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE, you may encounter a + /// BooleanQuery.TooManyClauses exception during + /// searching, which happens when the number of terms to be + /// searched exceeds BooleanQuery.MaxClauseCount. Setting + /// RewriteMethod to CONSTANT_SCORE_FILTER_REWRITE + /// prevents this. + /// + ///

The recommended rewrite method is + /// CONSTANT_SCORE_AUTO_REWRITE_DEFAULT: it doesn't spend CPU + /// computing unhelpful scores, and it tries to pick the most + /// performant rewrite method given the query. + /// + /// Note that QueryParser produces + /// MultiTermQueries using + /// CONSTANT_SCORE_AUTO_REWRITE_DEFAULT by default. + ///
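+ /// For example, a sketch of overriding the default on a concrete MultiTermQuery subclass
+ /// (the "query" instance name is illustrative):
+ ///
+ /// query.RewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;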

+ [Serializable] + public abstract class MultiTermQuery:Query + { + [Serializable] + public class AnonymousClassConstantScoreAutoRewrite:ConstantScoreAutoRewrite + { + public override int TermCountCutoff + { + set { throw new System.NotSupportedException("Please create a private instance"); } + } + + public override double DocCountPercent + { + set { throw new System.NotSupportedException("Please create a private instance"); } + } + + // Make sure we are still a singleton even after deserializing + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + } + } + protected internal RewriteMethod internalRewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + [NonSerialized] + internal int numberOfTerms = 0; + + [Serializable] + private sealed class ConstantScoreFilterRewrite:RewriteMethod + { + public override Query Rewrite(IndexReader reader, MultiTermQuery query) + { + Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.Boost = query.Boost; + return result; + } + + // Make sure we are still a singleton even after deserializing + internal System.Object ReadResolve() + { + return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE; + } + } + + /// A rewrite method that first creates a private Filter, + /// by visiting each term in sequence and marking all docs + /// for that term. Matching documents are assigned a + /// constant score equal to the query's boost. + /// + ///

This method is faster than the BooleanQuery + /// rewrite methods when the number of matched terms or + /// matched documents is non-trivial. Also, it will never + /// hit an errant BooleanQuery.TooManyClauses + /// exception. + /// + ///

+ /// + /// + public static readonly RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new ConstantScoreFilterRewrite(); + + [Serializable] + private class ScoringBooleanQueryRewrite:RewriteMethod + { + public override Query Rewrite(IndexReader reader, MultiTermQuery query) + { + + FilteredTermEnum enumerator = query.GetEnum(reader); + BooleanQuery result = new BooleanQuery(true); + int count = 0; + try + { + do + { + Term t = enumerator.Term; + if (t != null) + { + TermQuery tq = new TermQuery(t); // found a match + tq.Boost = query.Boost * enumerator.Difference(); // set the boost + result.Add(tq, Occur.SHOULD); // add to query + count++; + } + } + while (enumerator.Next()); + } + finally + { + enumerator.Close(); + } + query.IncTotalNumberOfTerms(count); + return result; + } + + // Make sure we are still a singleton even after deserializing + protected internal virtual System.Object ReadResolve() + { + return Lucene.Net.Search.MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE; + } + } + + /// A rewrite method that first translates each term into + /// clause in a + /// BooleanQuery, and keeps the scores as computed by the + /// query. Note that typically such scores are + /// meaningless to the user, and require non-trivial CPU + /// to compute, so it's almost always better to use + /// instead. + /// + ///

NOTE: This rewrite method will hit + /// BooleanQuery.TooManyClauses if the number of terms + /// exceeds BooleanQuery.MaxClauseCount. + /// + ///

+ /// + /// + public static readonly RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = new ScoringBooleanQueryRewrite(); + + [Serializable] + private class ConstantScoreBooleanQueryRewrite:ScoringBooleanQueryRewrite + { + public override Query Rewrite(IndexReader reader, MultiTermQuery query) + { + // strip the scores off + Query result = new ConstantScoreQuery(new QueryWrapperFilter(base.Rewrite(reader, query))); + result.Boost = query.Boost; + return result; + } + + // Make sure we are still a singleton even after deserializing + protected internal override System.Object ReadResolve() + { + return Lucene.Net.Search.MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + } + } + + /// Like except + /// scores are not computed. Instead, each matching + /// document receives a constant score equal to the + /// query's boost. + /// + ///

NOTE: This rewrite method will hit + /// BooleanQuery.TooManyClauses if the number of terms + /// exceeds BooleanQuery.MaxClauseCount. + /// + ///

+ /// + /// + public static readonly RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = new ConstantScoreBooleanQueryRewrite(); + + + /// A rewrite method that tries to pick the best + /// constant-score rewrite method based on term and + /// document counts from the query. If both the number of + /// terms and documents is small enough, then + /// is used. + /// Otherwise, is + /// used. + /// + [Serializable] + public class ConstantScoreAutoRewrite:RewriteMethod + { + public ConstantScoreAutoRewrite() + { + InitBlock(); + } + private void InitBlock() + { + termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; + docCountPercent = DEFAULT_DOC_COUNT_PERCENT; + } + + // Defaults derived from rough tests with a 20.0 million + // doc Wikipedia index. With more than 350 terms in the + // query, the filter method is fastest: + public static int DEFAULT_TERM_COUNT_CUTOFF = 350; + + // If the query will hit more than 1 in 1000 of the docs + // in the index (0.1%), the filter method is fastest: + public static double DEFAULT_DOC_COUNT_PERCENT = 0.1; + + private int termCountCutoff; + private double docCountPercent; + + /// If the number of terms in this query is equal to or + /// larger than this setting then + /// is used. + /// + public virtual int TermCountCutoff + { + get { return termCountCutoff; } + set { termCountCutoff = value; } + } + + /// If the number of documents to be visited in the + /// postings exceeds this specified percentage of the + /// MaxDoc for the index, then + /// is used. + /// + /// 0.0 to 100.0 + public virtual double DocCountPercent + { + get { return docCountPercent; } + set { docCountPercent = value; } + } + + public override Query Rewrite(IndexReader reader, MultiTermQuery query) + { + // Get the enum and start visiting terms. If we + // exhaust the enum before hitting either of the + // cutoffs, we use ConstantBooleanQueryRewrite; else, + // ConstantFilterRewrite: + ICollection pendingTerms = new List(); + int docCountCutoff = (int) ((docCountPercent / 100.0) * reader.MaxDoc); + int termCountLimit = System.Math.Min(BooleanQuery.MaxClauseCount, termCountCutoff); + int docVisitCount = 0; + + FilteredTermEnum enumerator = query.GetEnum(reader); + try + { + while (true) + { + Term t = enumerator.Term; + if (t != null) + { + pendingTerms.Add(t); + // Loading the TermInfo from the terms dict here + // should not be costly, because 1) the + // query/filter will load the TermInfo when it + // runs, and 2) the terms dict has a cache: + docVisitCount += reader.DocFreq(t); + } + + if (pendingTerms.Count >= termCountLimit || docVisitCount >= docCountCutoff) + { + // Too many terms -- make a filter. 
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.Boost = query.Boost; + return result; + } + else if (!enumerator.Next()) + { + // Enumeration is done, and we hit a small + // enough number of terms & docs -- just make a + // BooleanQuery, now + BooleanQuery bq = new BooleanQuery(true); + foreach(Term term in pendingTerms) + { + TermQuery tq = new TermQuery(term); + bq.Add(tq, Occur.SHOULD); + } + // Strip scores + Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + result.Boost = query.Boost; + query.IncTotalNumberOfTerms(pendingTerms.Count); + return result; + } + } + } + finally + { + enumerator.Close(); + } + } + + public override int GetHashCode() + { + int prime = 1279; + return (int) (prime * termCountCutoff + BitConverter.DoubleToInt64Bits(docCountPercent)); + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + + ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj; + if (other.termCountCutoff != termCountCutoff) + { + return false; + } + + if (BitConverter.DoubleToInt64Bits(other.docCountPercent) != BitConverter.DoubleToInt64Bits(docCountPercent)) + { + return false; + } + + return true; + } + } + + /// Read-only default instance of + ///, with + /// set to + /// + /// + /// and + /// set to + /// + ///. + /// Note that you cannot alter the configuration of this + /// instance; you'll need to create a private instance + /// instead. + /// + public static readonly RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + + /// Constructs a query matching terms that cannot be represented with a single + /// Term. + /// + protected MultiTermQuery() + { + } + + /// Construct the enumeration to be used, expanding the pattern term. + protected internal abstract FilteredTermEnum GetEnum(IndexReader reader); + + /// Expert: Return the number of unique terms visited during execution of the query. + /// If there are many of them, you may consider using another query type + /// or optimize your total term count in index. + ///

This method is not thread safe; be sure to only call it when no query is running! + /// If you re-use the same query instance for another + /// search, be sure to first reset the term counter + /// with ClearTotalNumberOfTerms(). + ///

On optimized indexes / no MultiReaders, you get the correct number of + /// unique terms for the whole index. Use this number to compare different queries. + /// For non-optimized indexes this number can also be achieved in + /// non-constant-score mode. In constant-score mode you get the total number of + /// terms sought across all segments / sub-readers. + ///
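+ /// An illustrative sketch, assuming "query" is a concrete MultiTermQuery instance that has
+ /// already been rewritten/executed:
+ ///
+ /// int visitedTerms = query.TotalNumberOfTerms;
+ /// query.ClearTotalNumberOfTerms(); // reset before re-using the same query instance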

+ /// + /// + public virtual int TotalNumberOfTerms + { + get { return numberOfTerms; } + } + + /// Expert: Resets the counting of unique terms. + /// Do this before executing the query/filter. + /// + /// + /// + public virtual void ClearTotalNumberOfTerms() + { + numberOfTerms = 0; + } + + protected internal virtual void IncTotalNumberOfTerms(int inc) + { + numberOfTerms += inc; + } + + public override Query Rewrite(IndexReader reader) + { + return internalRewriteMethod.Rewrite(reader, this); + } + + /// Sets the rewrite method to be used when executing the + /// query. You can use one of the four core methods, or + /// implement your own subclass of . + /// + public virtual RewriteMethod RewriteMethod + { + get { return internalRewriteMethod; } + set { internalRewriteMethod = value; } + } + + //@Override + public override int GetHashCode() + { + int prime = 31; + int result = 1; + result = prime * result + System.Convert.ToInt32(Boost); + result = prime * result; + result += internalRewriteMethod.GetHashCode(); + return result; + } + + //@Override + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + MultiTermQuery other = (MultiTermQuery) obj; + if (System.Convert.ToInt32(Boost) != System.Convert.ToInt32(other.Boost)) + return false; + if (!internalRewriteMethod.Equals(other.internalRewriteMethod)) + { + return false; + } + return true; + } + static MultiTermQuery() + { + CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new AnonymousClassConstantScoreAutoRewrite(); + } + } + + /// Abstract class that defines how the query is rewritten. + [Serializable] + public abstract class RewriteMethod + { + public abstract Query Rewrite(IndexReader reader, MultiTermQuery query); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/MultiTermQueryWrapperFilter.cs b/external/Lucene.Net.Light/src/core/Search/MultiTermQueryWrapperFilter.cs new file mode 100644 index 0000000000..3cffb1a65e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/MultiTermQueryWrapperFilter.cs @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using TermDocs = Lucene.Net.Index.TermDocs; +using TermEnum = Lucene.Net.Index.TermEnum; +using OpenBitSet = Lucene.Net.Util.OpenBitSet; + +namespace Lucene.Net.Search +{ + + /// A wrapper for , that exposes its + /// functionality as a . + ///

+ /// MultiTermQueryWrapperFilter is not designed to + /// be used by itself. Normally you subclass it to provide a Filter + /// counterpart for a MultiTermQuery subclass. + ///

+ /// For example, filters such as NumericRangeFilter extend + /// MultiTermQueryWrapperFilter. + /// This class also provides the functionality behind + /// MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE; + /// this is why it is not abstract. + ///

+ [Serializable] + public class MultiTermQueryWrapperFilter : Filter + where T : MultiTermQuery + { + protected internal T query; + + /// Wrap a as a Filter. + protected internal MultiTermQueryWrapperFilter(T query) + { + this.query = query; + } + + //@Override + public override System.String ToString() + { + // query.toString should be ok for the filter, too, if the query boost is 1.0f + return query.ToString(); + } + + //@Override + public override bool Equals(System.Object o) + { + if (o == this) + return true; + if (o == null) + return false; + if (this.GetType().Equals(o.GetType())) + { + return this.query.Equals(((MultiTermQueryWrapperFilter) o).query); + } + return false; + } + + //@Override + public override int GetHashCode() + { + return query.GetHashCode(); + } + + /// Expert: Return the number of unique terms visited during execution of the filter. + /// If there are many of them, you may consider using another filter type + /// or optimize your total term count in index. + ///

This method is not thread safe; be sure to only call it when no filter is running! + /// If you re-use the same filter instance for another + /// search, be sure to first reset the term counter + /// with ClearTotalNumberOfTerms(). + ///
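+ /// An illustrative sketch, assuming "filter" is a MultiTermQueryWrapperFilter-derived instance
+ /// (for example a NumericRangeFilter) that has already been executed:
+ ///
+ /// int visitedTerms = filter.TotalNumberOfTerms;
+ /// filter.ClearTotalNumberOfTerms(); // reset before re-using the same filter instance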

+ /// + /// + public virtual int TotalNumberOfTerms + { + get { return query.TotalNumberOfTerms; } + } + + /// Expert: Resets the counting of unique terms. + /// Do this before executing the filter. + /// + /// + /// + public virtual void ClearTotalNumberOfTerms() + { + query.ClearTotalNumberOfTerms(); + } + + public override DocIdSet GetDocIdSet(IndexReader reader) + { + TermEnum enumerator = query.GetEnum(reader); + try + { + // if current term in enum is null, the enum is empty -> shortcut + if (enumerator.Term == null) + return DocIdSet.EMPTY_DOCIDSET; + // else fill into an OpenBitSet + OpenBitSet bitSet = new OpenBitSet(reader.MaxDoc); + int[] docs = new int[32]; + int[] freqs = new int[32]; + TermDocs termDocs = reader.TermDocs(); + try + { + int termCount = 0; + do + { + Term term = enumerator.Term; + if (term == null) + break; + termCount++; + termDocs.Seek(term); + while (true) + { + int count = termDocs.Read(docs, freqs); + if (count != 0) + { + for (int i = 0; i < count; i++) + { + bitSet.Set(docs[i]); + } + } + else + { + break; + } + } + } while (enumerator.Next()); + + query.IncTotalNumberOfTerms(termCount); // {{Aroush-2.9}} is the use of 'temp' as is right? + } + finally + { + termDocs.Close(); + } + + return bitSet; + } + finally + { + enumerator.Close(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/NumericRangeFilter.cs b/external/Lucene.Net.Light/src/core/Search/NumericRangeFilter.cs new file mode 100644 index 0000000000..5ca079089a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/NumericRangeFilter.cs @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream; +using NumericField = Lucene.Net.Documents.NumericField; +using NumericUtils = Lucene.Net.Util.NumericUtils; + +namespace Lucene.Net.Search +{ + + /// A that only accepts numeric values within + /// a specified range. To use this, you must first index the + /// numeric values using (expert: + ///). + /// + ///

You create a new NumericRangeFilter with the static + /// factory methods, eg: + /// + /// + /// Filter f = NumericRangeFilter.NewFloatRange("weight", + /// 0.03f, 0.10f, + /// true, true); + /// + /// + /// accepts all documents whose float valued "weight" field + /// ranges from 0.03 to 0.10, inclusive. + /// See NumericRangeQuery for details on how Lucene + /// indexes and searches numeric valued fields. + ///
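+ /// Half-open ranges are expressed by passing null for the open bound; for example, the
+ /// following sketch accepts all documents whose float valued "weight" field is strictly
+ /// greater than 0.10, with no upper bound:
+ ///
+ /// Filter f = NumericRangeFilter.NewFloatRange("weight", 0.10f, null, false, false);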

NOTE: This API is experimental and + /// might change in incompatible ways in the next + /// release. + /// + ///

+ /// 2.9 + /// + /// + [Serializable] + public sealed class NumericRangeFilter : MultiTermQueryWrapperFilter> + where T : struct, IComparable + // real numbers in C# are structs and IComparable with themselves, best constraint we have + { + internal NumericRangeFilter(NumericRangeQuery query) + : base(query) + { + } + + /// Returns the field name for this filter + public string Field + { + get { return query.Field; } + } + + /// Returns true if the lower endpoint is inclusive + public bool IncludesMin + { + get { return query.IncludesMin; } + } + + /// Returns true if the upper endpoint is inclusive + public bool IncludesMax + { + get { return query.IncludesMax; } + } + + /// Returns the lower value of this range filter + public T? Min + { + get { return query.Min; } + } + + /// Returns the upper value of this range filter + public T? Max + { + get { return query.Max; } + } + } + + public static class NumericRangeFilter + { + /// Factory that creates a NumericRangeFilter, that filters a long + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewLongRange(System.String field, int precisionStep, long? min, long? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewLongRange(field, precisionStep, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that queries a long + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewLongRange(System.String field, long? min, long? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewLongRange(field, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that filters a int + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewIntRange(System.String field, int precisionStep, int? min, int? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewIntRange(field, precisionStep, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that queries a int + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewIntRange(System.String field, int? min, int? 
max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewIntRange(field, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that filters a double + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewDoubleRange(System.String field, int precisionStep, double? min, double? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewDoubleRange(field, precisionStep, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that queries a double + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewDoubleRange(System.String field, double? min, double? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewDoubleRange(field, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that filters a float + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewFloatRange(System.String field, int precisionStep, float? min, float? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewFloatRange(field, precisionStep, min, max, minInclusive, maxInclusive)); + } + + /// Factory that creates a NumericRangeFilter, that queries a float + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeFilter NewFloatRange(System.String field, float? min, float? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeFilter(NumericRangeQuery.NewFloatRange(field, min, max, minInclusive, maxInclusive)); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/NumericRangeQuery.cs b/external/Lucene.Net.Light/src/core/Search/NumericRangeQuery.cs new file mode 100644 index 0000000000..46b2025f56 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/NumericRangeQuery.cs @@ -0,0 +1,665 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Index; +using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream; +using NumericField = Lucene.Net.Documents.NumericField; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using NumericUtils = Lucene.Net.Util.NumericUtils; +using StringHelper = Lucene.Net.Util.StringHelper; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + ///

A that matches numeric values within a + /// specified range. To use this, you must first index the + /// numeric values using (expert: + ///). If your terms are instead textual, + /// you should use . + /// is the filter equivalent of this + /// query.
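A rough sketch of that index/search pairing, assuming a "weight" field and the NumericField fluent setter from this codebase's Documents API:

    using Lucene.Net.Documents;
    using Lucene.Net.Search;

    // Index time: NumericField emits the trie-encoded terms this query relies on.
    var doc = new Document();
    doc.Add(new NumericField("weight").SetFloatValue(0.25f));

    // Search time: query the same field with the matching factory method.
    Query q = NumericRangeQuery.NewFloatRange("weight", 0.10f, 0.30f, true, true);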

+ /// + ///

You create a new NumericRangeQuery with the static + /// factory methods, eg: + /// + /// + /// Query q = NumericRangeQuery.newFloatRange("weight", + /// new Float(0.3f), new Float(0.10f), + /// true, true); + /// + /// + /// matches all documents whose float valued "weight" field + /// ranges from 0.3 to 0.10, inclusive. + /// + ///

The performance of NumericRangeQuery is much better + /// than the corresponding because the + /// number of terms that must be searched is usually far + /// fewer, thanks to trie indexing, described below.

+ /// + ///

You can optionally specify a precisionStep + /// when creating this query. This is necessary if you've + /// changed this configuration from its default (4) during + /// indexing. Lower values consume more disk space but speed + /// up searching. Suitable values are between 1 and + /// 8. A good starting point to test is 4, + /// which is the default value for all Numeric* + /// classes. See below for + /// details. + /// + ///
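If a non-default step was chosen at indexing time, the same value must be handed to the factory. A small sketch (field name, date, and step value are illustrative only):

    using System;
    using Lucene.Net.Search;

    int precisionStep = 8;                                   // must match the indexing-time step
    long? minTicks = new DateTime(2008, 1, 1).Ticks;
    long? maxTicks = null;                                   // open upper end
    Query q = NumericRangeQuery.NewLongRange("timestamp", precisionStep,
        minTicks, maxTicks, true, true);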

This query defaults to + /// for + /// 32 bit (int/float) ranges with precisionStep <8 and 64 + /// bit (long/double) ranges with precisionStep <6. + /// Otherwise it uses + /// as the + /// number of terms is likely to be high. With precision + /// steps of <4, this query can be run with one of the + /// BooleanQuery rewrite methods without changing + /// BooleanQuery's default max clause count. + /// + ///

NOTE: This API is experimental and + /// might change in incompatible ways in the next release. + /// + ///

How it works

+ /// + ///

See the publication about panFMP, + /// where this algorithm was described (referred to as TrieRangeQuery): + /// + ///

Schindler, U, Diepenbroek, M, 2008. + /// Generic XML-based Framework for Metadata Portals. + /// Computers & Geosciences 34 (12), 1947-1955. + /// doi:10.1016/j.cageo.2008.02.023
+ /// + ///

A quote from this paper: Because Apache Lucene is a full-text + /// search engine and not a conventional database, it cannot handle numerical ranges + /// (e.g., field value is inside user defined bounds, even dates are numerical values). + /// We have developed an extension to Apache Lucene that stores + /// the numerical values in a special string-encoded format with variable precision + /// (all numerical values like doubles, longs, floats, and ints are converted to + /// lexicographic sortable string representations and stored with different precisions + /// (for a more detailed description of how the values are stored, + /// see ). A range is then divided recursively into multiple intervals for searching: + /// The center of the range is searched only with the lowest possible precision in the trie, + /// while the boundaries are matched more exactly. This reduces the number of terms dramatically.

+ /// + ///

For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that + /// uses a lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the + /// lowest precision. Overall, a range could consist of a theoretical maximum of + /// 7*255*2 + 255 = 3825 distinct terms (when there is a term for every distinct value of an + /// 8-byte-number in the index and the range covers almost all of them; a maximum of 255 distinct values is used + /// because it would always be possible to reduce the full 256 values to one term with degraded precision). + /// In practice, we have seen up to 300 terms in most cases (index with 500,000 metadata records + /// and a uniform value distribution).

+ /// + ///

Precision Step

+ ///

You can choose any precisionStep when encoding values. + /// Lower step values mean more precisions and so more terms in index (and index gets larger). + /// On the other hand, the maximum number of terms to match reduces, which optimized query speed. + /// The formula to calculate the maximum term count is: + /// + /// n = [ (bitsPerValue/precisionStep - 1) * (2^precisionStep - 1 ) * 2 ] + (2^precisionStep - 1 ) + /// + ///

(this formula is only correct, when bitsPerValue/precisionStep is an integer; + /// in other cases, the value must be rounded up and the last summand must contain the modulo of the division as + /// precision step). + /// For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision + /// step of 2, n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking + /// in the term enum of the index. Because of this, the ideal precisionStep value can only + /// be found out by testing. Important: You can index with a lower precision step value and test search speed + /// using a multiple of the original step value.
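A small helper reproduces the two worked figures quoted here (assuming bitsPerValue divides evenly by precisionStep, as the restriction above requires):

    static int MaxTermCount(int bitsPerValue, int precisionStep)
    {
        int perLevel = (1 << precisionStep) - 1;             // 2^precisionStep - 1
        return (bitsPerValue / precisionStep - 1) * perLevel * 2 + perLevel;
    }

    // MaxTermCount(64, 4) == 15 * 15 * 2 + 15 == 465
    // MaxTermCount(64, 2) == 31 *  3 * 2 +  3 == 189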

+ /// + ///

Good values for precisionStep are depending on usage and data type: + /// + /// The default for all data types is 4, which is used, when no precisionStep is given. + /// Ideal value in most cases for 64 bit data types (long, double) is 6 or 8. + /// Ideal value in most cases for 32 bit data types (int, float) is 4. + /// Steps >64 for long/double and >32 for int/float produces one token + /// per value in the index and querying is as slow as a conventional . But it can be used + /// to produce fields, that are solely used for sorting (in this case simply use as + /// precisionStep). Using NumericFields for sorting + /// is ideal, because building the field cache is much faster than with text-only numbers. + /// Sorting is also possible with range query optimized fields using one of the above precisionSteps. + /// + /// + ///

Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed + /// that in boolean rewrite mode (with raised clause count) + /// took about 30-40 secs to complete, in constant score filter rewrite mode took 5 secs + /// and executing this class took <100ms to complete (on an Opteron64 machine, Java 1.5, 8 bit + /// precision step). This query type was developed for a geographic portal, where the performance for + /// e.g. bounding boxes or exact date/time stamps is important.

+ /// + ///

+ /// 2.9 + /// + /// + [Serializable] + public sealed class NumericRangeQuery : MultiTermQuery + where T : struct, IComparable // best equiv constraint for java's number class + { + internal NumericRangeQuery(System.String field, int precisionStep, int valSize, T? min, T? max, bool minInclusive, bool maxInclusive) + { + System.Diagnostics.Debug.Assert((valSize == 32 || valSize == 64)); + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + this.field = StringHelper.Intern(field); + this.precisionStep = precisionStep; + this.valSize = valSize; + this.min = min; + this.max = max; + this.minInclusive = minInclusive; + this.maxInclusive = maxInclusive; + + // For bigger precisionSteps this query likely + // hits too many terms, so set to CONSTANT_SCORE_FILTER right off + // (especially as the FilteredTermEnum is costly if wasted only for AUTO tests because it + // creates new enums from IndexReader for each sub-range) + switch (valSize) + { + + case 64: + RewriteMethod = (precisionStep > 6)?CONSTANT_SCORE_FILTER_REWRITE:CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + break; + + case 32: + RewriteMethod = (precisionStep > 8)?CONSTANT_SCORE_FILTER_REWRITE:CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + break; + + default: + // should never happen + throw new System.ArgumentException("valSize must be 32 or 64"); + + } + + // shortcut if upper bound == lower bound + if (min != null && min.Equals(max)) + { + RewriteMethod = CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + } + } + + //@Override + protected internal override FilteredTermEnum GetEnum(IndexReader reader) + { + return new NumericRangeTermEnum(this, reader); + } + + /// Returns the field name for this query + public string Field + { + get { return field; } + } + + /// Returns true if the lower endpoint is inclusive + public bool IncludesMin + { + get { return minInclusive; } + } + + /// Returns true if the upper endpoint is inclusive + public bool IncludesMax + { + get { return maxInclusive; } + } + + /// Returns the lower value of this range query + public T? Min + { + get { return min; } + } + + /// Returns the upper value of this range query + public T? Max + { + get { return max; } + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + if (!this.field.Equals(field)) + sb.Append(this.field).Append(':'); + return sb.Append(minInclusive ? '[' : '{').Append((min == null) ? "*" : min.ToString()).Append(" TO ").Append((max == null) ? "*" : max.ToString()).Append(maxInclusive ? ']' : '}').Append(ToStringUtils.Boost(Boost)).ToString(); + } + + public override bool Equals(System.Object o) + { + if (o == this) + return true; + if (!base.Equals(o)) + return false; + if (o is NumericRangeQuery) + { + NumericRangeQuery q = (NumericRangeQuery)o; + return ((System.Object)field == (System.Object)q.field && (q.min == null ? min == null : q.min.Equals(min)) && (q.max == null ? 
max == null : q.max.Equals(max)) && minInclusive == q.minInclusive && maxInclusive == q.maxInclusive && precisionStep == q.precisionStep); + } + return false; + } + + public override int GetHashCode() + { + int hash = base.GetHashCode(); + hash += (field.GetHashCode() ^ 0x4565fd66 + precisionStep ^ 0x64365465); + if (min != null) + hash += (min.GetHashCode() ^ 0x14fa55fb); + if (max != null) + hash += (max.GetHashCode() ^ 0x733fa5fe); + return hash + (minInclusive.GetHashCode() ^ 0x14fa55fb) + (maxInclusive.GetHashCode() ^ 0x733fa5fe); + } + + // field must be interned after reading from stream + //private void ReadObject(java.io.ObjectInputStream in) + //{ + // in.defaultReadObject(); + // field = StringHelper.intern(field); + //} + + + [System.Runtime.Serialization.OnDeserialized] + internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context) + { + field = StringHelper.Intern(field); + } + + // members (package private, to be also fast accessible by NumericRangeTermEnum) + internal System.String field; + internal int precisionStep; + internal int valSize; + internal T? min; + internal T? max; + internal bool minInclusive; + internal bool maxInclusive; + + /// Subclass of FilteredTermEnum for enumerating all terms that match the + /// sub-ranges for trie range queries. + ///

+ /// WARNING: This term enumeration is not guaranteed to be always ordered by + /// . + /// The ordering depends on how and + /// generates the sub-ranges. For + /// ordering is not relevant. + ///

+ private sealed class NumericRangeTermEnum:FilteredTermEnum + { + private class AnonymousClassLongRangeBuilder:NumericUtils.LongRangeBuilder + { + public AnonymousClassLongRangeBuilder(NumericRangeTermEnum enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(NumericRangeTermEnum enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private NumericRangeTermEnum enclosingInstance; + public NumericRangeTermEnum Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + //@Override + public override void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded) + { + Enclosing_Instance.rangeBounds.AddLast(minPrefixCoded); + Enclosing_Instance.rangeBounds.AddLast(maxPrefixCoded); + } + } + private class AnonymousClassIntRangeBuilder:NumericUtils.IntRangeBuilder + { + public AnonymousClassIntRangeBuilder(NumericRangeTermEnum enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(NumericRangeTermEnum enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private NumericRangeTermEnum enclosingInstance; + public NumericRangeTermEnum Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + //@Override + public override void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded) + { + Enclosing_Instance.rangeBounds.AddLast(minPrefixCoded); + Enclosing_Instance.rangeBounds.AddLast(maxPrefixCoded); + } + } + private void InitBlock(NumericRangeQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + termTemplate = new Term(Enclosing_Instance.field); + } + private NumericRangeQuery enclosingInstance; + public NumericRangeQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + private IndexReader reader; + private LinkedList rangeBounds = new LinkedList(); + private Term termTemplate; + private System.String currentUpperBound = null; + + private bool isDisposed; + + internal NumericRangeTermEnum(NumericRangeQuery enclosingInstance, IndexReader reader) + { + InitBlock(enclosingInstance); + this.reader = reader; + + Type rangeType = Nullable.GetUnderlyingType(typeof(T?)); + switch (Enclosing_Instance.valSize) + { + case 64: { + // lower + long minBound = System.Int64.MinValue; + if (rangeType == typeof(System.Int64)) + { + // added in these checks to emulate java. 
passing null give it no type (in old code), + // but .net can identifies it with generics and sets the bounds to 0, causing tests to fail + if (Enclosing_Instance.min != null) + minBound = System.Convert.ToInt64(Enclosing_Instance.min); + } + else if (rangeType == typeof(System.Double)) + { + if (Enclosing_Instance.min != null) + minBound = NumericUtils.DoubleToSortableLong(System.Convert.ToDouble(Enclosing_Instance.min)); + } + if (!Enclosing_Instance.minInclusive && Enclosing_Instance.min != null) + { + if (minBound == System.Int64.MaxValue) + break; + minBound++; + } + + // upper + long maxBound = System.Int64.MaxValue; + if (rangeType == typeof(System.Int64)) + { + if (Enclosing_Instance.max != null) + maxBound = System.Convert.ToInt64(Enclosing_Instance.max); + } + else if (rangeType == typeof(System.Double)) + { + if (Enclosing_Instance.max != null) + maxBound = NumericUtils.DoubleToSortableLong(System.Convert.ToDouble(Enclosing_Instance.max)); + } + if (!Enclosing_Instance.maxInclusive && Enclosing_Instance.max != null) + { + if (maxBound == System.Int64.MinValue) + break; + maxBound--; + } + + NumericUtils.SplitLongRange(new AnonymousClassLongRangeBuilder(this), Enclosing_Instance.precisionStep, minBound, maxBound); + break; + } + + + case 32: { + // lower + int minBound = System.Int32.MinValue; + if (rangeType == typeof(System.Int32)) + { + if (Enclosing_Instance.min != null) + minBound = System.Convert.ToInt32(Enclosing_Instance.min); + } + else if (rangeType == typeof(System.Single)) + { + if (Enclosing_Instance.min != null) + minBound = NumericUtils.FloatToSortableInt(System.Convert.ToSingle(Enclosing_Instance.min)); + } + if (!Enclosing_Instance.minInclusive && Enclosing_Instance.min != null) + { + if (minBound == System.Int32.MaxValue) + break; + minBound++; + } + + // upper + int maxBound = System.Int32.MaxValue; + if (rangeType == typeof(System.Int32)) + { + if (Enclosing_Instance.max != null) + maxBound = System.Convert.ToInt32(Enclosing_Instance.max); + } + else if (rangeType == typeof(System.Single)) + { + if (Enclosing_Instance.max != null) + maxBound = NumericUtils.FloatToSortableInt(System.Convert.ToSingle(Enclosing_Instance.max)); + } + if (!Enclosing_Instance.maxInclusive && Enclosing_Instance.max != null) + { + if (maxBound == System.Int32.MinValue) + break; + maxBound--; + } + + NumericUtils.SplitIntRange(new AnonymousClassIntRangeBuilder(this), Enclosing_Instance.precisionStep, minBound, maxBound); + break; + } + + + default: + // should never happen + throw new System.ArgumentException("valSize must be 32 or 64"); + + } + + // seek to first term + Next(); + } + + //@Override + public override float Difference() + { + return 1.0f; + } + + /// this is a dummy, it is not used by this class. + //@Override + public override bool EndEnum() + { + throw new NotSupportedException("not implemented"); + } + + /// this is a dummy, it is not used by this class. + protected internal override void SetEnum(TermEnum tenum) + { + throw new NotSupportedException("not implemented"); + } + + /// Compares if current upper bound is reached, + /// this also updates the term count for statistics. + /// In contrast to , a return value + /// of false ends iterating the current enum + /// and forwards to the next sub-range. + /// + //@Override + protected internal override bool TermCompare(Term term) + { + return (term.Field == Enclosing_Instance.field && String.CompareOrdinal(term.Text, currentUpperBound) <= 0); + } + + /// Increments the enumeration to the next element. 
True if one exists. + //@Override + public override bool Next() + { + // if a current term exists, the actual enum is initialized: + // try change to next term, if no such term exists, fall-through + if (currentTerm != null) + { + System.Diagnostics.Debug.Assert(actualEnum != null); + if (actualEnum.Next()) + { + currentTerm = actualEnum.Term; + if (TermCompare(currentTerm)) + return true; + } + } + // if all above fails, we go forward to the next enum, + // if one is available + currentTerm = null; + while (rangeBounds.Count >= 2) + { + // close the current enum and read next bounds + if (actualEnum != null) + { + actualEnum.Close(); + actualEnum = null; + } + string lowerBound = rangeBounds.First.Value; + rangeBounds.RemoveFirst(); + this.currentUpperBound = rangeBounds.First.Value; + rangeBounds.RemoveFirst(); + // create a new enum + actualEnum = reader.Terms(termTemplate.CreateTerm(lowerBound)); + currentTerm = actualEnum.Term; + if (currentTerm != null && TermCompare(currentTerm)) + return true; + // clear the current term for next iteration + currentTerm = null; + } + + // no more sub-range enums available + System.Diagnostics.Debug.Assert(rangeBounds.Count == 0 && currentTerm == null); + return false; + } + + /// Closes the enumeration to further activity, freeing resources. + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + rangeBounds.Clear(); + currentUpperBound = null; + + isDisposed = true; + base.Dispose(disposing); + } + } + } + + public static class NumericRangeQuery + { + ///
Factory that creates a NumericRangeQuery, that queries a long + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewLongRange(System.String field, int precisionStep, long? min, long? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a long + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewLongRange(System.String field, long? min, long? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a int + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewIntRange(System.String field, int precisionStep, int? min, int? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a int + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewIntRange(System.String field, int? min, int? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a double + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewDoubleRange(System.String field, int precisionStep, double? min, double? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a double + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. 
+ /// + public static NumericRangeQuery NewDoubleRange(System.String field, double? min, double? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a float + /// range using the given precisionStep. + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewFloatRange(System.String field, int precisionStep, float? min, float? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive); + } + + /// Factory that creates a NumericRangeQuery, that queries a float + /// range using the default precisionStep (4). + /// You can have half-open ranges (which are in fact </≤ or >/≥ queries) + /// by setting the min or max value to null. By setting inclusive to false, it will + /// match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. + /// + public static NumericRangeQuery NewFloatRange(System.String field, float? min, float? max, bool minInclusive, bool maxInclusive) + { + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ParallelMultiSearcher.cs b/external/Lucene.Net.Light/src/core/Search/ParallelMultiSearcher.cs new file mode 100644 index 0000000000..def231a8d2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ParallelMultiSearcher.cs @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if !NET35 + +using System; +using System.Threading; +using System.Threading.Tasks; +using System.Linq; +using Lucene.Net.Support; +using Lucene.Net.Util; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + /// Implements parallel search over a set of Searchables. + /// + ///

Applications usually need only call the inherited + /// or methods. + ///
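A minimal usage sketch, assuming two already-open IndexSearcher instances (searcherA and searcherB are placeholders; IndexSearcher satisfies the Searchable parameter through its Searcher base):

    using Lucene.Net.Search;

    var multi = new ParallelMultiSearcher(searcherA, searcherB);
    TopDocs hits = multi.Search(someQuery, 10);   // inherited Search(Query, n) overload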

+ public class ParallelMultiSearcher : MultiSearcher/*, IDisposable*/ //No need to implement IDisposable like java, nothing to dispose with the TPL + { + private class AnonymousClassCollector1:Collector + { + public AnonymousClassCollector1(Lucene.Net.Search.Collector collector, int start, ParallelMultiSearcher enclosingInstance) + { + InitBlock(collector, start, enclosingInstance); + } + private void InitBlock(Lucene.Net.Search.Collector collector, int start, ParallelMultiSearcher enclosingInstance) + { + this.collector = collector; + this.start = start; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Search.Collector collector; + private int start; + private ParallelMultiSearcher enclosingInstance; + public ParallelMultiSearcher Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public override void SetScorer(Scorer scorer) + { + collector.SetScorer(scorer); + } + public override void Collect(int doc) + { + collector.Collect(doc); + } + public override void SetNextReader(IndexReader reader, int docBase) + { + collector.SetNextReader(reader, start + docBase); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return collector.AcceptsDocsOutOfOrder; } + } + } + + private Searchable[] searchables; + private int[] starts; + + /// Creates a which searches searchables. + public ParallelMultiSearcher(params Searchable[] searchables) + : base(searchables) + { + this.searchables = searchables; + this.starts = GetStarts(); + } + + /// + /// Executes each 's docFreq() in its own thread and + /// waits for each search to complete and merge the results back together. + /// + public override int DocFreq(Term term) + { + Task[] tasks = new Task[searchables.Length]; + for (int i = 0; i < searchables.Length; i++) + { + Searchable searchable = searchables[i]; + tasks[i] = Task.Factory.StartNew(() => searchable.DocFreq(term)); + } + + Task.WaitAll(tasks); + return tasks.Sum(task => task.Result); + } + + /// A search implementation which executes each + /// in its own thread and waits for each search to complete + /// and merge the results back together. + /// + public override TopDocs Search(Weight weight, Filter filter, int nDocs) + { + HitQueue hq = new HitQueue(nDocs, false); + object lockObj = new object(); + + Task[] tasks = new Task[searchables.Length]; + //search each searchable + for (int i = 0; i < searchables.Length; i++) + { + int cur = i; + tasks[i] = + Task.Factory.StartNew(() => MultiSearcherCallableNoSort(ThreadLock.MonitorLock, lockObj, searchables[cur], weight, filter, + nDocs, hq, cur, starts)); + } + + int totalHits = 0; + float maxScore = float.NegativeInfinity; + + + Task.WaitAll(tasks); + foreach(TopDocs topDocs in tasks.Select(x => x.Result)) + { + totalHits += topDocs.TotalHits; + maxScore = Math.Max(maxScore, topDocs.MaxScore); + } + + ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()]; + for (int i = hq.Size() - 1; i >= 0; i--) // put docs in array + scoreDocs[i] = hq.Pop(); + + return new TopDocs(totalHits, scoreDocs, maxScore); + } + + /// A search implementation allowing sorting which spans a new thread for each + /// Searchable, waits for each search to complete and merges + /// the results back together. 
+ /// + public override TopFieldDocs Search(Weight weight, Filter filter, int nDocs, Sort sort) + { + if (sort == null) throw new ArgumentNullException("sort"); + + FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue(nDocs); + object lockObj = new object(); + + Task[] tasks = new Task[searchables.Length]; + for (int i = 0; i < searchables.Length; i++) // search each searchable + { + int cur = i; + tasks[i] = + Task.Factory.StartNew( + () => MultiSearcherCallableWithSort(ThreadLock.MonitorLock, lockObj, searchables[cur], weight, filter, nDocs, hq, sort, cur, + starts)); + } + + int totalHits = 0; + float maxScore = float.NegativeInfinity; + + Task.WaitAll(tasks); + foreach (TopFieldDocs topFieldDocs in tasks.Select(x => x.Result)) + { + totalHits += topFieldDocs.TotalHits; + maxScore = Math.Max(maxScore, topFieldDocs.MaxScore); + } + + ScoreDoc[] scoreDocs = new ScoreDoc[hq.Size()]; + for (int i = hq.Size() - 1; i >= 0; i--) + scoreDocs[i] = hq.Pop(); + + return new TopFieldDocs(totalHits, scoreDocs, hq.GetFields(), maxScore); + } + + /// Lower-level search API. + /// + ///

is called for every matching document. + /// + ///

Applications should only use this if they need all of the + /// matching documents. The high-level search API () + /// is usually more efficient, as it skips + /// non-high-scoring hits. + ///

This method cannot be parallelized, because + /// supports no concurrent access. + ///

+ /// to match documents + /// + /// if non-null, a bitset used to eliminate some documents + /// + /// to receive hits + /// + /// TODO: parallelize this one too + /// + public override void Search(Weight weight, Filter filter, Collector collector) + { + for (int i = 0; i < searchables.Length; i++) + { + + int start = starts[i]; + + Collector hc = new AnonymousClassCollector1(collector, start, this); + + searchables[i].Search(weight, filter, hc); + } + } + } +} + +#endif \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/AveragePayloadFunction.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/AveragePayloadFunction.cs new file mode 100644 index 0000000000..b262867304 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/AveragePayloadFunction.cs @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search.Payloads +{ + + + /// Calculate the final score as the average score of all payloads seen. + ///

+ /// Is thread safe and completely reusable. + /// + /// + ///
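How the two callbacks cooperate for the averaging case, in a rough driver sketch that mirrors what the payload span scorers do (the document id, field, offsets, and per-payload scores are illustrative values, not API):

    int docId = 0, start = 0, end = 0;
    string field = "body";

    PayloadFunction func = new AveragePayloadFunction();
    float running = 0f;
    int seen = 0;
    foreach (float payloadScore in new[] { 2f, 4f, 6f })
    {
        running = func.CurrentScore(docId, field, start, end, seen, running, payloadScore);
        seen++;
    }
    float docScore = func.DocScore(docId, field, seen, running);  // (2 + 4 + 6) / 3 == 4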

+ [Serializable] + public class AveragePayloadFunction:PayloadFunction + { + + public override float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) + { + return currentPayloadScore + currentScore; + } + + public override float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore) + { + return numPayloadsSeen > 0?(payloadScore / numPayloadsSeen):1; + } + + public override int GetHashCode() + { + int prime = 31; + int result = 1; + result = prime * result + this.GetType().GetHashCode(); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/MaxPayloadFunction.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/MaxPayloadFunction.cs new file mode 100644 index 0000000000..3c02a80d7f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/MaxPayloadFunction.cs @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search.Payloads +{ + + + /// Returns the maximum payload score seen, else 1 if there are no payloads on the doc. + ///

+ /// Is thread safe and completely reusable. + /// + /// + ///

+ [Serializable] + public class MaxPayloadFunction:PayloadFunction + { + public override float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) + { + if (numPayloadsSeen == 0) + { + return currentPayloadScore; + } + else + { + return System.Math.Max(currentPayloadScore, currentScore); + } + } + + public override float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore) + { + return numPayloadsSeen > 0?payloadScore:1; + } + + public override int GetHashCode() + { + int prime = 31; + int result = 1; + result = prime * result + this.GetType().GetHashCode(); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/MinPayloadFunction.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/MinPayloadFunction.cs new file mode 100644 index 0000000000..0dfa82da99 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/MinPayloadFunction.cs @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search.Payloads +{ + + /// Calculates the minimum payload seen + /// + /// + /// + [Serializable] + public class MinPayloadFunction:PayloadFunction + { + + public override float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore) + { + if (numPayloadsSeen == 0) + { + return currentPayloadScore; + } + else + { + return System.Math.Min(currentPayloadScore, currentScore); + } + } + + public override float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore) + { + return numPayloadsSeen > 0?payloadScore:1; + } + + public override int GetHashCode() + { + int prime = 31; + int result = 1; + result = prime * result + this.GetType().GetHashCode(); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadFunction.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadFunction.cs new file mode 100644 index 0000000000..c4a522eece --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadFunction.cs @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search.Payloads +{ + + + /// An abstract class that defines a way for Payload*Query instances + /// to transform the cumulative effects of payload scores for a document. + /// + /// + /// for more information + /// + ///

+ /// This class and its derivations are experimental and subject to change + /// + /// + /// + [Serializable] + public abstract class PayloadFunction + { + + ///

Calculate the score up to this point for this doc and field + /// The current doc + /// + /// The field + /// + /// The start position of the matching Span + /// + /// The end position of the matching Span + /// + /// The number of payloads seen so far + /// + /// The current score so far + /// + /// The score for the current payload + /// + /// The new current Score + /// + /// + /// + /// + public abstract float CurrentScore(int docId, System.String field, int start, int end, int numPayloadsSeen, float currentScore, float currentPayloadScore); + + /// Calculate the final score for all the payloads seen so far for this doc/field + /// The current doc + /// + /// The current field + /// + /// The total number of payloads seen on this document + /// + /// The raw score for those payloads + /// + /// The final score for the payloads + /// + public abstract float DocScore(int docId, System.String field, int numPayloadsSeen, float payloadScore); + + abstract public override int GetHashCode(); + + abstract public override bool Equals(System.Object o); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadNearQuery.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadNearQuery.cs new file mode 100644 index 0000000000..6b99f59fb7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadNearQuery.cs @@ -0,0 +1,284 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Explanation = Lucene.Net.Search.Explanation; +using Scorer = Lucene.Net.Search.Scorer; +using Searcher = Lucene.Net.Search.Searcher; +using Similarity = Lucene.Net.Search.Similarity; +using Weight = Lucene.Net.Search.Weight; +using NearSpansOrdered = Lucene.Net.Search.Spans.NearSpansOrdered; +using NearSpansUnordered = Lucene.Net.Search.Spans.NearSpansUnordered; +using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery; +using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; +using SpanScorer = Lucene.Net.Search.Spans.SpanScorer; +using SpanWeight = Lucene.Net.Search.Spans.SpanWeight; + +namespace Lucene.Net.Search.Payloads +{ + + /// This class is very similar to + /// except that it factors + /// in the value of the payloads located at each of the positions where the + /// occurs. + ///

+ /// In order to take advantage of this, you must override + /// + /// which returns 1 by default. + ///

+ /// Payload scores are aggregated using a pluggable . + /// + ///

+ /// + /// + [Serializable] + public class PayloadNearQuery:SpanNearQuery, System.ICloneable + { + protected internal System.String fieldName; + protected internal PayloadFunction function; + + public PayloadNearQuery(SpanQuery[] clauses, int slop, bool inOrder):this(clauses, slop, inOrder, new AveragePayloadFunction()) + { + } + + public PayloadNearQuery(SpanQuery[] clauses, int slop, bool inOrder, PayloadFunction function):base(clauses, slop, inOrder) + { + fieldName = clauses[0].Field; // all clauses must have same field + this.function = function; + } + + public override Weight CreateWeight(Searcher searcher) + { + return new PayloadNearSpanWeight(this, this, searcher); + } + + public override System.Object Clone() + { + int sz = clauses.Count; + SpanQuery[] newClauses = new SpanQuery[sz]; + + for (int i = 0; i < sz; i++) + { + newClauses[i] = clauses[i]; + } + PayloadNearQuery boostingNearQuery = new PayloadNearQuery(newClauses, internalSlop, inOrder); + boostingNearQuery.Boost = Boost; + return boostingNearQuery; + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("payloadNear(["); + var i = clauses.GetEnumerator(); + while (i.MoveNext()) + { + SpanQuery clause = i.Current; + buffer.Append(clause.ToString(field)); + if (i.MoveNext()) + { + buffer.Append(", "); + } + } + buffer.Append("], "); + buffer.Append(internalSlop); + buffer.Append(", "); + buffer.Append(inOrder); + buffer.Append(")"); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + // @Override + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((fieldName == null)?0:fieldName.GetHashCode()); + result = prime * result + ((function == null)?0:function.GetHashCode()); + return result; + } + + // @Override + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + PayloadNearQuery other = (PayloadNearQuery) obj; + if (fieldName == null) + { + if (other.fieldName != null) + return false; + } + else if (!fieldName.Equals(other.fieldName)) + return false; + if (function == null) + { + if (other.function != null) + return false; + } + else if (!function.Equals(other.function)) + return false; + return true; + } + + [Serializable] + public class PayloadNearSpanWeight:SpanWeight + { + private void InitBlock(PayloadNearQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private PayloadNearQuery enclosingInstance; + public PayloadNearQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public PayloadNearSpanWeight(PayloadNearQuery enclosingInstance, SpanQuery query, Searcher searcher):base(query, searcher) + { + InitBlock(enclosingInstance); + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + return new PayloadNearSpanScorer(enclosingInstance, internalQuery.GetSpans(reader), this, similarity, reader.Norms(internalQuery.Field)); + } + } + + public class PayloadNearSpanScorer:SpanScorer + { + private void InitBlock(PayloadNearQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + similarity = Similarity; + } + private PayloadNearQuery enclosingInstance; + public PayloadNearQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + new internal 
Lucene.Net.Search.Spans.Spans spans; + + protected internal float payloadScore; + private int payloadsSeen; + internal Similarity similarity; + + protected internal PayloadNearSpanScorer(PayloadNearQuery enclosingInstance, Lucene.Net.Search.Spans.Spans spans, Weight weight, Similarity similarity, byte[] norms):base(spans, weight, similarity, norms) + { + InitBlock(enclosingInstance); + this.spans = spans; + } + + // Get the payloads associated with all underlying subspans + public virtual void GetPayloads(Lucene.Net.Search.Spans.Spans[] subSpans) + { + for (int i = 0; i < subSpans.Length; i++) + { + if (subSpans[i] is NearSpansOrdered) + { + if (((NearSpansOrdered) subSpans[i]).IsPayloadAvailable()) + { + ProcessPayloads(((NearSpansOrdered) subSpans[i]).GetPayload(), subSpans[i].Start(), subSpans[i].End()); + } + GetPayloads(((NearSpansOrdered) subSpans[i]).GetSubSpans()); + } + else if (subSpans[i] is NearSpansUnordered) + { + if (((NearSpansUnordered) subSpans[i]).IsPayloadAvailable()) + { + ProcessPayloads(((NearSpansUnordered) subSpans[i]).GetPayload(), subSpans[i].Start(), subSpans[i].End()); + } + GetPayloads(((NearSpansUnordered) subSpans[i]).GetSubSpans()); + } + } + } + + /// By default, uses the to score the payloads, but + /// can be overridden to do other things. + /// + /// + /// The payloads + /// + /// The start position of the span being scored + /// + /// The end position of the span being scored + /// + /// + /// + /// + protected internal virtual void ProcessPayloads(System.Collections.Generic.ICollection payLoads, int start, int end) + { + foreach (byte[] thePayload in payLoads) + { + payloadScore = Enclosing_Instance.function.CurrentScore(doc, Enclosing_Instance.fieldName, start, end, payloadsSeen, payloadScore, similarity.ScorePayload(doc, Enclosing_Instance.fieldName, spans.Start(), spans.End(), thePayload, 0, thePayload.Length)); + ++payloadsSeen; + } + } + + // + public /*protected internal*/ override bool SetFreqCurrentDoc() + { + if (!more) + { + return false; + } + Lucene.Net.Search.Spans.Spans[] spansArr = new Lucene.Net.Search.Spans.Spans[1]; + spansArr[0] = spans; + payloadScore = 0; + payloadsSeen = 0; + GetPayloads(spansArr); + return base.SetFreqCurrentDoc(); + } + + public override float Score() + { + + return base.Score() * Enclosing_Instance.function.DocScore(doc, Enclosing_Instance.fieldName, payloadsSeen, payloadScore); + } + + protected internal override Explanation Explain(int doc) + { + Explanation result = new Explanation(); + Explanation nonPayloadExpl = base.Explain(doc); + result.AddDetail(nonPayloadExpl); + Explanation payloadBoost = new Explanation(); + result.AddDetail(payloadBoost); + float avgPayloadScore = (payloadsSeen > 0?(payloadScore / payloadsSeen):1); + payloadBoost.Value = avgPayloadScore; + payloadBoost.Description = "scorePayload(...)"; + result.Value = nonPayloadExpl.Value * avgPayloadScore; + result.Description = "bnq, product of:"; + return result; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadSpanUtil.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadSpanUtil.cs new file mode 100644 index 0000000000..f7cd2aabd2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadSpanUtil.cs @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using BooleanClause = Lucene.Net.Search.BooleanClause; +using BooleanQuery = Lucene.Net.Search.BooleanQuery; +using DisjunctionMaxQuery = Lucene.Net.Search.DisjunctionMaxQuery; +using FilteredQuery = Lucene.Net.Search.FilteredQuery; +using MultiPhraseQuery = Lucene.Net.Search.MultiPhraseQuery; +using PhraseQuery = Lucene.Net.Search.PhraseQuery; +using Query = Lucene.Net.Search.Query; +using TermQuery = Lucene.Net.Search.TermQuery; +using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery; +using SpanOrQuery = Lucene.Net.Search.Spans.SpanOrQuery; +using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; +using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; + +namespace Lucene.Net.Search.Payloads +{ + + /// Experimental class to get set of payloads for most standard Lucene queries. + /// Operates like Highlighter - IndexReader should only contain doc of interest, + /// best to use MemoryIndex. + /// + ///

+ /// + /// WARNING: The status of the Payloads feature is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case. + /// + ///
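A short sketch of the intended call pattern (the reader and query are assumed to exist; per the note above, the reader would ideally contain only the document of interest, e.g. via MemoryIndex):

    var util = new PayloadSpanUtil(reader);
    var payloads = util.GetPayloadsForQuery(query);   // raw payload byte arrays
    foreach (byte[] payload in payloads)
    {
        // decode or inspect each payload as the application requires
    }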

+ public class PayloadSpanUtil + { + private IndexReader reader; + + /// that contains doc with payloads to extract + /// + public PayloadSpanUtil(IndexReader reader) + { + this.reader = reader; + } + + /// Query should be rewritten for wild/fuzzy support. + /// + /// + /// + /// + /// payloads Collection + /// + /// IOException + public virtual ICollection GetPayloadsForQuery(Query query) + { + ICollection payloads = new List(); + QueryToSpanQuery(query, payloads); + return payloads; + } + + private void QueryToSpanQuery(Query query, ICollection payloads) + { + if (query is BooleanQuery) + { + BooleanClause[] queryClauses = ((BooleanQuery) query).GetClauses(); + + for (int i = 0; i < queryClauses.Length; i++) + { + if (!queryClauses[i].IsProhibited) + { + QueryToSpanQuery(queryClauses[i].Query, payloads); + } + } + } + else if (query is PhraseQuery) + { + Term[] phraseQueryTerms = ((PhraseQuery) query).GetTerms(); + SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.Length]; + for (int i = 0; i < phraseQueryTerms.Length; i++) + { + clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); + } + + int slop = ((PhraseQuery) query).Slop; + bool inorder = false; + + if (slop == 0) + { + inorder = true; + } + + SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder); + sp.Boost = query.Boost; + GetPayloads(payloads, sp); + } + else if (query is TermQuery) + { + SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).Term); + stq.Boost = query.Boost; + GetPayloads(payloads, stq); + } + else if (query is SpanQuery) + { + GetPayloads(payloads, (SpanQuery) query); + } + else if (query is FilteredQuery) + { + QueryToSpanQuery(((FilteredQuery) query).Query, payloads); + } + else if (query is DisjunctionMaxQuery) + { + + for (IEnumerator iterator = ((DisjunctionMaxQuery)query).GetEnumerator(); iterator.MoveNext(); ) + { + QueryToSpanQuery(iterator.Current, payloads); + } + } + else if (query is MultiPhraseQuery) + { + MultiPhraseQuery mpq = (MultiPhraseQuery) query; + System.Collections.Generic.IList termArrays = mpq.GetTermArrays(); + int[] positions = mpq.GetPositions(); + if (positions.Length > 0) + { + + int maxPosition = positions[positions.Length - 1]; + for (int i = 0; i < positions.Length - 1; ++i) + { + if (positions[i] > maxPosition) + { + maxPosition = positions[i]; + } + } + + IList[] disjunctLists = new IList[maxPosition + 1]; + int distinctPositions = 0; + + for (int i = 0; i < termArrays.Count; ++i) + { + Term[] termArray = termArrays[i]; + IList disjuncts = disjunctLists[positions[i]]; + if (disjuncts == null) + { + disjuncts = (disjunctLists[positions[i]] = new List(termArray.Length)); + ++distinctPositions; + } + foreach(Term term in termArray) + { + disjuncts.Add(new SpanTermQuery(term)); + } + } + + int positionGaps = 0; + int position = 0; + SpanQuery[] clauses = new SpanQuery[distinctPositions]; + for (int i = 0; i < disjunctLists.Length; ++i) + { + IList disjuncts = disjunctLists[i]; + if (disjuncts != null) + { + clauses[position++] = new SpanOrQuery((SpanQuery[]) (disjuncts.ToArray())); + } + else + { + ++positionGaps; + } + } + + int slop = mpq.Slop; + bool inorder = (slop == 0); + + SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); + sp.Boost = query.Boost; + GetPayloads(payloads, sp); + } + } + } + + private void GetPayloads(ICollection payloads, SpanQuery query) + { + Spans.Spans spans = query.GetSpans(reader); + + while (spans.Next() == true) + { + if (spans.IsPayloadAvailable()) + { + ICollection payload = spans.GetPayload(); + foreach 
(byte[] bytes in payload) + { + payloads.Add(bytes); + } + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadTermQuery.cs b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadTermQuery.cs new file mode 100644 index 0000000000..d6ec5bdac1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Payloads/PayloadTermQuery.cs @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using TermPositions = Lucene.Net.Index.TermPositions; +using ComplexExplanation = Lucene.Net.Search.ComplexExplanation; +using Explanation = Lucene.Net.Search.Explanation; +using Scorer = Lucene.Net.Search.Scorer; +using Searcher = Lucene.Net.Search.Searcher; +using Similarity = Lucene.Net.Search.Similarity; +using Weight = Lucene.Net.Search.Weight; +using SpanScorer = Lucene.Net.Search.Spans.SpanScorer; +using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery; +using SpanWeight = Lucene.Net.Search.Spans.SpanWeight; +using TermSpans = Lucene.Net.Search.Spans.TermSpans; + +namespace Lucene.Net.Search.Payloads +{ + + /// This class is very similar to + /// except that it factors + /// in the value of the payload located at each of the positions where the + /// occurs. + ///

+ /// In order to take advantage of this, you must override + /// Similarity.ScorePayload, which returns 1 by default. + ///

+ /// Payload scores are aggregated using a pluggable PayloadFunction. + /// + ///
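As a rough usage sketch (an editor's illustration, not part of the imported Lucene.Net sources): the "body" field, the term text, and the AveragePayloadFunction are assumptions, and the index is assumed to have been built with a payload-producing analyzer and a Similarity whose ScorePayload is overridden.

    using Lucene.Net.Index;
    using Lucene.Net.Search;
    using Lucene.Net.Search.Payloads;

    class PayloadTermQueryExample
    {
        // Scores matches of body:lucene by the average payload value seen at
        // each matching position, multiplied by the normal span score.
        static TopDocs Run(IndexSearcher searcher)
        {
            var query = new PayloadTermQuery(new Term("body", "lucene"),
                                             new AveragePayloadFunction(),
                                             true); // includeSpanScore
            return searcher.Search(query, 10);
        }
    }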

+ [Serializable] + public class PayloadTermQuery:SpanTermQuery + { + protected internal PayloadFunction function; + private bool includeSpanScore; + + public PayloadTermQuery(Term term, PayloadFunction function):this(term, function, true) + { + } + + public PayloadTermQuery(Term term, PayloadFunction function, bool includeSpanScore):base(term) + { + this.function = function; + this.includeSpanScore = includeSpanScore; + } + + public override Weight CreateWeight(Searcher searcher) + { + return new PayloadTermWeight(this, this, searcher); + } + + [Serializable] + protected internal class PayloadTermWeight:SpanWeight + { + private void InitBlock(PayloadTermQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private PayloadTermQuery enclosingInstance; + public PayloadTermQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + public PayloadTermWeight(PayloadTermQuery enclosingInstance, PayloadTermQuery query, Searcher searcher):base(query, searcher) + { + InitBlock(enclosingInstance); + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + return new PayloadTermSpanScorer(this, (TermSpans) internalQuery.GetSpans(reader), this, similarity, reader.Norms(internalQuery.Field)); + } + + protected internal class PayloadTermSpanScorer:SpanScorer + { + private void InitBlock(PayloadTermWeight enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private PayloadTermWeight enclosingInstance; + public PayloadTermWeight Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + // TODO: is this the best way to allocate this? + protected internal byte[] payload = new byte[256]; + protected internal TermPositions positions; + protected internal float payloadScore; + protected internal int payloadsSeen; + + public PayloadTermSpanScorer(PayloadTermWeight enclosingInstance, TermSpans spans, Weight weight, Similarity similarity, byte[] norms):base(spans, weight, similarity, norms) + { + InitBlock(enclosingInstance); + positions = spans.Positions; + } + + public /*protected internal*/ override bool SetFreqCurrentDoc() + { + if (!more) + { + return false; + } + doc = spans.Doc(); + freq = 0.0f; + payloadScore = 0; + payloadsSeen = 0; + Similarity similarity1 = Similarity; + while (more && doc == spans.Doc()) + { + int matchLength = spans.End() - spans.Start(); + + freq += similarity1.SloppyFreq(matchLength); + ProcessPayload(similarity1); + + more = spans.Next(); // this moves positions to the next match in this + // document + } + return more || (freq != 0); + } + + protected internal virtual void ProcessPayload(Similarity similarity) + { + if (positions.IsPayloadAvailable) + { + payload = positions.GetPayload(payload, 0); + payloadScore = Enclosing_Instance.Enclosing_Instance.function.CurrentScore(doc, Enclosing_Instance.Enclosing_Instance.internalTerm.Field, spans.Start(), spans.End(), payloadsSeen, payloadScore, similarity.ScorePayload(doc, Enclosing_Instance.Enclosing_Instance.internalTerm.Field, spans.Start(), spans.End(), payload, 0, positions.PayloadLength)); + payloadsSeen++; + } + else + { + // zero out the payload? + } + } + + /// + /// * + /// + /// IOException + public override float Score() + { + + return Enclosing_Instance.Enclosing_Instance.includeSpanScore?GetSpanScore() * GetPayloadScore():GetPayloadScore(); + } + + /// Returns the SpanScorer score only. + ///

+ /// Should not be overridden without good cause! + /// + ///

+ /// the score for just the Span part w/o the payload + /// + /// IOException + /// + /// + /// + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + protected internal virtual float GetSpanScore() + { + return base.Score(); + } + + /// The score for the payload + /// + /// + /// The score, as calculated by + /// + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + protected internal virtual float GetPayloadScore() + { + return Enclosing_Instance.Enclosing_Instance.function.DocScore(doc, Enclosing_Instance.Enclosing_Instance.internalTerm.Field, payloadsSeen, payloadScore); + } + + protected internal override Explanation Explain(int doc) + { + ComplexExplanation result = new ComplexExplanation(); + Explanation nonPayloadExpl = base.Explain(doc); + result.AddDetail(nonPayloadExpl); + // QUESTION: Is there a way to avoid this skipTo call? We need to know + // whether to load the payload or not + Explanation payloadBoost = new Explanation(); + result.AddDetail(payloadBoost); + + float payloadScore = GetPayloadScore(); + payloadBoost.Value = payloadScore; + // GSI: I suppose we could toString the payload, but I don't think that + // would be a good idea + payloadBoost.Description = "scorePayload(...)"; + result.Value = nonPayloadExpl.Value * payloadScore; + result.Description = "btq, product of:"; + result.Match = nonPayloadExpl.Value == 0?false:true; // LUCENE-1303 + return result; + } + } + } + + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((function == null)?0:function.GetHashCode()); + result = prime * result + (includeSpanScore?1231:1237); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + PayloadTermQuery other = (PayloadTermQuery) obj; + if (function == null) + { + if (other.function != null) + return false; + } + else if (!function.Equals(other.function)) + return false; + if (includeSpanScore != other.includeSpanScore) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PhrasePositions.cs b/external/Lucene.Net.Light/src/core/Search/PhrasePositions.cs new file mode 100644 index 0000000000..5614aed288 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PhrasePositions.cs @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using Lucene.Net.Index; + +namespace Lucene.Net.Search +{ + + /// Position of a term in a document that takes into account the term offset within the phrase. + sealed class PhrasePositions + { + internal int doc; // current doc + internal int position; // position in doc + internal int count; // remaining pos in this doc + internal int offset; // position in phrase + internal TermPositions tp; // stream of positions + internal PhrasePositions next; // used to make lists + internal bool repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1) + + internal PhrasePositions(TermPositions t, int o) + { + tp = t; + offset = o; + } + + internal bool Next() + { + // increments to next doc + if (!tp.Next()) + { + tp.Close(); // close stream + doc = System.Int32.MaxValue; // sentinel value + return false; + } + doc = tp.Doc; + position = 0; + return true; + } + + internal bool SkipTo(int target) + { + if (!tp.SkipTo(target)) + { + tp.Close(); // close stream + doc = System.Int32.MaxValue; // sentinel value + return false; + } + doc = tp.Doc; + position = 0; + return true; + } + + + internal void FirstPosition() + { + count = tp.Freq; // read first pos + NextPosition(); + } + + /// Go to next location of this term current document, and set + /// position as location - offset, so that a + /// matching exact phrase is easily identified when all PhrasePositions + /// have exactly the same position. + /// + internal bool NextPosition() + { + if (count-- > 0) + { + // read subsequent pos's + position = tp.NextPosition() - offset; + return true; + } + else + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PhraseQuery.cs b/external/Lucene.Net.Light/src/core/Search/PhraseQuery.cs new file mode 100644 index 0000000000..9e96180b41 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PhraseQuery.cs @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using TermPositions = Lucene.Net.Index.TermPositions; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation; + +namespace Lucene.Net.Search +{ + + /// A Query that matches documents containing a particular sequence of terms. + /// A PhraseQuery is built by QueryParser for input like "new york". + /// + ///

This query may be combined with other terms or queries with a BooleanQuery. + ///

+ [Serializable] + public class PhraseQuery:Query + { + private System.String field; + private EquatableList terms = new EquatableList(4); + private EquatableList positions = new EquatableList(4); + private int maxPosition = 0; + private int slop = 0; + + /// Constructs an empty phrase query. + public PhraseQuery() + { + } + + /// Sets the number of other words permitted between words in query phrase. + /// If zero, then this is an exact phrase search. For larger values this works + /// like a WITHIN or NEAR operator. + ///

The slop is in fact an edit-distance, where the units correspond to + /// moves of terms in the query phrase out of position. For example, to switch + /// the order of two words requires two moves (the first move places the words + /// atop one another), so to permit re-orderings of phrases, the slop must be + /// at least two. + ///

More exact matches are scored higher than sloppier matches, thus search + /// results are sorted by exactness. + ///

The slop is zero by default, requiring exact matches. + ///
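A minimal sketch of the slop behaviour described above (editor's illustration, not part of the imported sources; the "content" field and the searcher are assumptions):

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    class PhraseSlopExample
    {
        static TopDocs Run(IndexSearcher searcher)
        {
            var phrase = new PhraseQuery();
            phrase.Add(new Term("content", "new"));
            phrase.Add(new Term("content", "york"));
            // Slop 0 (the default) requires the exact phrase "new york";
            // slop 2 also accepts "york new", since swapping two adjacent
            // terms costs two moves.
            phrase.Slop = 2;
            return searcher.Search(phrase, 10);
        }
    }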

+ public virtual int Slop + { + get { return slop; } + set { slop = value; } + } + + /// Adds a term to the end of the query phrase. + /// The relative position of the term is the one immediately after the last term added. + /// + public virtual void Add(Term term) + { + int position = 0; + if (positions.Count > 0) + position = positions[positions.Count - 1] + 1; + + Add(term, position); + } + + /// Adds a term to the end of the query phrase. + /// The relative position of the term within the phrase is specified explicitly. + /// This allows e.g. phrases with more than one term at the same position + /// or phrases with gaps (e.g. in connection with stopwords). + /// + /// + /// + /// + /// + /// + public virtual void Add(Term term, int position) + { + if (terms.Count == 0) + field = term.Field; + else if ((System.Object) term.Field != (System.Object) field) + { + throw new System.ArgumentException("All phrase terms must be in the same field: " + term); + } + + terms.Add(term); + positions.Add(position); + if (position > maxPosition) + maxPosition = position; + } + + /// Returns the set of terms in this phrase. + public virtual Term[] GetTerms() + { + return terms.ToArray(); + } + + /// Returns the relative positions of terms in this phrase. + public virtual int[] GetPositions() + { + int[] result = new int[positions.Count]; + for (int i = 0; i < positions.Count; i++) + result[i] = positions[i]; + return result; + } + + [Serializable] + private class PhraseWeight:Weight + { + private void InitBlock(PhraseQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private PhraseQuery enclosingInstance; + public PhraseQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Similarity similarity; + private float value_Renamed; + private float idf; + private float queryNorm; + private float queryWeight; + private IDFExplanation idfExp; + + public PhraseWeight(PhraseQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + + idfExp = similarity.IdfExplain(Enclosing_Instance.terms, searcher); + idf = idfExp.Idf; + } + + public override System.String ToString() + { + return "weight(" + Enclosing_Instance + ")"; + } + + public override Query Query + { + get { return Enclosing_Instance; } + } + + public override float Value + { + get { return value_Renamed; } + } + + public override float GetSumOfSquaredWeights() + { + queryWeight = idf*Enclosing_Instance.Boost; // compute query weight + return queryWeight*queryWeight; // square it + } + + public override void Normalize(float queryNorm) + { + this.queryNorm = queryNorm; + queryWeight *= queryNorm; // normalize query weight + value_Renamed = queryWeight * idf; // idf for document + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + if (Enclosing_Instance.terms.Count == 0) + // optimize zero-term case + return null; + + TermPositions[] tps = new TermPositions[Enclosing_Instance.terms.Count]; + for (int i = 0; i < Enclosing_Instance.terms.Count; i++) + { + TermPositions p = reader.TermPositions(Enclosing_Instance.terms[i]); + if (p == null) + return null; + tps[i] = p; + } + + if (Enclosing_Instance.slop == 0) + // optimize exact case + return new ExactPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, reader.Norms(Enclosing_Instance.field)); + else + return new SloppyPhraseScorer(this, tps, Enclosing_Instance.GetPositions(), similarity, 
Enclosing_Instance.slop, reader.Norms(Enclosing_Instance.field)); + } + + public override Explanation Explain(IndexReader reader, int doc) + { + + Explanation result = new Explanation(); + result.Description = "weight(" + Query + " in " + doc + "), product of:"; + + System.Text.StringBuilder docFreqs = new System.Text.StringBuilder(); + System.Text.StringBuilder query = new System.Text.StringBuilder(); + query.Append('\"'); + docFreqs.Append(idfExp.Explain()); + for (int i = 0; i < Enclosing_Instance.terms.Count; i++) + { + if (i != 0) + { + query.Append(" "); + } + + Term term = Enclosing_Instance.terms[i]; + + query.Append(term.Text); + } + query.Append('\"'); + + Explanation idfExpl = new Explanation(idf, "idf(" + Enclosing_Instance.field + ":" + docFreqs + ")"); + + // explain query weight + Explanation queryExpl = new Explanation(); + queryExpl.Description = "queryWeight(" + Query + "), product of:"; + + Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost"); + if (Enclosing_Instance.Boost != 1.0f) + queryExpl.AddDetail(boostExpl); + queryExpl.AddDetail(idfExpl); + + Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); + queryExpl.AddDetail(queryNormExpl); + + queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value; + + result.AddDetail(queryExpl); + + // explain field weight + Explanation fieldExpl = new Explanation(); + fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.field + ":" + query + " in " + doc + "), product of:"; + + PhraseScorer scorer = (PhraseScorer)Scorer(reader, true, false); + if (scorer == null) + { + return new Explanation(0.0f, "no matching docs"); + } + Explanation tfExplanation = new Explanation(); + int d = scorer.Advance(doc); + float phraseFreq = (d == doc) ? scorer.CurrentFreq() : 0.0f; + tfExplanation.Value = similarity.Tf(phraseFreq); + tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")"; + + fieldExpl.AddDetail(tfExplanation); + fieldExpl.AddDetail(idfExpl); + + Explanation fieldNormExpl = new Explanation(); + byte[] fieldNorms = reader.Norms(Enclosing_Instance.field); + float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f; + fieldNormExpl.Value = fieldNorm; + fieldNormExpl.Description = "fieldNorm(field=" + Enclosing_Instance.field + ", doc=" + doc + ")"; + fieldExpl.AddDetail(fieldNormExpl); + + fieldExpl.Value = tfExplanation.Value * idfExpl.Value * fieldNormExpl.Value; + + result.AddDetail(fieldExpl); + + // combine them + result.Value = queryExpl.Value * fieldExpl.Value; + + if (queryExpl.Value == 1.0f) + return fieldExpl; + + return result; + } + } + + public override Weight CreateWeight(Searcher searcher) + { + if (terms.Count == 1) + { + // optimize one-term case + Term term = terms[0]; + Query termQuery = new TermQuery(term); + termQuery.Boost = Boost; + return termQuery.CreateWeight(searcher); + } + return new PhraseWeight(this, searcher); + } + + /// + /// + public override void ExtractTerms(System.Collections.Generic.ISet queryTerms) + { + queryTerms.UnionWith(terms); + } + + /// Prints a user-readable version of this query. 
+ public override System.String ToString(System.String f) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (field != null && !field.Equals(f)) + { + buffer.Append(field); + buffer.Append(":"); + } + + buffer.Append("\""); + System.String[] pieces = new System.String[maxPosition + 1]; + for (int i = 0; i < terms.Count; i++) + { + int pos = positions[i]; + System.String s = pieces[pos]; + if (s == null) + { + s = terms[i].Text; + } + else + { + s = s + "|" + terms[i].Text; + } + pieces[pos] = s; + } + for (int i = 0; i < pieces.Length; i++) + { + if (i > 0) + { + buffer.Append(' '); + } + System.String s = pieces[i]; + if (s == null) + { + buffer.Append('?'); + } + else + { + buffer.Append(s); + } + } + buffer.Append("\""); + + if (slop != 0) + { + buffer.Append("~"); + buffer.Append(slop); + } + + buffer.Append(ToStringUtils.Boost(Boost)); + + return buffer.ToString(); + } + + /// Returns true iff o is equal to this. + public override bool Equals(System.Object o) + { + if (!(o is PhraseQuery)) + return false; + PhraseQuery other = (PhraseQuery) o; + return (this.Boost == other.Boost) && (this.slop == other.slop) && this.terms.Equals(other.terms) && this.positions.Equals(other.positions); + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ slop ^ terms.GetHashCode() ^ positions.GetHashCode(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PhraseQueue.cs b/external/Lucene.Net.Light/src/core/Search/PhraseQueue.cs new file mode 100644 index 0000000000..d603df5f37 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PhraseQueue.cs @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Search +{ + + sealed class PhraseQueue : PriorityQueue + { + internal PhraseQueue(int size) + { + Initialize(size); + } + + public override bool LessThan(PhrasePositions pp1, PhrasePositions pp2) + { + if (pp1.doc == pp2.doc) + if (pp1.position == pp2.position) + // same doc and pp.position, so decide by actual term positions. + // rely on: pp.position == tp.position - offset. 
+ return pp1.offset < pp2.offset; + else + return pp1.position < pp2.position; + else + return pp1.doc < pp2.doc; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PhraseScorer.cs b/external/Lucene.Net.Light/src/core/Search/PhraseScorer.cs new file mode 100644 index 0000000000..59c97717c7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PhraseScorer.cs @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using TermPositions = Lucene.Net.Index.TermPositions; + +namespace Lucene.Net.Search +{ + + /// Expert: Scoring functionality for phrase queries. + ///
A document is considered matching if it contains the phrase-query terms + /// at "valid" positions. What "valid positions" are + /// depends on the type of the phrase query: for an exact phrase query the terms are required + /// to appear in adjacent locations, while for a sloppy phrase query some distance between + /// the terms is allowed. The abstract method PhraseFreq() of extending classes + /// is invoked for each document containing all the phrase query terms, in order to + /// compute the frequency of the phrase query in that document. A non-zero frequency + /// means a match. + ///
+ abstract class PhraseScorer:Scorer + { + private Weight weight; + protected internal byte[] norms; + protected internal float value_Renamed; + + private bool firstTime = true; + private bool more = true; + protected internal PhraseQueue pq; + protected internal PhrasePositions first, last; + + private float freq; //prhase frequency in current doc as computed by phraseFreq(). + + internal PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms):base(similarity) + { + this.norms = norms; + this.weight = weight; + this.value_Renamed = weight.Value; + + // convert tps to a list of phrase positions. + // note: phrase-position differs from term-position in that its position + // reflects the phrase offset: pp.pos = tp.pos - offset. + // this allows to easily identify a matching (exact) phrase + // when all PhrasePositions have exactly the same position. + for (int i = 0; i < tps.Length; i++) + { + PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]); + if (last != null) + { + // add next to end of list + last.next = pp; + } + else + { + first = pp; + } + last = pp; + } + + pq = new PhraseQueue(tps.Length); // construct empty pq + first.doc = - 1; + } + + public override int DocID() + { + return first.doc; + } + + public override int NextDoc() + { + if (firstTime) + { + Init(); + firstTime = false; + } + else if (more) + { + more = last.Next(); // trigger further scanning + } + if (!DoNext()) + { + first.doc = NO_MORE_DOCS; + } + return first.doc; + } + + // next without initial increment + private bool DoNext() + { + while (more) + { + while (more && first.doc < last.doc) + { + // find doc w/ all the terms + more = first.SkipTo(last.doc); // skip first upto last + FirstToLast(); // and move it to the end + } + + if (more) + { + // found a doc with all of the terms + freq = PhraseFreq(); // check for phrase + if (freq == 0.0f) + // no match + more = last.Next(); + // trigger further scanning + else + return true; // found a match + } + } + return false; // no more matches + } + + public override float Score() + { + //System.out.println("scoring " + first.doc); + float raw = Similarity.Tf(freq) * value_Renamed; // raw score + return norms == null?raw:raw * Similarity.DecodeNorm(norms[first.doc]); // normalize + } + + public override int Advance(int target) + { + firstTime = false; + for (PhrasePositions pp = first; more && pp != null; pp = pp.next) + { + more = pp.SkipTo(target); + } + if (more) + { + Sort(); // re-sort + } + if (!DoNext()) + { + first.doc = NO_MORE_DOCS; + } + return first.doc; + } + + /// + /// Phrase frequency in current doc as computed by PhraseFreq() + /// + /// + public float CurrentFreq() + { + return freq; + } + + /// For a document containing all the phrase query terms, compute the + /// frequency of the phrase in that document. + /// A non zero frequency means a match. + ///
Note that containing all phrase terms does not guarantee a match; they have to be found in matching locations. + ///
+ /// frequency of the phrase in current doc, 0 if not found. + /// + protected internal abstract float PhraseFreq(); + + private void Init() + { + for (PhrasePositions pp = first; more && pp != null; pp = pp.next) + { + more = pp.Next(); + } + if (more) + { + Sort(); + } + } + + private void Sort() + { + pq.Clear(); + for (PhrasePositions pp = first; pp != null; pp = pp.next) + { + pq.Add(pp); + } + PqToList(); + } + + protected internal void PqToList() + { + last = first = null; + while (pq.Top() != null) + { + PhrasePositions pp = pq.Pop(); + if (last != null) + { + // add next to end of list + last.next = pp; + } + else + first = pp; + last = pp; + pp.next = null; + } + } + + protected internal void FirstToLast() + { + last.next = first; // move first to end of list + last = first; + first = first.next; + last.next = null; + } + + public override System.String ToString() + { + return "scorer(" + weight + ")"; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PositiveScoresOnlyCollector.cs b/external/Lucene.Net.Light/src/core/Search/PositiveScoresOnlyCollector.cs new file mode 100644 index 0000000000..c9f1ca28a7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PositiveScoresOnlyCollector.cs @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// A implementation which wraps another + /// and makes sure only documents with + /// scores > 0 are collected. + /// + public class PositiveScoresOnlyCollector:Collector + { + + private Collector c; + private Scorer scorer; + + public PositiveScoresOnlyCollector(Collector c) + { + this.c = c; + } + + public override void Collect(int doc) + { + if (scorer.Score() > 0) + { + c.Collect(doc); + } + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + c.SetNextReader(reader, docBase); + } + + public override void SetScorer(Scorer scorer) + { + // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call + // score() also. + this.scorer = new ScoreCachingWrappingScorer(scorer); + c.SetScorer(this.scorer); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return c.AcceptsDocsOutOfOrder; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PrefixFilter.cs b/external/Lucene.Net.Light/src/core/Search/PrefixFilter.cs new file mode 100644 index 0000000000..d398466152 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PrefixFilter.cs @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + + /// A Filter that restricts search results to values that have a matching prefix in a given + /// field. + /// + [Serializable] + public class PrefixFilter:MultiTermQueryWrapperFilter + { + + public PrefixFilter(Term prefix):base(new PrefixQuery(prefix)) + { + } + + public virtual Term Prefix + { + get { return query.Prefix; } + } + + /// Prints a user-readable version of this query. + public override System.String ToString() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("PrefixFilter("); + buffer.Append(Prefix.ToString()); + buffer.Append(")"); + return buffer.ToString(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PrefixQuery.cs b/external/Lucene.Net.Light/src/core/Search/PrefixQuery.cs new file mode 100644 index 0000000000..d1a013abad --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PrefixQuery.cs @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + /// A Query that matches documents containing terms with a specified prefix. A PrefixQuery + /// is built by QueryParser for input like app*. + /// + ///

This query uses the + /// MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT + /// rewrite method. + ///
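A small usage sketch (editor's illustration, not part of the imported sources; the "filename" field is an assumption):

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    class PrefixQueryExample
    {
        static TopDocs Run(IndexSearcher searcher)
        {
            // Matches every document whose "filename" field contains a term
            // starting with "app" - what QueryParser builds for the input app*.
            var query = new PrefixQuery(new Term("filename", "app"));
            return searcher.Search(query, 20);
        }
    }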

+ [Serializable] + public class PrefixQuery:MultiTermQuery + { + private Term prefix; + + /// Constructs a query for terms starting with prefix. + public PrefixQuery(Term prefix) + { //will be removed in 3.0 + this.prefix = prefix; + } + + /// Returns the prefix of this query. + public virtual Term Prefix + { + get { return prefix; } + } + + protected internal override FilteredTermEnum GetEnum(IndexReader reader) + { + return new PrefixTermEnum(reader, prefix); + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (!prefix.Field.Equals(field)) + { + buffer.Append(prefix.Field); + buffer.Append(":"); + } + buffer.Append(prefix.Text); + buffer.Append('*'); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + //@Override + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((prefix == null)?0:prefix.GetHashCode()); + return result; + } + + //@Override + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + PrefixQuery other = (PrefixQuery) obj; + if (prefix == null) + { + if (other.prefix != null) + return false; + } + else if (!prefix.Equals(other.prefix)) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/PrefixTermEnum.cs b/external/Lucene.Net.Light/src/core/Search/PrefixTermEnum.cs new file mode 100644 index 0000000000..c92195c52e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/PrefixTermEnum.cs @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + + /// Subclass of FilteredTermEnum for enumerating all terms that match the + /// specified prefix filter term. + ///

+ /// Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + /// + ///

+ public class PrefixTermEnum:FilteredTermEnum + { + + private Term prefix; + private bool endEnum = false; + + public PrefixTermEnum(IndexReader reader, Term prefix) + { + this.prefix = prefix; + + SetEnum(reader.Terms(new Term(prefix.Field, prefix.Text))); + } + + public override float Difference() + { + return 1.0f; + } + + public override bool EndEnum() + { + return endEnum; + } + + protected internal virtual Term PrefixTerm + { + get { return prefix; } + } + + protected internal override bool TermCompare(Term term) + { + if ((System.Object) term.Field == (System.Object) prefix.Field && term.Text.StartsWith(prefix.Text)) + { + return true; + } + endEnum = true; + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Query.cs b/external/Lucene.Net.Light/src/core/Search/Query.cs new file mode 100644 index 0000000000..b00d16b8f1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Query.cs @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// The abstract base class for queries. + ///

Instantiable subclasses are: + /// TermQuery, + /// MultiTermQuery, + /// BooleanQuery, + /// WildcardQuery, + /// PhraseQuery, + /// PrefixQuery, + /// MultiPhraseQuery, + /// FuzzyQuery, + /// TermRangeQuery, + /// NumericRangeQuery, + /// SpanQuery + /// + ///

A parser for queries is contained in: + /// + /// QueryParser + /// + ///

+ [Serializable] + public abstract class Query : System.ICloneable + { + private float boost = 1.0f; // query boost factor + + /// Gets or sets the boost for this query clause to b. Documents + /// matching this clause will (in addition to the normal weightings) have + /// their score multiplied by b. The boost is 1.0 by default. + /// + public virtual float Boost + { + get { return boost; } + set { boost = value; } + } + + /// Prints a query to a string, with field assumed to be the + /// default field and omitted. + ///

The representation used is one that is supposed to be readable + /// by QueryParser. However, + /// there are the following limitations: + /// + /// If the query was created by the parser, the printed + /// representation may not be exactly what was parsed. For example, + /// characters that need to be escaped will be represented without + /// the required backslash. + /// Some of the more complicated queries (e.g. span queries) + /// don't have a representation that can be parsed by QueryParser. + /// + ///

+ public abstract System.String ToString(System.String field); + + /// Prints a query to a string. + public override System.String ToString() + { + return ToString(""); + } + + /// Expert: Constructs an appropriate Weight implementation for this query. + /// + ///

+ /// Only implemented by primitive queries, which re-write to themselves. + ///

+ public virtual Weight CreateWeight(Searcher searcher) + { + throw new System.NotSupportedException(); + } + + /// Expert: Constructs and initializes a Weight for a top-level query. + public virtual Weight Weight(Searcher searcher) + { + Query query = searcher.Rewrite(this); + Weight weight = query.CreateWeight(searcher); + float sum = weight.GetSumOfSquaredWeights(); + float norm = GetSimilarity(searcher).QueryNorm(sum); + if (float.IsInfinity(norm) || float.IsNaN(norm)) + norm = 1.0f; + weight.Normalize(norm); + return weight; + } + + + /// Expert: called to re-write queries into primitive queries. For example, + /// a PrefixQuery will be rewritten into a BooleanQuery that consists + /// of TermQuerys. + /// + public virtual Query Rewrite(IndexReader reader) + { + return this; + } + + + /// Expert: called when re-writing queries under MultiSearcher. + /// + /// Create a single query suitable for use by all subsearchers (in 1-1 + /// correspondence with queries). This is an optimization of the OR of + /// all queries. We handle the common optimization cases of equal + /// queries and overlapping clauses of boolean OR queries (as generated + /// by MultiTermQuery.rewrite()). + /// Be careful overriding this method as queries[0] determines which + /// method will be called and is not necessarily of the same type as + /// the other queries. + /// + public virtual Query Combine(Query[] queries) + { + var uniques = new System.Collections.Generic.HashSet(); + for (int i = 0; i < queries.Length; i++) + { + Query query = queries[i]; + BooleanClause[] clauses = null; + // check if we can split the query into clauses + bool splittable = (query is BooleanQuery); + if (splittable) + { + BooleanQuery bq = (BooleanQuery) query; + splittable = bq.IsCoordDisabled(); + clauses = bq.GetClauses(); + for (int j = 0; splittable && j < clauses.Length; j++) + { + splittable = (clauses[j].Occur == Occur.SHOULD); + } + } + if (splittable) + { + for (int j = 0; j < clauses.Length; j++) + { + uniques.Add(clauses[j].Query); + } + } + else + { + uniques.Add(query); + } + } + // optimization: if we have just one query, just return it + if (uniques.Count == 1) + { + return uniques.First(); + } + BooleanQuery result = new BooleanQuery(true); + foreach (Query key in uniques) + { + result.Add(key, Occur.SHOULD); + } + return result; + } + + + /// Expert: adds all terms occuring in this query to the terms set. Only + /// works if this query is in its rewritten form. + /// + /// + /// UnsupportedOperationException if this query is not yet rewritten + public virtual void ExtractTerms(System.Collections.Generic.ISet terms) + { + // needs to be implemented by query subclasses + throw new System.NotSupportedException(); + } + + + + /// Expert: merges the clauses of a set of BooleanQuery's into a single + /// BooleanQuery. + /// + ///

A utility for use by Combine(Query[]) implementations. + ///
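For illustration only (editor's sketch, not part of the imported sources; the "tag" terms are invented), merging two coord-disabled boolean queries might look like this:

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    class MergeBooleanQueriesExample
    {
        static Query Merge()
        {
            var a = new BooleanQuery(true); // true disables coord
            a.Add(new TermQuery(new Term("tag", "mono")), Occur.SHOULD);

            var b = new BooleanQuery(true);
            b.Add(new TermQuery(new Term("tag", "lucene")), Occur.SHOULD);

            // Returns a single BooleanQuery holding the union of the clauses.
            return Query.MergeBooleanQueries(a, b);
        }
    }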

+ public static Query MergeBooleanQueries(params BooleanQuery[] queries) + { + var allClauses = new System.Collections.Generic.HashSet(); + foreach (BooleanQuery booleanQuery in queries) + { + foreach (BooleanClause clause in booleanQuery) + { + allClauses.Add(clause); + } + } + + bool coordDisabled = queries.Length == 0?false:queries[0].IsCoordDisabled(); + BooleanQuery result = new BooleanQuery(coordDisabled); + foreach(BooleanClause clause in allClauses) + { + result.Add(clause); + } + return result; + } + + + /// Expert: Returns the Similarity implementation to be used for this query. + /// Subclasses may override this method to specify their own Similarity + /// implementation, perhaps one that delegates through that of the Searcher. + /// By default the Searcher's Similarity implementation is returned. + /// + public virtual Similarity GetSimilarity(Searcher searcher) + { + return searcher.Similarity; + } + + /// Returns a clone of this query. + public virtual System.Object Clone() + { + try + { + return base.MemberwiseClone(); + } + catch (System.Exception e) + { + throw new System.SystemException("Clone not supported: " + e.Message); + } + } + + public override int GetHashCode() + { + int prime = 31; + int result = 1; + result = prime * result + BitConverter.ToInt32(BitConverter.GetBytes(boost), 0); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (obj == null) + return false; + if (GetType() != obj.GetType()) + return false; + Query other = (Query) obj; + if (BitConverter.ToInt32(BitConverter.GetBytes(boost), 0) != BitConverter.ToInt32(BitConverter.GetBytes(other.boost), 0)) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/QueryTermVector.cs b/external/Lucene.Net.Light/src/core/Search/QueryTermVector.cs new file mode 100644 index 0000000000..a52a6a3464 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/QueryTermVector.cs @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Analysis.Tokenattributes; +using Lucene.Net.Index; +using Lucene.Net.Support; +using Analyzer = Lucene.Net.Analysis.Analyzer; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Search +{ + + /// + /// + /// + /// + public class QueryTermVector : ITermFreqVector + { + private System.String[] terms = new System.String[0]; + private int[] termFreqs = new int[0]; + + public virtual string Field + { + get { return null; } + } + + /// + /// The original list of terms from the query, can contain duplicates + /// + public QueryTermVector(System.String[] queryTerms) + { + + ProcessTerms(queryTerms); + } + + public QueryTermVector(System.String queryString, Analyzer analyzer) + { + if (analyzer != null) + { + TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString)); + if (stream != null) + { + IList terms = new List(); + try + { + bool hasMoreTokens = false; + + stream.Reset(); + ITermAttribute termAtt = stream.AddAttribute(); + + hasMoreTokens = stream.IncrementToken(); + while (hasMoreTokens) + { + terms.Add(termAtt.Term); + hasMoreTokens = stream.IncrementToken(); + } + ProcessTerms(terms.ToArray()); + } + catch (System.IO.IOException) + { + } + } + } + } + + private void ProcessTerms(System.String[] queryTerms) + { + if (queryTerms != null) + { + System.Array.Sort(queryTerms); + IDictionary tmpSet = new HashMap(queryTerms.Length); + //filter out duplicates + IList tmpList = new List(queryTerms.Length); + IList tmpFreqs = new List(queryTerms.Length); + int j = 0; + for (int i = 0; i < queryTerms.Length; i++) + { + var term = queryTerms[i]; + var position = tmpSet[term]; + if (!tmpSet.ContainsKey(term)) // if temp_position == null + { + tmpSet[term] = j++; + tmpList.Add(term); + tmpFreqs.Add(1); + } + else + { + int integer = tmpFreqs[position]; + tmpFreqs[position] = (integer + 1); + } + } + terms = tmpList.ToArray(); + //termFreqs = (int[])tmpFreqs.toArray(termFreqs); + termFreqs = new int[tmpFreqs.Count]; + int i2 = 0; + foreach (int integer in tmpFreqs) + { + termFreqs[i2++] = integer; + } + } + } + + public override System.String ToString() + { + System.Text.StringBuilder sb = new System.Text.StringBuilder(); + sb.Append('{'); + for (int i = 0; i < terms.Length; i++) + { + if (i > 0) + sb.Append(", "); + sb.Append(terms[i]).Append('/').Append(termFreqs[i]); + } + sb.Append('}'); + return sb.ToString(); + } + + + public virtual int Size + { + get { return terms.Length; } + } + + public virtual System.String[] GetTerms() + { + return terms; + } + + public virtual int[] GetTermFrequencies() + { + return termFreqs; + } + + public virtual int IndexOf(System.String term) + { + int res = System.Array.BinarySearch(terms, term); + return res >= 0?res:- 1; + } + + public virtual int[] IndexesOf(System.String[] terms, int start, int len) + { + int[] res = new int[len]; + + for (int i = 0; i < len; i++) + { + res[i] = IndexOf(terms[i]); + } + return res; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/QueryWrapperFilter.cs b/external/Lucene.Net.Light/src/core/Search/QueryWrapperFilter.cs new file mode 100644 index 0000000000..1642c97078 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/QueryWrapperFilter.cs @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// Constrains search results to only match those which also match a provided + /// query. + /// + ///

This could be used, for example, with a TermRangeQuery on a suitably + /// formatted date field to implement date filtering. One could re-use a single + /// QueryFilter that matches, e.g., only documents modified within the last + /// week. The QueryFilter and TermRangeQuery would only need to be reconstructed + /// once per day. + /// + ///
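A hedged sketch of the date-filtering idea described above (editor's illustration, not part of the imported sources; the "modified" field, its yyyyMMdd string encoding, and the date values are assumptions):

    using Lucene.Net.Search;

    class DateFilterExample
    {
        static TopDocs Run(IndexSearcher searcher, Query userQuery)
        {
            // Build the "last week" range once (e.g. once per day) and reuse it.
            var lastWeek = new TermRangeQuery("modified", "20140101", "20140108",
                                              true, true); // inclusive bounds
            var filter = new QueryWrapperFilter(lastWeek);
            return searcher.Search(userQuery, filter, 10);
        }
    }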

+ /// $Id:$ + /// + [Serializable] + public class QueryWrapperFilter:Filter + { + private class AnonymousClassDocIdSet:DocIdSet + { + public AnonymousClassDocIdSet(Lucene.Net.Search.Weight weight, Lucene.Net.Index.IndexReader reader, QueryWrapperFilter enclosingInstance) + { + InitBlock(weight, reader, enclosingInstance); + } + private void InitBlock(Lucene.Net.Search.Weight weight, Lucene.Net.Index.IndexReader reader, QueryWrapperFilter enclosingInstance) + { + this.weight = weight; + this.reader = reader; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Search.Weight weight; + private Lucene.Net.Index.IndexReader reader; + private QueryWrapperFilter enclosingInstance; + public QueryWrapperFilter Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public override DocIdSetIterator Iterator() + { + return weight.Scorer(reader, true, false); + } + + public override bool IsCacheable + { + get { return false; } + } + } + private Query query; + + /// Constructs a filter which only matches documents matching + /// query. + /// + public QueryWrapperFilter(Query query) + { + this.query = query; + } + + public override DocIdSet GetDocIdSet(IndexReader reader) + { + Weight weight = query.Weight(new IndexSearcher(reader)); + return new AnonymousClassDocIdSet(weight, reader, this); + } + + public override System.String ToString() + { + return "QueryWrapperFilter(" + query + ")"; + } + + public override bool Equals(System.Object o) + { + if (!(o is QueryWrapperFilter)) + return false; + return this.query.Equals(((QueryWrapperFilter) o).query); + } + + public override int GetHashCode() + { + return query.GetHashCode() ^ unchecked((int) 0x923F64B9); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ReqExclScorer.cs b/external/Lucene.Net.Light/src/core/Search/ReqExclScorer.cs new file mode 100644 index 0000000000..a56936254f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ReqExclScorer.cs @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + + /// A Scorer for queries with a required subscorer + /// and an excluding (prohibited) sub DocIdSetIterator. + ///
+ /// This Scorer implements , + /// and it uses the skipTo() on the given scorers. + ///
+ class ReqExclScorer:Scorer + { + private Scorer reqScorer; + private DocIdSetIterator exclDisi; + private int doc = - 1; + + /// Construct a ReqExclScorer. + /// The scorer that must match, except where + /// + /// indicates exclusion. + /// + public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi):base(null) + { // No similarity used. + this.reqScorer = reqScorer; + this.exclDisi = exclDisi; + } + + public override int NextDoc() + { + if (reqScorer == null) + { + return doc; + } + doc = reqScorer.NextDoc(); + if (doc == NO_MORE_DOCS) + { + reqScorer = null; // exhausted, nothing left + return doc; + } + if (exclDisi == null) + { + return doc; + } + return doc = ToNonExcluded(); + } + + /// Advance to non excluded doc. + ///
On entry: + /// + /// reqScorer != null, + /// exclScorer != null, + /// reqScorer was advanced once via next() or skipTo() + /// and reqScorer.doc() may still be excluded. + /// + /// Advances reqScorer to a non-excluded required doc, if any. + ///
+ /// true iff there is a non excluded required doc. + /// + private int ToNonExcluded() + { + int exclDoc = exclDisi.DocID(); + int reqDoc = reqScorer.DocID(); // may be excluded + do + { + if (reqDoc < exclDoc) + { + return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded + } + else if (reqDoc > exclDoc) + { + exclDoc = exclDisi.Advance(reqDoc); + if (exclDoc == NO_MORE_DOCS) + { + exclDisi = null; // exhausted, no more exclusions + return reqDoc; + } + if (exclDoc > reqDoc) + { + return reqDoc; // not excluded + } + } + } + while ((reqDoc = reqScorer.NextDoc()) != NO_MORE_DOCS); + reqScorer = null; // exhausted, nothing left + return NO_MORE_DOCS; + } + + public override int DocID() + { + return doc; + } + + /// Returns the score of the current document matching the query. + /// Initially invalid, until is called the first time. + /// + /// The score of the required scorer. + /// + public override float Score() + { + return reqScorer.Score(); // reqScorer may be null when next() or skipTo() already return false + } + + public override int Advance(int target) + { + if (reqScorer == null) + { + return doc = NO_MORE_DOCS; + } + if (exclDisi == null) + { + return doc = reqScorer.Advance(target); + } + if (reqScorer.Advance(target) == NO_MORE_DOCS) + { + reqScorer = null; + return doc = NO_MORE_DOCS; + } + return doc = ToNonExcluded(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ReqOptSumScorer.cs b/external/Lucene.Net.Light/src/core/Search/ReqOptSumScorer.cs new file mode 100644 index 0000000000..b432f135c9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ReqOptSumScorer.cs @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// A Scorer for queries with a required part and an optional part. + /// Delays skipTo() on the optional part until a score() is needed. + ///
+ /// This Scorer implements DocIdSetIterator.Advance(int). + ///
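+ /// A query-level illustration (sketch only): this scorer typically backs a BooleanQuery
+ /// that mixes a required (MUST) clause with an optional (SHOULD) clause. BooleanQuery,
+ /// TermQuery and Occur are assumed from the wider library; they are not part of this file.
+ /// BooleanQuery bq = new BooleanQuery();
+ /// bq.Add(new TermQuery(new Term("body", "required")), Occur.MUST);
+ /// bq.Add(new TermQuery(new Term("body", "optional")), Occur.SHOULD);
+ /// // matching docs must contain "required"; "optional" only adds to the score when present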
+ class ReqOptSumScorer:Scorer + { + /// The scorers passed from the constructor. + /// These are set to null as soon as their next() or skipTo() returns false. + /// + private Scorer reqScorer; + private Scorer optScorer; + + /// Construct a ReqOptScorer. + /// The required scorer. This must match. + /// + /// The optional scorer. This is used for scoring only. + /// + public ReqOptSumScorer(Scorer reqScorer, Scorer optScorer):base(null) + { // No similarity used. + this.reqScorer = reqScorer; + this.optScorer = optScorer; + } + + public override int NextDoc() + { + return reqScorer.NextDoc(); + } + + public override int Advance(int target) + { + return reqScorer.Advance(target); + } + + public override int DocID() + { + return reqScorer.DocID(); + } + + /// Returns the score of the current document matching the query. + /// Initially invalid, until is called the first time. + /// + /// The score of the required scorer, eventually increased by the score + /// of the optional scorer when it also matches the current document. + /// + public override float Score() + { + int curDoc = reqScorer.DocID(); + float reqScore = reqScorer.Score(); + if (optScorer == null) + { + return reqScore; + } + + int optScorerDoc = optScorer.DocID(); + if (optScorerDoc < curDoc && (optScorerDoc = optScorer.Advance(curDoc)) == NO_MORE_DOCS) + { + optScorer = null; + return reqScore; + } + + return optScorerDoc == curDoc?reqScore + optScorer.Score():reqScore; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ScoreCachingWrappingScorer.cs b/external/Lucene.Net.Light/src/core/Search/ScoreCachingWrappingScorer.cs new file mode 100644 index 0000000000..c60357b357 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ScoreCachingWrappingScorer.cs @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// A which wraps another scorer and caches the score of the + /// current document. Successive calls to will return the same + /// result and will not invoke the wrapped Scorer's score() method, unless the + /// current document has changed.
+ /// This class might be useful due to the changes done to the Collector + /// interface, in which the score is not computed for a document by default, only + /// if the collector requests it. Some collectors may need to use the score in + /// several places, however all they have in hand is a Scorer object, and + /// might end up computing the score of a document more than once. + ///
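+ /// A usage sketch (illustrative only; the Collector members shown are assumed from the
+ /// wider library and are not defined in this file):
+ /// class MaxScoreCollector : Collector
+ /// {
+ ///     private Scorer scorer;
+ ///     public float MaxScore = float.MinValue;
+ ///     public override void SetScorer(Scorer scorer)
+ ///     {
+ ///         // wrap once, so the two Score() calls in Collect() compute only once per doc
+ ///         this.scorer = new ScoreCachingWrappingScorer(scorer);
+ ///     }
+ ///     public override void Collect(int doc)
+ ///     {
+ ///         float score = scorer.Score();      // computed by the wrapped scorer
+ ///         if (scorer.Score() > MaxScore)     // served from the cache, not recomputed
+ ///             MaxScore = score;
+ ///     }
+ ///     public override void SetNextReader(IndexReader reader, int docBase) { }
+ ///     public override bool AcceptsDocsOutOfOrder { get { return true; } }
+ /// }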
+ public class ScoreCachingWrappingScorer:Scorer + { + + private Scorer scorer; + private int curDoc = - 1; + private float curScore; + + /// Creates a new instance by wrapping the given scorer. + public ScoreCachingWrappingScorer(Scorer scorer):base(scorer.Similarity) + { + this.scorer = scorer; + } + + public /*protected internal*/ override bool Score(Collector collector, int max, int firstDocID) + { + return scorer.Score(collector, max, firstDocID); + } + + public override Similarity Similarity + { + get { return scorer.Similarity; } + } + + public override float Score() + { + int doc = scorer.DocID(); + if (doc != curDoc) + { + curScore = scorer.Score(); + curDoc = doc; + } + + return curScore; + } + + public override int DocID() + { + return scorer.DocID(); + } + + public override int NextDoc() + { + return scorer.NextDoc(); + } + + public override void Score(Collector collector) + { + scorer.Score(collector); + } + + public override int Advance(int target) + { + return scorer.Advance(target); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/ScoreDoc.cs b/external/Lucene.Net.Light/src/core/Search/ScoreDoc.cs new file mode 100644 index 0000000000..2ac34179d1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/ScoreDoc.cs @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Expert: Returned by low-level search implementations. + /// + /// + [Serializable] + public class ScoreDoc + { + /// Expert: The score of this document for the query. + public float Score { get; set; } + + /// Expert: A hit document's number. + /// + /// + public int Doc { get; set; } + + /// Expert: Constructs a ScoreDoc. + public ScoreDoc(int doc, float score) + { + this.Doc = doc; + this.Score = score; + } + + // A convenience method for debugging. + public override System.String ToString() + { + return "doc=" + Doc + " score=" + Score; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Scorer.cs b/external/Lucene.Net.Light/src/core/Search/Scorer.cs new file mode 100644 index 0000000000..17885acabb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Scorer.cs @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; + +namespace Lucene.Net.Search +{ + + /// Expert: Common scoring functionality for different types of queries. + /// + ///

+ /// A Scorer iterates over documents matching a + /// query in increasing order of doc Id. + ///

+ ///

+ /// Document scores are computed using a given Similarity + /// implementation. + ///

+ /// + ///

NOTE: The values Float.NaN, + /// Float.NEGATIVE_INFINITY and Float.POSITIVE_INFINITY are + /// not valid scores. Certain collectors (e.g. TopScoreDocCollector) will not properly collect hits + /// with these scores. + ///
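+ /// A typical consumption loop (sketch; it mirrors the Score(Collector) implementation below):
+ /// int doc;
+ /// while ((doc = scorer.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ /// {
+ ///     float score = scorer.Score();  // only valid after NextDoc()/Advance() returned a doc
+ ///     // consume the (doc, score) pair
+ /// }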

+ public abstract class Scorer:DocIdSetIterator + { + private Similarity similarity; + + /// Constructs a Scorer. + /// The Similarity implementation used by this scorer. + /// + protected internal Scorer(Similarity similarity) + { + this.similarity = similarity; + } + + /// Returns the Similarity implementation used by this scorer. + public virtual Similarity Similarity + { + get { return this.similarity; } + } + + /// Scores and collects all matching documents. + /// The collector to which all matching documents are passed. + /// + public virtual void Score(Collector collector) + { + collector.SetScorer(this); + int doc; + while ((doc = NextDoc()) != NO_MORE_DOCS) + { + collector.Collect(doc); + } + } + + /// Expert: Collects matching documents in a range. Hook for optimization. + /// Note, is added to ensure that + /// was called before this method. + /// + /// + /// The collector to which all matching documents are passed. + /// + /// Do not score documents past this. + /// + /// + /// The first document ID (ensures is called before + /// this method. + /// + /// true if more matching documents may remain. + /// + public /*protected internal*/ virtual bool Score(Collector collector, int max, int firstDocID) + { + collector.SetScorer(this); + int doc = firstDocID; + while (doc < max) + { + collector.Collect(doc); + doc = NextDoc(); + } + return doc != NO_MORE_DOCS; + } + + /// Returns the score of the current document matching the query. + /// Initially invalid, until or + /// is called the first time, or when called from within + /// . + /// + public abstract float Score(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Searchable.cs b/external/Lucene.Net.Light/src/core/Search/Searchable.cs new file mode 100644 index 0000000000..3c595dfd29 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Searchable.cs @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using Document = Lucene.Net.Documents.Document; +using FieldSelector = Lucene.Net.Documents.FieldSelector; +using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + + /// The interface for search implementations. + /// + ///

+ /// Searchable is the abstract network protocol for searching. Implementations + /// provide search over a single index, over multiple indices, and over indices + /// on remote servers. + /// + ///

+ /// Queries, filters and sort criteria are designed to be compact so that they + /// may be efficiently passed to a remote index, with only the top-scoring hits + /// being returned, rather than every matching hit. + /// + /// NOTE: this interface is kept public for convenience. Since it is not + /// expected to be implemented directly, it may be changed unexpectedly between + /// releases. + ///

+ public interface Searchable : IDisposable + { + /// Lower-level search API. + /// + ///

+ /// Collector.Collect(int) is called for every document.
+ /// Collector-based access to remote indexes is discouraged. + /// + ///

+ /// Applications should only use this if they need all of the matching + /// documents. The high-level search API (Searcher.Search(Query, int)) is + /// usually more efficient, as it skips non-high-scoring hits. + /// + ///

+ /// to match documents + /// + /// if non-null, used to permit documents to be collected. + /// + /// to receive hits + /// + /// BooleanQuery.TooManyClauses + void Search(Weight weight, Filter filter, Collector collector); + + /// Frees resources associated with this Searcher. + /// Be careful not to call this method while you are still using objects + /// that reference this searchable + /// + void Close(); + + /// Expert: Returns the number of documents containing term. + /// Called by search code to compute term weights. + /// + /// + /// + int DocFreq(Term term); + + /// Expert: For each term in the terms array, calculates the number of + /// documents containing term. Returns an array with these + /// document frequencies. Used to minimize number of remote calls. + /// + int[] DocFreqs(Term[] terms); + + /// Expert: Returns one greater than the largest possible document number. + /// Called by search code to compute term weights. + /// + /// + /// + int MaxDoc { get; } + + /// + /// Expert: Low-level search implementation. Finds the top n + /// hits for query, applying filter if non-null. + /// + ///

Applications should usually call Searcher.Search(Query, int) or + /// Searcher.Search(Query, Filter, int) instead. + ///

+ /// BooleanQuery.TooManyClauses + TopDocs Search(Weight weight, Filter filter, int n); + + /// Expert: Returns the stored fields of document i. + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + Document Doc(int i); + + /// Get the at the nth position. The + /// may be used to determine what s to load and how they should be loaded. + /// + /// NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the lazy is + /// loaded an exception may be thrown. If you want the value of a lazy to be available after closing you must + /// explicitly load it or fetch the Document again with a new loader. + /// + /// + /// + /// Get the document at the nth position + /// + /// The to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + /// + /// The stored fields of the at the nth position + /// + /// CorruptIndexException if the index is corrupt + /// IOException if there is a low-level IO error + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + Document Doc(int n, FieldSelector fieldSelector); + + /// Expert: called to re-write queries into primitive queries. + /// BooleanQuery.TooManyClauses + Query Rewrite(Query query); + + /// Expert: low-level implementation method + /// Returns an Explanation that describes how doc scored against + /// weight. + /// + ///

This is intended to be used in developing Similarity implementations, + /// and, for good performance, should not be displayed with every hit. + /// Computing an explanation is as expensive as executing the query over the + /// entire index. + ///

Applications should call Searcher.Explain(Query, int). + ///

+ /// BooleanQuery.TooManyClauses + Explanation Explain(Weight weight, int doc); + + /// Expert: Low-level search implementation with arbitrary sorting. Finds + /// the top n hits for query, applying + /// filter if non-null, and sorting the hits by the criteria in + /// sort. + /// + ///

Applications should usually call Searcher.Search(Query, Filter, int, Sort) + /// instead. + /// + ///

+ /// BooleanQuery.TooManyClauses + TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Searcher.cs b/external/Lucene.Net.Light/src/core/Search/Searcher.cs new file mode 100644 index 0000000000..a545e117cf --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Searcher.cs @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using Document = Lucene.Net.Documents.Document; +using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + + /// An abstract base class for search implementations. Implements the main search + /// methods. + /// + ///

+ /// Note that you can only access hits from a Searcher as long as it is not yet + /// closed, otherwise an IOException will be thrown. + ///

+ public abstract class Searcher : System.MarshalByRefObject, Searchable, System.IDisposable + { + protected Searcher() + { + InitBlock(); + } + private void InitBlock() + { + similarity = Net.Search.Similarity.Default; + } + + /// Search implementation with arbitrary sorting. Finds + /// the top n hits for query, applying + /// filter if non-null, and sorting the hits by the criteria in + /// sort. + /// + ///

NOTE: this does not compute scores by default; use + /// IndexSearcher.SetDefaultFieldSortScoring to enable scoring. + /// + ///

+ /// BooleanQuery.TooManyClauses + public virtual TopFieldDocs Search(Query query, Filter filter, int n, Sort sort) + { + return Search(CreateWeight(query), filter, n, sort); + } + + /// Lower-level search API. + /// + ///

is called for every matching document. + /// + ///

Applications should only use this if they need all of the + /// matching documents. The high-level search API (Searcher.Search(Query, int)) + /// is usually more efficient, as it skips + /// non-high-scoring hits. + /// + ///

Note: The score passed to this method is a raw score. + /// In other words, the score will not necessarily be a float whose value is + /// between 0 and 1. + ///
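+ /// If only the best hits are needed, the high-level API returns them directly
+ /// (sketch; the TopDocs.ScoreDocs member is assumed from the wider library):
+ /// TopDocs top = searcher.Search(query, 10);
+ /// foreach (ScoreDoc sd in top.ScoreDocs)
+ ///     System.Console.WriteLine("doc=" + sd.Doc + " score=" + sd.Score);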

+ /// BooleanQuery.TooManyClauses + public virtual void Search(Query query, Collector results) + { + Search(CreateWeight(query), null, results); + } + + /// Lower-level search API. + /// + ///

is called for every matching + /// document. + ///
Collector-based access to remote indexes is discouraged. + /// + ///

Applications should only use this if they need all of the + /// matching documents. The high-level search API () + /// is usually more efficient, as it skips + /// non-high-scoring hits. + /// + ///

+ /// to match documents + /// + /// if non-null, used to permit documents to be collected. + /// + /// to receive hits + /// + /// BooleanQuery.TooManyClauses + public virtual void Search(Query query, Filter filter, Collector results) + { + Search(CreateWeight(query), filter, results); + } + + /// Finds the top n + /// hits for query, applying filter if non-null. + /// + /// + /// BooleanQuery.TooManyClauses + public virtual TopDocs Search(Query query, Filter filter, int n) + { + return Search(CreateWeight(query), filter, n); + } + + /// Finds the top n + /// hits for query. + /// + /// + /// BooleanQuery.TooManyClauses + public virtual TopDocs Search(Query query, int n) + { + return Search(query, null, n); + } + + /// Returns an Explanation that describes how doc scored against + /// query. + /// + ///

This is intended to be used in developing Similarity implementations, + /// and, for good performance, should not be displayed with every hit. + /// Computing an explanation is as expensive as executing the query over the + /// entire index. + ///
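+ /// Example (sketch; topHit is a ScoreDoc obtained from a previous search):
+ /// Explanation explanation = searcher.Explain(query, topHit.Doc);
+ /// System.Console.WriteLine(explanation.ToString());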

+ public virtual Explanation Explain(Query query, int doc) + { + return Explain(CreateWeight(query), doc); + } + + /// The Similarity implementation used by this searcher. + private Similarity similarity; + + /// Expert: Gets or Sets the Similarity implementation used by this Searcher. + /// + /// + /// + /// + public virtual Similarity Similarity + { + get { return this.similarity; } + set { this.similarity = value; } + } + + /// creates a weight for query + /// new weight + /// + public /*protected internal*/ virtual Weight CreateWeight(Query query) + { + return query.Weight(this); + } + + // inherit javadoc + public virtual int[] DocFreqs(Term[] terms) + { + int[] result = new int[terms.Length]; + for (int i = 0; i < terms.Length; i++) + { + result[i] = DocFreq(terms[i]); + } + return result; + } + + public abstract void Search(Weight weight, Filter filter, Collector results); + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + + public abstract int DocFreq(Term term); + public abstract int MaxDoc { get; } + public abstract TopDocs Search(Weight weight, Filter filter, int n); + public abstract Document Doc(int i); + public abstract Document Doc(int docid, FieldSelector fieldSelector); + public abstract Query Rewrite(Query query); + public abstract Explanation Explain(Weight weight, int doc); + public abstract TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort); + /* End patch for GCJ bug #15411. */ + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Similarity.cs b/external/Lucene.Net.Light/src/core/Search/Similarity.cs new file mode 100644 index 0000000000..18583e3bef --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Similarity.cs @@ -0,0 +1,697 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Documents; +using FieldInvertState = Lucene.Net.Index.FieldInvertState; +using Term = Lucene.Net.Index.Term; +using SmallFloat = Lucene.Net.Util.SmallFloat; +using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation; + +namespace Lucene.Net.Search +{ + + /// Expert: Scoring API. + ///

Subclasses implement search scoring. + /// + ///

The score of query q for document d correlates to the + /// cosine-distance or dot-product between document and query vectors in a + /// + /// Vector Space Model (VSM) of Information Retrieval. + /// A document whose vector is closer to the query vector in that model is scored higher. + /// + /// The score is computed as follows: + /// + ///

+ /// score(q,d) = coord(q,d) · queryNorm(q) · Σ ( tf(t in d) · idf(t)² · t.Boost · norm(t,d) )
+ /// (the sum is taken over each term t in the query q)

where + /// + /// + /// + /// tf(t in d) + /// correlates to the term's frequency, + /// defined as the number of times term t appears in the currently scored document d. + /// Documents that have more occurrences of a given term receive a higher score. + /// The default computation for tf(t in d) in + /// DefaultSimilarity is: + /// + ///
 
+ /// tf(t in d) = frequency½
+ ///
+ /// + /// + /// + /// idf(t) stands for Inverse Document Frequency. This value + /// correlates to the inverse of docFreq + /// (the number of documents in which the term t appears). + /// This means rarer terms give higher contribution to the total score. + /// The default computation for idf(t) in + /// DefaultSimilarity is: + /// + ///
 
+ /// idf(t) = 1 + log ( numDocs / (docFreq + 1) )
 
+ ///
+ /// + /// + /// + /// coord(q,d) + /// is a score factor based on how many of the query terms are found in the specified document. + /// Typically, a document that contains more of the query's terms will receive a higher score + /// than another document with fewer query terms. + /// This is a search time factor computed in + /// coord(q,d) + /// by the Similarity in effect at search time. + ///
 
+ ///
+ /// + /// + /// + /// queryNorm(q) + /// + /// is a normalizing factor used to make scores between queries comparable. + /// This factor does not affect document ranking (since all ranked documents are multiplied by the same factor), + /// but rather just attempts to make scores from different queries (or even different indexes) comparable. + /// This is a search time factor computed by the Similarity in effect at search time. + /// + /// The default computation in + /// DefaultSimilarity + /// is: + ///
 
+ /// queryNorm(q) = queryNorm(sumOfSquaredWeights) = 1 / sumOfSquaredWeights½
 
+ /// + /// The sum of squared weights (of the query terms) is + /// computed by the query object. + /// For example, a boolean query + /// computes this value as: + /// + ///
 
+ /// GetSumOfSquaredWeights = q.Boost² · Σ ( idf(t) · t.Boost )²
+ /// (the sum is taken over each term t in the query q)
 
+ /// + ///
+ /// + /// + /// + /// t.Boost + /// is a search time boost of term t in the query q as + /// specified in the query text + /// (see query syntax), + /// or as set by application calls to + /// . + /// Notice that there is really no direct API for accessing a boost of one term in a multi term query, + /// but rather multi terms are represented in a query as multi + /// TermQuery objects, + /// and so the boost of a term in the query is accessible by calling the sub-query + /// . + ///
 
+ ///
+ /// + /// + /// + /// norm(t,d) encapsulates a few (indexing time) boost and length factors: + /// + /// + /// Document boost - set by calling + /// doc.Boost + /// before adding the document to the index. + /// + /// Field boost - set by calling + /// field.Boost + /// before adding the field to a document. + /// + /// LengthNorm(field) - computed + /// when the document is added to the index in accordance with the number of tokens + /// of this field in the document, so that shorter fields contribute more to the score. + /// LengthNorm is computed by the Similarity class in effect at indexing. + /// + /// + /// + ///

+ /// When a document is added to the index, all the above factors are multiplied. + /// If the document has multiple fields with the same name, all their boosts are multiplied together: + /// + ///
 
+ /// norm(t,d) = doc.Boost · LengthNorm(field) · Π field.Boost
+ /// (the product is taken over each field f in d named as t)
 
+ /// However the resulted norm value is encoded as a single byte + /// before being stored. + /// At search time, the norm byte value is read from the index + /// directory and + /// decoded back to a float norm value. + /// This encoding/decoding, while reducing index size, comes with the price of + /// precision loss - it is not guaranteed that decode(encode(x)) = x. + /// For instance, decode(encode(0.89)) = 0.75. + /// Also notice that search time is too late to modify this norm part of scoring, e.g. by + /// using a different for search. + ///
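+ /// For example, an application that does not want shorter fields to score higher
+ /// could install a Similarity with a flat length norm before indexing and searching
+ /// (sketch; DefaultSimilarity is assumed as the base class, it is not defined in this file):
+ /// class FlatLengthSimilarity : DefaultSimilarity
+ /// {
+ ///     public override float LengthNorm(string fieldName, int numTokens)
+ ///     {
+ ///         return 1.0f;   // field length no longer influences norm(t,d)
+ ///     }
+ /// }
+ /// // install it and re-index, since norms are baked into the index:
+ /// Similarity.Default = new FlatLengthSimilarity();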
 
+ /// + /// + /// + ///

+ /// + /// + /// + /// + /// + /// + [Serializable] + public abstract class Similarity + { + protected Similarity() + { + InitBlock(); + } + [Serializable] + private class AnonymousClassIDFExplanation1:IDFExplanation + { + public AnonymousClassIDFExplanation1(int df, int max, float idf, Similarity enclosingInstance) + { + InitBlock(df, max, idf, enclosingInstance); + } + private void InitBlock(int df, int max, float idf, Similarity enclosingInstance) + { + this.df = df; + this.max = max; + this.idf = idf; + this.enclosingInstance = enclosingInstance; + } + private int df; + private int max; + private float idf; + private Similarity enclosingInstance; + public Similarity Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + //@Override + public override System.String Explain() + { + return "idf(docFreq=" + df + ", maxDocs=" + max + ")"; + } + //@Override + + public override float Idf + { + get { return idf; } + } + } + [Serializable] + private class AnonymousClassIDFExplanation3:IDFExplanation + { + public AnonymousClassIDFExplanation3(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance) + { + InitBlock(fIdf, exp, enclosingInstance); + } + private void InitBlock(float fIdf, System.Text.StringBuilder exp, Similarity enclosingInstance) + { + this.fIdf = fIdf; + this.exp = exp; + this.enclosingInstance = enclosingInstance; + } + private float fIdf; + private System.Text.StringBuilder exp; + private Similarity enclosingInstance; + public Similarity Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + //@Override + + public override float Idf + { + get { return fIdf; } + } + + //@Override + public override System.String Explain() + { + return exp.ToString(); + } + } + private void InitBlock() + { + + } + + /// The Similarity implementation used by default. + private static Similarity defaultImpl = new DefaultSimilarity(); + public const int NO_DOC_ID_PROVIDED = - 1; + + /// Gets or sets the default Similarity implementation + /// used by indexing and search code. + ///

This is initially an instance of DefaultSimilarity. + ///

+ /// + /// + /// + /// + public static Similarity Default + { + get { return defaultImpl; } + set { defaultImpl = value; } + } + + /// Cache of decoded bytes. + private static readonly float[] NORM_TABLE = new float[256]; + + /// Decodes a normalization factor stored in an index. + /// + /// + public static float DecodeNorm(byte b) + { + return NORM_TABLE[b & 0xFF]; // & 0xFF maps negative bytes to positive above 127 + } + + /// Returns a table for decoding normalization bytes. + /// + /// + public static float[] GetNormDecoder() + { + return NORM_TABLE; + } + + /// Compute the normalization value for a field, given the accumulated + /// state of term processing for this field (see ). + /// + ///

Implementations should calculate a float value based on the field + /// state and then return that value. + /// + ///

For backward compatibility this method by default calls + /// LengthNorm(String, int), passing + /// FieldInvertState.Length as the second argument, and + /// then multiplies this value by FieldInvertState.Boost.

+ /// + ///

WARNING: This API is new and experimental and may + /// suddenly change.

+ /// + ///

+ /// field name + /// + /// current processing state for this field + /// + /// the calculated float norm + /// + public virtual float ComputeNorm(System.String field, FieldInvertState state) + { + return (float) (state.Boost * LengthNorm(field, state.Length)); + } + + /// Computes the normalization value for a field given the total number of + /// terms contained in a field. These values, together with field boosts, are + /// stored in an index and multipled into scores for hits on each field by the + /// search code. + /// + ///

Matches in longer fields are less precise, so implementations of this + /// method usually return smaller values when numTokens is large, + /// and larger values when numTokens is small. + /// + ///

Note that the return values are computed under + /// + /// and then stored using + /// . + /// Thus they have limited precision, and documents + /// must be re-indexed if this method is altered. + /// + ///

+ /// the name of the field + /// + /// the total number of tokens contained in fields named + /// fieldName of doc. + /// + /// a normalization factor for hits on this field of this document + /// + /// + /// + public abstract float LengthNorm(System.String fieldName, int numTokens); + + /// Computes the normalization value for a query given the sum of the squared + /// weights of each of the query terms. This value is then multipled into the + /// weight of each query term. + /// + ///

This does not affect ranking, but rather just attempts to make scores + /// from different queries comparable. + /// + ///

+ /// the sum of the squares of query term weights + /// + /// a normalization factor for query weights + /// + public abstract float QueryNorm(float sumOfSquaredWeights); + + /// Encodes a normalization factor for storage in an index. + /// + ///

The encoding uses a three-bit mantissa, a five-bit exponent, and + /// the zero-exponent point at 15, thus + /// representing values from around 7x10^9 to 2x10^-9 with about one + /// significant decimal digit of accuracy. Zero is also represented. + /// Negative numbers are rounded up to zero. Values too large to represent + /// are rounded down to the largest representable value. Positive values too + /// small to represent are rounded up to the smallest positive representable + /// value. + /// + ///
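+ /// For example, using the methods defined in this class:
+ /// byte encoded = Similarity.EncodeNorm(0.89f);
+ /// float decoded = Similarity.DecodeNorm(encoded);   // 0.75f - precision is lost, as described above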

+ /// + /// + public static byte EncodeNorm(float f) + { + return (byte) SmallFloat.FloatToByte315(f); + } + + + /// Computes a score factor based on a term or phrase's frequency in a + /// document. This value is multiplied by the + /// factor for each term in the query and these products are then summed to + /// form the initial score for a document. + /// + ///

Terms and phrases repeated in a document indicate the topic of the + /// document, so implementations of this method usually return larger values + /// when freq is large, and smaller values when freq + /// is small. + /// + ///

The default implementation calls Tf(float). + ///

+ /// the frequency of a term within a document + /// + /// a score factor based on a term's within-document frequency + /// + public virtual float Tf(int freq) + { + return Tf((float) freq); + } + + /// Computes the amount of a sloppy phrase match, based on an edit distance. + /// This value is summed for each sloppy phrase match in a document to form + /// the frequency that is passed to . + /// + ///

A phrase match with a small edit distance to a document passage more + /// closely matches the document, so implementations of this method usually + /// return larger values when the edit distance is small and smaller values + /// when it is large. + /// + ///

+ /// + /// the edit distance of this sloppy phrase match + /// the frequency increment for this match + public abstract float SloppyFreq(int distance); + + /// Computes a score factor based on a term or phrase's frequency in a + /// document. This value is multiplied by the + /// factor for each term in the query and these products are then summed to + /// form the initial score for a document. + /// + ///

Terms and phrases repeated in a document indicate the topic of the + /// document, so implementations of this method usually return larger values + /// when freq is large, and smaller values when freq + /// is small. + /// + ///

+ /// the frequency of a term within a document + /// + /// a score factor based on a term's within-document frequency + /// + public abstract float Tf(float freq); + + /// Computes a score factor for a simple term and returns an explanation + /// for that score factor. + /// + ///

+ /// The default implementation uses: + /// + /// + /// idf(searcher.DocFreq(term), searcher.MaxDoc); + /// + /// + /// Note that Searcher.MaxDoc is used instead of + /// IndexReader.NumDocs() because it is + /// proportional to Searcher.DocFreq(Term), i.e., when one is + /// inaccurate, so is the other, and in the same direction. + /// + ///

+ /// the term in question + /// + /// the document collection being searched + /// + /// an IDFExplain object that includes both an idf score factor + /// and an explanation for the term. + /// + /// IOException + public virtual IDFExplanation IdfExplain(Term term, Searcher searcher) + { + int df = searcher.DocFreq(term); + int max = searcher.MaxDoc; + float idf2 = Idf(df, max); + return new AnonymousClassIDFExplanation1(df, max, idf2, this); + } + + /// Computes a score factor for a phrase. + /// + ///

+ /// The default implementation sums the idf factor for + /// each term in the phrase. + /// + ///

+ /// the terms in the phrase + /// + /// the document collection being searched + /// + /// an IDFExplain object that includes both an idf + /// score factor for the phrase and an explanation + /// for each term. + /// + /// IOException + public virtual IDFExplanation IdfExplain(ICollection terms, Searcher searcher) + { + int max = searcher.MaxDoc; + float idf2 = 0.0f; + System.Text.StringBuilder exp = new System.Text.StringBuilder(); + foreach (Term term in terms) + { + int df = searcher.DocFreq(term); + idf2 += Idf(df, max); + exp.Append(" "); + exp.Append(term.Text); + exp.Append("="); + exp.Append(df); + } + float fIdf = idf2; + return new AnonymousClassIDFExplanation3(fIdf, exp, this); + } + + /// Computes a score factor based on a term's document frequency (the number + /// of documents which contain the term). This value is multiplied by the + /// factor for each term in the query and these products are + /// then summed to form the initial score for a document. + /// + ///

Terms that occur in fewer documents are better indicators of topic, so + /// implementations of this method usually return larger values for rare terms, + /// and smaller values for common terms. + /// + ///

+ /// the number of documents which contain the term + /// + /// the total number of documents in the collection + /// + /// a score factor based on the term's document frequency + /// + public abstract float Idf(int docFreq, int numDocs); + + /// Computes a score factor based on the fraction of all query terms that a + /// document contains. This value is multiplied into scores. + /// + ///

The presence of a large portion of the query terms indicates a better + /// match with the query, so implementations of this method usually return + /// larger values when the ratio between these parameters is large and smaller + /// values when the ratio between them is small. + /// + ///

+ /// the number of query terms matched in the document + /// + /// the total number of terms in the query + /// + /// a score factor based on term overlap with the query + /// + public abstract float Coord(int overlap, int maxOverlap); + + + /// Calculate a scoring factor based on the data in the payload. Overriding implementations + /// are responsible for interpreting what is in the payload. Lucene makes no assumptions about + /// what is in the byte array. + ///

+ /// The default implementation returns 1. + /// + ///

+ /// The docId currently being scored. If this value is , then it should be assumed that the PayloadQuery implementation does not provide document information + /// + /// The fieldName of the term this payload belongs to + /// + /// The start position of the payload + /// + /// The end position of the payload + /// + /// The payload byte array to be scored + /// + /// The offset into the payload array + /// + /// The length in the array + /// + /// An implementation dependent float to be used as a scoring factor + /// + /// + public virtual float ScorePayload(int docId, System.String fieldName, int start, int end, byte[] payload, int offset, int length) + { + return 1; + } + + static Similarity() + { + { + for (int i = 0; i < 256; i++) + NORM_TABLE[i] = SmallFloat.Byte315ToFloat((byte) i); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/SimilarityDelegator.cs b/external/Lucene.Net.Light/src/core/Search/SimilarityDelegator.cs new file mode 100644 index 0000000000..433fc0b007 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SimilarityDelegator.cs @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using FieldInvertState = Lucene.Net.Index.FieldInvertState; + +namespace Lucene.Net.Search +{ + /// Expert: Delegating scoring implementation. Useful in + /// implementations, to override only certain + /// methods of a Searcher's Similiarty implementation.. + /// + [Serializable] + public class SimilarityDelegator:Similarity + { + private Similarity delegee; + + /// Construct a that delegates all methods to another. 
+ /// the Similarity implementation to delegate to + public SimilarityDelegator(Similarity delegee) + { + this.delegee = delegee; + } + + public override float ComputeNorm(System.String fieldName, FieldInvertState state) + { + return delegee.ComputeNorm(fieldName, state); + } + + public override float LengthNorm(System.String fieldName, int numTerms) + { + return delegee.LengthNorm(fieldName, numTerms); + } + + public override float QueryNorm(float sumOfSquaredWeights) + { + return delegee.QueryNorm(sumOfSquaredWeights); + } + + public override float Tf(float freq) + { + return delegee.Tf(freq); + } + + public override float SloppyFreq(int distance) + { + return delegee.SloppyFreq(distance); + } + + public override float Idf(int docFreq, int numDocs) + { + return delegee.Idf(docFreq, numDocs); + } + + public override float Coord(int overlap, int maxOverlap) + { + return delegee.Coord(overlap, maxOverlap); + } + + public override float ScorePayload(int docId, string fieldName, int start, int end, byte[] payload, int offset, int length) + { + return delegee.ScorePayload(docId, fieldName, start, end, payload, offset, length); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/SingleTermEnum.cs b/external/Lucene.Net.Light/src/core/Search/SingleTermEnum.cs new file mode 100644 index 0000000000..f80b8d39d9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SingleTermEnum.cs @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; + +namespace Lucene.Net.Search +{ + /// + /// Subclass of FilteredTermEnum for enumerating a single term. + ///

+ /// This can be used by MultiTermQuery subclasses that need only visit one term, + /// but want to preserve MultiTermQuery semantics such as + /// MultiTermQuery.RewriteMethod. + ///

+ public class SingleTermEnum : FilteredTermEnum + { + private Term singleTerm; + private bool _endEnum = false; + + /// + /// Creates a new SingleTermEnum. + ///

+ /// After calling the constructor the enumeration is already pointing to the term, + /// if it exists. + ///

+ public SingleTermEnum(IndexReader reader, Term singleTerm) + { + this.singleTerm = singleTerm; + SetEnum(reader.Terms(singleTerm)); + } + + public override float Difference() + { + return 1.0F; + } + + public override bool EndEnum() + { + return _endEnum; + } + + protected internal override bool TermCompare(Term term) + { + if (term.Equals(singleTerm)) + { + return true; + } + else + { + _endEnum = true; + return false; + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Search/SloppyPhraseScorer.cs b/external/Lucene.Net.Light/src/core/Search/SloppyPhraseScorer.cs new file mode 100644 index 0000000000..2052c2b7a4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SloppyPhraseScorer.cs @@ -0,0 +1,244 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Support; +using TermPositions = Lucene.Net.Index.TermPositions; + +namespace Lucene.Net.Search +{ + + sealed class SloppyPhraseScorer:PhraseScorer + { + private int slop; + private PhrasePositions[] repeats; + private PhrasePositions[] tmpPos; // for flipping repeating pps. + private bool checkedRepeats; + + internal SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, int slop, byte[] norms):base(weight, tps, offsets, similarity, norms) + { + this.slop = slop; + } + + /// Score a candidate doc for all slop-valid position-combinations (matches) + /// encountered while traversing/hopping the PhrasePositions. + ///
The score contribution of a match depends on the distance: + ///
- highest score for distance=0 (exact match). + ///
- score gets lower as distance gets higher. + ///
Example: for query "a b"~2, a document "x a b a y" can be scored twice: + /// once for "a b" (distance=0), and once for "b a" (distance=2). + ///
Possibly not all valid combinations are encountered, because for efficiency + /// we always propagate the least PhrasePosition. This allows to base on + /// PriorityQueue and move forward faster. + /// As result, for example, document "a b c b a" + /// would score differently for queries "a b c"~4 and "c b a"~4, although + /// they really are equivalent. + /// Similarly, for doc "a b c b a f g", query "c b"~2 + /// would get same score as "g f"~2, although "c b"~2 could be matched twice. + /// We may want to fix this in the future (currently not, for performance reasons). + ///
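+ /// Query-level illustration of the "a b"~2 example above (sketch; PhraseQuery and its
+ /// Slop property are assumed from the wider library, they are not part of this file):
+ /// PhraseQuery query = new PhraseQuery();
+ /// query.Add(new Term("body", "a"));
+ /// query.Add(new Term("body", "b"));
+ /// query.Slop = 2;   // matches "x a b a y" twice: "a b" (distance 0) and "b a" (distance 2)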
+ protected internal override float PhraseFreq() + { + int end = InitPhrasePositions(); + + float freq = 0.0f; + bool done = (end < 0); + while (!done) + { + PhrasePositions pp = pq.Pop(); + int start = pp.position; + int next = pq.Top().position; + + bool tpsDiffer = true; + for (int pos = start; pos <= next || !tpsDiffer; pos = pp.position) + { + if (pos <= next && tpsDiffer) + start = pos; // advance pp to min window + if (!pp.NextPosition()) + { + done = true; // ran out of a term -- done + break; + } + PhrasePositions pp2 = null; + tpsDiffer = !pp.repeats || (pp2 = TermPositionsDiffer(pp)) == null; + if (pp2 != null && pp2 != pp) + { + pp = Flip(pp, pp2); // flip pp to pp2 + } + } + + int matchLength = end - start; + if (matchLength <= slop) + freq += Similarity.SloppyFreq(matchLength); // score match + + if (pp.position > end) + end = pp.position; + pq.Add(pp); // restore pq + } + + return freq; + } + + // flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back. + // assumes: pp!=pp2, pp2 in pq, pp not in pq. + // called only when there are repeating pps. + private PhrasePositions Flip(PhrasePositions pp, PhrasePositions pp2) + { + int n = 0; + PhrasePositions pp3; + //pop until finding pp2 + while ((pp3 = pq.Pop()) != pp2) + { + tmpPos[n++] = pp3; + } + //insert back all but pp2 + for (n--; n >= 0; n--) + { + pq.InsertWithOverflow(tmpPos[n]); + } + //insert pp back + pq.Add(pp); + return pp2; + } + + /// Init PhrasePositions in place. + /// There is a one time initialization for this scorer: + ///
- Put in repeats[] each pp that has another pp with same position in the doc. + ///
- Also mark each such pp by pp.repeats = true. + ///
Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. + /// In particular, this allows to score queries with no repetitions with no overhead due to this computation. + ///
- Example 1 - query with no repetitions: "ho my"~2 + ///
- Example 2 - query with repetitions: "ho my my"~2 + ///
- Example 3 - query with repetitions: "my ho my"~2 + ///
Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection. + ///
+ /// end (max position), or -1 if any term ran out (i.e. done) + /// + /// IOException + private int InitPhrasePositions() + { + int end = 0; + + // no repeats at all (most common case is also the simplest one) + if (checkedRepeats && repeats == null) + { + // build queue from list + pq.Clear(); + for (PhrasePositions pp = first; pp != null; pp = pp.next) + { + pp.FirstPosition(); + if (pp.position > end) + end = pp.position; + pq.Add(pp); // build pq from list + } + return end; + } + + // position the pp's + for (PhrasePositions pp = first; pp != null; pp = pp.next) + pp.FirstPosition(); + + // one time initializatin for this scorer + if (!checkedRepeats) + { + checkedRepeats = true; + // check for repeats + HashMap m = null; + for (PhrasePositions pp = first; pp != null; pp = pp.next) + { + int tpPos = pp.position + pp.offset; + for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) + { + int tpPos2 = pp2.position + pp2.offset; + if (tpPos2 == tpPos) + { + if (m == null) + { + m = new HashMap(); + } + pp.repeats = true; + pp2.repeats = true; + m[pp] = null; + m[pp2] = null; + } + } + } + if (m != null) + { + repeats = m.Keys.ToArray(); + } + } + + // with repeats must advance some repeating pp's so they all start with differing tp's + if (repeats != null) + { + for (int i = 0; i < repeats.Length; i++) + { + PhrasePositions pp = repeats[i]; + PhrasePositions pp2; + while ((pp2 = TermPositionsDiffer(pp)) != null) + { + if (!pp2.NextPosition()) + // out of pps that do not differ, advance the pp with higher offset + return - 1; // ran out of a term -- done + } + } + } + + // build queue from list + pq.Clear(); + for (PhrasePositions pp = first; pp != null; pp = pp.next) + { + if (pp.position > end) + end = pp.position; + pq.Add(pp); // build pq from list + } + + if (repeats != null) + { + tmpPos = new PhrasePositions[pq.Size()]; + } + return end; + } + + /// We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences + /// in the query of the same word would go elsewhere in the matched doc. + /// + /// null if differ (i.e. valid) otherwise return the higher offset PhrasePositions + /// out of the first two PPs found to not differ. + /// + private PhrasePositions TermPositionsDiffer(PhrasePositions pp) + { + // efficiency note: a more efficient implementation could keep a map between repeating + // pp's, so that if pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats + // of term2, pp2a would only be checked against pp2b but not against pp1a, pp1b, pp1c. + // However this would complicate code, for a rather rare case, so choice is to compromise here. + int tpPos = pp.position + pp.offset; + for (int i = 0; i < repeats.Length; i++) + { + PhrasePositions pp2 = repeats[i]; + if (pp2 == pp) + continue; + int tpPos2 = pp2.position + pp2.offset; + if (tpPos2 == tpPos) + return pp.offset > pp2.offset?pp:pp2; // do not differ: return the one with higher offset. + } + return null; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Sort.cs b/external/Lucene.Net.Light/src/core/Search/Sort.cs new file mode 100644 index 0000000000..2a837ebcdc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Sort.cs @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Search +{ + + + /// Encapsulates sort criteria for returned hits. + /// + ///

The fields used to determine sort order must be carefully chosen. + /// Documents must contain a single term in such a field, + /// and the value of the term should indicate the document's relative position in + /// a given sort order. The field must be indexed, but should not be tokenized, + /// and does not need to be stored (unless you happen to want it back with the + /// rest of your document data). In other words: + /// + ///

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.NOT_ANALYZED));

+ /// + /// + ///

Valid Types of Values

+ /// + ///

There are four possible kinds of term values which may be put into + /// sorting fields: Integers, Longs, Floats, or Strings. Unless + /// SortField objects are specified, the type of value + /// in the field is determined by parsing the first term in the field. + /// + ///

Integer term values should contain only digits and an optional + /// preceding negative sign. Values must be base 10 and in the range + /// Integer.MIN_VALUE and Integer.MAX_VALUE inclusive. + /// Documents which should appear first in the sort + /// should have low value integers, later documents high values + /// (i.e. the documents should be numbered 1..n where + /// 1 is the first and n the last). + /// + ///

Long term values should contain only digits and an optional + /// preceding negative sign. Values must be base 10 and in the range + /// Long.MIN_VALUE and Long.MAX_VALUE inclusive. + /// Documents which should appear first in the sort + /// should have low value integers, later documents high values. + /// + ///

Float term values should conform to values accepted by + /// (except that NaN + /// and Infinity are not supported). + /// Documents which should appear first in the sort + /// should have low values, later documents high values. + /// + ///

String term values can contain any valid String, but should + /// not be tokenized. The values are sorted according to their + /// natural order. Note that using this type + /// of term value has higher memory requirements than the other + /// types. + /// + ///

Object Reuse

+ /// + ///

One of these objects can be + /// used multiple times and the sort order changed between usages. + /// + ///

This class is thread safe. + /// + ///

Memory Usage

+ /// + ///

Sorting uses caches of term values maintained by the + /// internal HitQueue(s). The cache is static and contains an integer + /// or float array of length IndexReader.MaxDoc for each field + /// name for which a sort is performed. In other words, the size of the + /// cache in bytes is: + /// + ///

4 * IndexReader.MaxDoc * (# of different fields actually used to sort) + /// + ///

For String fields, the cache is larger: in addition to the + /// above array, the value of every term in the field is kept in memory. + /// If there are many unique terms in the field, this could + /// be quite large. + /// + ///

Note that the size of the cache is not affected by how many + /// fields are in the index and might be used to sort - only by + /// the ones actually used to sort a result set. + /// + ///

Created: Feb 12, 2004 10:53:57 AM + /// + ///
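A minimal usage sketch, assuming the Searcher.Search(Query, Filter, int, Sort) overload and an already opened IndexSearcher (the field name, query and hit count are illustrative):

Sort sort = new Sort(new SortField("byNumber", SortField.INT, true));
TopFieldDocs docs = searcher.Search(new TermQuery(new Term("body", "lucene")), null, 100, sort);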

+ [Serializable] + public class Sort + { + + /// Represents sorting by computed relevance. Using this sort criteria returns + /// the same results as calling + /// Searcher#search()without a sort criteria, + /// only with slightly more overhead. + /// + public static readonly Sort RELEVANCE = new Sort(); + + /// Represents sorting by index order. + public static readonly Sort INDEXORDER; + + // internal representation of the sort criteria + internal SortField[] fields; + + /// Sorts by computed relevance. This is the same sort criteria as calling + /// without a sort criteria, + /// only with slightly more overhead. + /// + public Sort():this(SortField.FIELD_SCORE) + { + } + + /// Sorts by the criteria in the given SortField. + public Sort(SortField field) + { + SetSort(field); + } + + /// Sorts in succession by the criteria in each SortField. + public Sort(params SortField[] fields) + { + SetSort(fields); + } + + /// Sets the sort to the given criteria. + public virtual void SetSort(SortField field) + { + this.fields = new SortField[]{field}; + } + + /// Sets the sort to the given criteria in succession. + public virtual void SetSort(params SortField[] fields) + { + this.fields = fields; + } + + /// Representation of the sort criteria. + /// Array of SortField objects used in this sort criteria + /// + public virtual SortField[] GetSort() + { + return fields; + } + + public override System.String ToString() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + + for (int i = 0; i < fields.Length; i++) + { + buffer.Append(fields[i].ToString()); + if ((i + 1) < fields.Length) + buffer.Append(','); + } + + return buffer.ToString(); + } + + /// Returns true if o is equal to this. + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is Sort)) + return false; + Sort other = (Sort) o; + + bool result = false; + if ((this.fields == null) && (other.fields == null)) + result = true; + else if ((this.fields != null) && (other.fields != null)) + { + if (this.fields.Length == other.fields.Length) + { + int length = this.fields.Length; + result = true; + for (int i = 0; i < length; i++) + { + if (!(this.fields[i].Equals(other.fields[i]))) + { + result = false; + break; + } + } + } + } + return result; + } + + /// Returns a hash code value for this object. + public override int GetHashCode() + { + // TODO in Java 1.5: switch to Arrays.hashCode(). The + // Java 1.4 workaround below calculates the same hashCode + // as Java 1.5's new Arrays.hashCode() + return 0x45aaf665 + EquatableList.GetHashCode(fields); + } + static Sort() + { + INDEXORDER = new Sort(SortField.FIELD_DOC); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/SortField.cs b/external/Lucene.Net.Light/src/core/Search/SortField.cs new file mode 100644 index 0000000000..5e06b4828f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SortField.cs @@ -0,0 +1,512 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Globalization; +using Lucene.Net.Support; +using NumericField = Lucene.Net.Documents.NumericField; +using IndexReader = Lucene.Net.Index.IndexReader; +using Single = Lucene.Net.Support.Single; +using Term = Lucene.Net.Index.Term; +using TermEnum = Lucene.Net.Index.TermEnum; +using StringHelper = Lucene.Net.Util.StringHelper; + +namespace Lucene.Net.Search +{ + + /// Stores information about how to sort documents by terms in an individual + /// field. Fields must be indexed in order to sort by them. + /// + ///

Created: Feb 11, 2004 1:25:29 PM + ///

+ /// + [Serializable] + public class SortField + { + /// Sort by document score (relevancy). Sort values are Float and higher + /// values are at the front. + /// + public const int SCORE = 0; + + /// Sort by document number (index order). Sort values are Integer and lower + /// values are at the front. + /// + public const int DOC = 1; + + // reserved, in Lucene 2.9, there was a constant: AUTO = 2 + + /// Sort using term values as Strings. Sort values are String and lower + /// values are at the front. + /// + public const int STRING = 3; + + /// Sort using term values as encoded Integers. Sort values are Integer and + /// lower values are at the front. + /// + public const int INT = 4; + + /// Sort using term values as encoded Floats. Sort values are Float and + /// lower values are at the front. + /// + public const int FLOAT = 5; + + /// Sort using term values as encoded Longs. Sort values are Long and + /// lower values are at the front. + /// + public const int LONG = 6; + + /// Sort using term values as encoded Doubles. Sort values are Double and + /// lower values are at the front. + /// + public const int DOUBLE = 7; + + /// Sort using term values as encoded Shorts. Sort values are Short and + /// lower values are at the front. + /// + public const int SHORT = 8; + + /// Sort using a custom Comparator. Sort values are any Comparable and + /// sorting is done according to natural order. + /// + public const int CUSTOM = 9; + + /// Sort using term values as encoded Bytes. Sort values are Byte and + /// lower values are at the front. + /// + public const int BYTE = 10; + + /// Sort using term values as Strings, but comparing by + /// value (using String.compareTo) for all comparisons. + /// This is typically slower than , which + /// uses ordinals to do the sorting. + /// + public const int STRING_VAL = 11; + + // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace" + // as the above static int values. Any new values must not have the same value + // as FieldCache.STRING_INDEX. + + /// Represents sorting by document score (relevancy). + public static readonly SortField FIELD_SCORE = new SortField(null, SCORE); + + /// Represents sorting by document number (index order). + public static readonly SortField FIELD_DOC = new SortField(null, DOC); + + private System.String field; + private int type; // defaults to determining type dynamically + private System.Globalization.CultureInfo locale; // defaults to "natural order" (no Locale) + internal bool reverse = false; // defaults to natural order + private Lucene.Net.Search.Parser parser; + + // Used for CUSTOM sort + private FieldComparatorSource comparatorSource; + + /// Creates a sort by terms in the given field with the type of term + /// values explicitly given. + /// + /// Name of field to sort by. Can be null if + /// type is SCORE or DOC. + /// + /// Type of values in the terms. + /// + public SortField(System.String field, int type) + { + InitFieldType(field, type); + } + + /// Creates a sort, possibly in reverse, by terms in the given field with the + /// type of term values explicitly given. + /// + /// Name of field to sort by. Can be null if + /// type is SCORE or DOC. + /// + /// Type of values in the terms. + /// + /// True if natural order should be reversed. + /// + public SortField(System.String field, int type, bool reverse) + { + InitFieldType(field, type); + this.reverse = reverse; + } + + /// Creates a sort by terms in the given field, parsed + /// to numeric values using a custom . 
+ /// + /// Name of field to sort by. Must not be null. + /// + /// Instance of a , + /// which must subclass one of the existing numeric + /// parsers from . Sort type is inferred + /// by testing which numeric parser the parser subclasses. + /// + /// IllegalArgumentException if the parser fails to + /// subclass an existing numeric parser, or field is null + /// + public SortField(System.String field, Lucene.Net.Search.Parser parser):this(field, parser, false) + { + } + + /// Creates a sort, possibly in reverse, by terms in the given field, parsed + /// to numeric values using a custom . + /// + /// Name of field to sort by. Must not be null. + /// + /// Instance of a , + /// which must subclass one of the existing numeric + /// parsers from . Sort type is inferred + /// by testing which numeric parser the parser subclasses. + /// + /// True if natural order should be reversed. + /// + /// IllegalArgumentException if the parser fails to + /// subclass an existing numeric parser, or field is null + /// + public SortField(System.String field, Lucene.Net.Search.Parser parser, bool reverse) + { + if (parser is Lucene.Net.Search.IntParser) + InitFieldType(field, INT); + else if (parser is Lucene.Net.Search.FloatParser) + InitFieldType(field, FLOAT); + else if (parser is Lucene.Net.Search.ShortParser) + InitFieldType(field, SHORT); + else if (parser is Lucene.Net.Search.ByteParser) + InitFieldType(field, BYTE); + else if (parser is Lucene.Net.Search.LongParser) + InitFieldType(field, LONG); + else if (parser is Lucene.Net.Search.DoubleParser) + InitFieldType(field, DOUBLE); + else + { + throw new System.ArgumentException("Parser instance does not subclass existing numeric parser from FieldCache (got " + parser + ")"); + } + + this.reverse = reverse; + this.parser = parser; + } + + /// Creates a sort by terms in the given field sorted + /// according to the given locale. + /// + /// Name of field to sort by, cannot be null. + /// + /// Locale of values in the field. + /// + public SortField(System.String field, System.Globalization.CultureInfo locale) + { + InitFieldType(field, STRING); + this.locale = locale; + } + + /// Creates a sort, possibly in reverse, by terms in the given field sorted + /// according to the given locale. + /// + /// Name of field to sort by, cannot be null. + /// + /// Locale of values in the field. + /// + public SortField(System.String field, System.Globalization.CultureInfo locale, bool reverse) + { + InitFieldType(field, STRING); + this.locale = locale; + this.reverse = reverse; + } + + /// Creates a sort with a custom comparison function. + /// Name of field to sort by; cannot be null. + /// + /// Returns a comparator for sorting hits. + /// + public SortField(System.String field, FieldComparatorSource comparator) + { + InitFieldType(field, CUSTOM); + this.comparatorSource = comparator; + } + + /// Creates a sort, possibly in reverse, with a custom comparison function. + /// Name of field to sort by; cannot be null. + /// + /// Returns a comparator for sorting hits. + /// + /// True if natural order should be reversed. 
+ /// + public SortField(System.String field, FieldComparatorSource comparator, bool reverse) + { + InitFieldType(field, CUSTOM); + this.reverse = reverse; + this.comparatorSource = comparator; + } + + // Sets field & type, and ensures field is not NULL unless + // type is SCORE or DOC + private void InitFieldType(System.String field, int type) + { + this.type = type; + if (field == null) + { + if (type != SCORE && type != DOC) + throw new System.ArgumentException("field can only be null when type is SCORE or DOC"); + } + else + { + this.field = StringHelper.Intern(field); + } + } + + /// Returns the name of the field. Could return null + /// if the sort is by SCORE or DOC. + /// + /// Name of field, possibly <c>null</c>. + public virtual string Field + { + get { return field; } + } + + /// Returns the type of contents in the field. + /// One of the constants SCORE, DOC, STRING, INT or FLOAT. + public virtual int Type + { + get { return type; } + } + + /// Returns the Locale by which term values are interpreted. + /// May return null if no Locale was specified. + /// + /// Locale, or <c>null</c>. + public virtual CultureInfo Locale + { + get { return locale; } + } + + /// Returns the instance of a parser that fits to the given sort type. + /// May return null if no parser was specified. Sorting is using the default parser then. + /// + /// An instance of a <see cref="FieldCache" /> parser, or <c>null</c>. + public virtual Parser Parser + { + get { return parser; } + } + + /// Returns whether the sort should be reversed. + /// True if natural order should be reversed. + public virtual bool Reverse + { + get { return reverse; } + } + + /// + /// Returns the used for + /// custom sorting + /// + public virtual FieldComparatorSource ComparatorSource + { + get { return comparatorSource; } + } + + public override System.String ToString() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + switch (type) + { + + case SCORE: + buffer.Append(""); + break; + + case DOC: + buffer.Append(""); + break; + + case STRING: + buffer.Append(""); + break; + + case STRING_VAL: + buffer.Append(""); + break; + + case BYTE: + buffer.Append(""); + break; + + case SHORT: + buffer.Append(""); + break; + + case INT: + buffer.Append(""); + break; + + case LONG: + buffer.Append(""); + break; + + case FLOAT: + buffer.Append(""); + break; + + case DOUBLE: + buffer.Append(""); + break; + + case CUSTOM: + buffer.Append("'); + break; + + default: + buffer.Append(""); + break; + + } + + if (locale != null) + buffer.Append('(').Append(locale).Append(')'); + if (parser != null) + buffer.Append('(').Append(parser).Append(')'); + if (reverse) + buffer.Append('!'); + + return buffer.ToString(); + } + + /// Returns true if o is equal to this. If a + /// or + /// was provided, it must properly + /// implement equals (unless a singleton is always used). + /// + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is SortField)) + return false; + SortField other = (SortField) o; + return ((System.Object) other.field == (System.Object) this.field && other.type == this.type && + other.reverse == this.reverse && + (other.locale == null ? this.locale == null : other.locale.Equals(this.locale)) && + (other.comparatorSource == null + ? this.comparatorSource == null + : other.comparatorSource.Equals(this.comparatorSource)) && + (other.parser == null ? this.parser == null : other.parser.Equals(this.parser))); + } + + /// Returns true if o is equal to this. 
If a + /// (deprecated) or + /// was provided, it must properly + /// implement hashCode (unless a singleton is always + /// used). + /// + public override int GetHashCode() + { + int hash = type ^ 0x346565dd + (reverse ? Boolean.TrueString.GetHashCode() : Boolean.FalseString.GetHashCode()) ^ unchecked((int) 0xaf5998bb); + if (field != null) + hash += (field.GetHashCode() ^ unchecked((int) 0xff5685dd)); + if (locale != null) + { + hash += (locale.GetHashCode() ^ 0x08150815); + } + if (comparatorSource != null) + hash += comparatorSource.GetHashCode(); + if (parser != null) + hash += (parser.GetHashCode() ^ 0x3aaf56ff); + return hash; + } + + + //// field must be interned after reading from stream + // private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + // in.defaultReadObject(); + // if (field != null) + // field = StringHelper.intern(field); + // } + + [System.Runtime.Serialization.OnDeserialized] + internal void OnDeserialized(System.Runtime.Serialization.StreamingContext context) + { + field = StringHelper.Intern(field); + } + + /// Returns the to use for + /// sorting. + /// + /// NOTE: This API is experimental and might change in + /// incompatible ways in the next release. + /// + /// + /// number of top hits the queue will store + /// + /// position of this SortField within + ///. The comparator is primary if sortPos==0, + /// secondary if sortPos==1, etc. Some comparators can + /// optimize themselves when they are the primary sort. + /// + /// to use when sorting + /// + public virtual FieldComparator GetComparator(int numHits, int sortPos) + { + + if (locale != null) + { + // TODO: it'd be nice to allow FieldCache.getStringIndex + // to optionally accept a Locale so sorting could then use + // the faster StringComparator impls + return new FieldComparator.StringComparatorLocale(numHits, field, locale); + } + + switch (type) + { + case SortField.SCORE: + return new FieldComparator.RelevanceComparator(numHits); + + case SortField.DOC: + return new FieldComparator.DocComparator(numHits); + + case SortField.INT: + return new FieldComparator.IntComparator(numHits, field, parser); + + case SortField.FLOAT: + return new FieldComparator.FloatComparator(numHits, field, parser); + + case SortField.LONG: + return new FieldComparator.LongComparator(numHits, field, parser); + + case SortField.DOUBLE: + return new FieldComparator.DoubleComparator(numHits, field, parser); + + case SortField.BYTE: + return new FieldComparator.ByteComparator(numHits, field, parser); + + case SortField.SHORT: + return new FieldComparator.ShortComparator(numHits, field, parser); + + case SortField.CUSTOM: + System.Diagnostics.Debug.Assert(comparatorSource != null); + return comparatorSource.NewComparator(field, numHits, sortPos, reverse); + + case SortField.STRING: + return new FieldComparator.StringOrdValComparator(numHits, field, sortPos, reverse); + + case SortField.STRING_VAL: + return new FieldComparator.StringValComparator(numHits, field); + + default: + throw new System.SystemException("Illegal sort type: " + type); + + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/SpanFilter.cs b/external/Lucene.Net.Light/src/core/Search/SpanFilter.cs new file mode 100644 index 0000000000..f522725b24 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SpanFilter.cs @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// Abstract base class providing a mechanism to restrict searches to a subset + /// of an index and also maintains and returns position information. + /// This is useful if you want to compare the positions from a SpanQuery with the positions of items in + /// a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, + /// and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could + /// then compare position information for post processing. + /// + [Serializable] + public abstract class SpanFilter:Filter + { + /// Returns a SpanFilterResult with true for documents which should be permitted in + /// search results, and false for those that should not and Spans for where the true docs match. + /// + /// The to load position and DocIdSet information from + /// + /// A + /// + /// java.io.IOException if there was an issue accessing the necessary information + /// + /// + public abstract SpanFilterResult BitSpans(IndexReader reader); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/SpanFilterResult.cs b/external/Lucene.Net.Light/src/core/Search/SpanFilterResult.cs new file mode 100644 index 0000000000..9cafe0cd1a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SpanFilterResult.cs @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Search +{ + /// The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery + /// + ///

+ /// NOTE: This API is still experimental and subject to change. + ///

+ public class SpanFilterResult + { + private DocIdSet docIdSet; + private IList positions; //Spans spans; + + /// + /// The DocIdSet for the Filter + /// + /// A List of objects + /// + public SpanFilterResult(DocIdSet docIdSet, IList positions) + { + this.docIdSet = docIdSet; + this.positions = positions; + } + + /// The first entry in the array corresponds to the first "on" bit. + /// Entries are increasing by document order + /// + /// A List of PositionInfo objects + public virtual IList Positions + { + get { return positions; } + } + + /// Returns the docIdSet + public virtual DocIdSet DocIdSet + { + get { return docIdSet; } + } + + public class PositionInfo + { + private int doc; + private IList positions; + + + public PositionInfo(int doc) + { + this.doc = doc; + positions = new List(); + } + + public virtual void AddPosition(int start, int end) + { + positions.Add(new StartEnd(start, end)); + } + + public virtual int Doc + { + get { return doc; } + } + + /// + /// A List of <see cref="Lucene.Net.Search.SpanFilterResult.StartEnd" /> objects + public virtual IList Positions + { + get { return positions; } + } + } + + public class StartEnd + { + private int start; + private int end; + + + public StartEnd(int start, int end) + { + this.start = start; + this.end = end; + } + + /// + /// The end position of this match + public virtual int End + { + get { return end; } + } + + /// The Start position + /// The start position of this match + public virtual int Start + { + get { return start; } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/SpanQueryFilter.cs b/external/Lucene.Net.Light/src/core/Search/SpanQueryFilter.cs new file mode 100644 index 0000000000..af3de6bad0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/SpanQueryFilter.cs @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using IndexReader = Lucene.Net.Index.IndexReader; +using OpenBitSet = Lucene.Net.Util.OpenBitSet; +using SpanQuery = Lucene.Net.Search.Spans.SpanQuery; + +namespace Lucene.Net.Search +{ + + /// Constrains search results to only match those which also match a provided + /// query. Also provides position information about where each document matches + /// at the cost of extra space compared with the QueryWrapperFilter. + /// There is an added cost to this above what is stored in a . Namely, + /// the position information for each matching document is stored. + ///

+ /// This filter does not cache. See the CachingSpanFilter for a wrapper that + /// caches. + /// + /// + ///
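A brief usage sketch, assuming a SpanTermQuery clause and an open IndexReader named reader (the term and field name are illustrative):

SpanQueryFilter filter = new SpanQueryFilter(new SpanTermQuery(new Term("body", "foo")));
SpanFilterResult result = filter.BitSpans(reader);   // matching docs plus their match positions
DocIdSet docs = filter.GetDocIdSet(reader);          // plain filter behaviour, positions discarded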

+ /// $Id:$ + /// + [Serializable] + public class SpanQueryFilter:SpanFilter + { + protected internal SpanQuery internalQuery; + + protected internal SpanQueryFilter() + { + } + + /// Constructs a filter which only matches documents matching + /// query. + /// + /// The to use as the basis for the Filter. + /// + public SpanQueryFilter(SpanQuery query) + { + this.internalQuery = query; + } + + public override DocIdSet GetDocIdSet(IndexReader reader) + { + SpanFilterResult result = BitSpans(reader); + return result.DocIdSet; + } + + public override SpanFilterResult BitSpans(IndexReader reader) + { + + OpenBitSet bits = new OpenBitSet(reader.MaxDoc); + Lucene.Net.Search.Spans.Spans spans = internalQuery.GetSpans(reader); + IList tmp = new List(20); + int currentDoc = - 1; + SpanFilterResult.PositionInfo currentInfo = null; + while (spans.Next()) + { + int doc = spans.Doc(); + bits.Set(doc); + if (currentDoc != doc) + { + currentInfo = new SpanFilterResult.PositionInfo(doc); + tmp.Add(currentInfo); + currentDoc = doc; + } + currentInfo.AddPosition(spans.Start(), spans.End()); + } + return new SpanFilterResult(bits, tmp); + } + + + public virtual SpanQuery Query + { + get { return internalQuery; } + } + + public override System.String ToString() + { + return "SpanQueryFilter(" + internalQuery + ")"; + } + + public override bool Equals(System.Object o) + { + return o is SpanQueryFilter && this.internalQuery.Equals(((SpanQueryFilter) o).internalQuery); + } + + public override int GetHashCode() + { + return internalQuery.GetHashCode() ^ unchecked((int) 0x923F64B9); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/FieldMaskingSpanQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/FieldMaskingSpanQuery.cs new file mode 100644 index 0000000000..b69a58d1bb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/FieldMaskingSpanQuery.cs @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Query = Lucene.Net.Search.Query; +using Searcher = Lucene.Net.Search.Searcher; +using Similarity = Lucene.Net.Search.Similarity; +using Weight = Lucene.Net.Search.Weight; + +namespace Lucene.Net.Search.Spans +{ + + ///

Wrapper to allow SpanQuery objects to participate in composite + /// single-field SpanQueries by 'lying' about their search field. That is, + /// the masked SpanQuery will function as normal, + /// but its Field property simply hands back the value supplied + /// in this class's constructor.

+ /// + ///

This can be used to support Queries like SpanNearQuery or + /// SpanOrQuery across different fields, which is not ordinarily + /// permitted.

+ /// + ///

This can be useful for denormalized relational data: for example, when + /// indexing a document with conceptually many 'children':

+ /// + ///

+	/// teacherid: 1
+	/// studentfirstname: james
+	/// studentsurname: jones
+	/// 
+	/// teacherid: 2
+	/// studentfirstname: james
+	/// studentsurname: smith
+	/// studentfirstname: sally
+	/// studentsurname: jones
+	/// 
+ /// + ///

a SpanNearQuery with a slop of 0 can be applied across two + /// SpanTermQuery objects as follows: + /// + /// SpanQuery q1 = new SpanTermQuery(new Term("studentfirstname", "james")); + /// SpanQuery q2 = new SpanTermQuery(new Term("studentsurname", "jones")); + /// SpanQuery q2m = new FieldMaskingSpanQuery(q2, "studentfirstname"); + /// Query q = new SpanNearQuery(new SpanQuery[]{q1, q2m}, -1, false); + /// + /// to search for 'studentfirstname:james studentsurname:jones' and find + /// teacherid 1 without matching teacherid 2 (which has a 'james' in position 0 + /// and 'jones' in position 1).

+ /// + ///

Note: as the Field property returns the masked field, scoring will be + /// done using the norms of the field name supplied. This may lead to unexpected + /// scoring behaviour.

+ ///

+ [Serializable] + public class FieldMaskingSpanQuery:SpanQuery + { + private SpanQuery maskedQuery; + private System.String field; + + public FieldMaskingSpanQuery(SpanQuery maskedQuery, System.String maskedField) + { + this.maskedQuery = maskedQuery; + this.field = maskedField; + } + + public override string Field + { + get { return field; } + } + + public virtual SpanQuery MaskedQuery + { + get { return maskedQuery; } + } + + // :NOTE: getBoost and setBoost are not proxied to the maskedQuery + // ...this is done to be more consistent with thigns like SpanFirstQuery + + public override Spans GetSpans(IndexReader reader) + { + return maskedQuery.GetSpans(reader); + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + maskedQuery.ExtractTerms(terms); + } + + public override Weight CreateWeight(Searcher searcher) + { + return maskedQuery.CreateWeight(searcher); + } + + public override Similarity GetSimilarity(Searcher searcher) + { + return maskedQuery.GetSimilarity(searcher); + } + + public override Query Rewrite(IndexReader reader) + { + FieldMaskingSpanQuery clone = null; + + SpanQuery rewritten = (SpanQuery) maskedQuery.Rewrite(reader); + if (rewritten != maskedQuery) + { + clone = (FieldMaskingSpanQuery) this.Clone(); + clone.maskedQuery = rewritten; + } + + if (clone != null) + { + return clone; + } + else + { + return this; + } + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("mask("); + buffer.Append(maskedQuery.ToString(field)); + buffer.Append(")"); + buffer.Append(ToStringUtils.Boost(Boost)); + buffer.Append(" as "); + buffer.Append(this.field); + return buffer.ToString(); + } + + public override bool Equals(System.Object o) + { + if (!(o is FieldMaskingSpanQuery)) + return false; + FieldMaskingSpanQuery other = (FieldMaskingSpanQuery) o; + return (this.Field.Equals(other.Field) && (this.Boost == other.Boost) && this.MaskedQuery.Equals(other.MaskedQuery)); + } + + public override int GetHashCode() + { + return MaskedQuery.GetHashCode() ^ Field.GetHashCode() ^ System.Convert.ToInt32(Boost); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/NearSpansOrdered.cs b/external/Lucene.Net.Light/src/core/Search/Spans/NearSpansOrdered.cs new file mode 100644 index 0000000000..86ffae8f6d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/NearSpansOrdered.cs @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search.Spans +{ + + /// A Spans that is formed from the ordered subspans of a SpanNearQuery + /// where the subspans do not overlap and have a maximum slop between them. + ///

+ /// The formed spans contain only minimum slop matches.
+ /// The matching slop is computed from the distance(s) between + /// the non overlapping matching Spans.
+ /// Successive matches are always formed from the successive Spans + /// of the SpanNearQuery. + ///

+ /// The formed spans may contain overlaps when the slop is at least 1. + /// For example, when querying using + /// t1 t2 t3 + /// with slop at least 1, the fragment: + /// t1 t2 t1 t3 t2 t3 + /// matches twice: + /// t1 t2 .. t3 + /// t1 .. t2 t3 + /// + /// + /// Expert: + /// Only public for subclassing. Most implementations should not need this class + ///
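As a sketch of the example above (the terms and field name are illustrative), such spans are obtained from an ordered SpanNearQuery with slop 1; with inOrder set to true its GetSpans(reader) is backed by this NearSpansOrdered implementation:

SpanQuery t1 = new SpanTermQuery(new Term("f", "t1"));
SpanQuery t2 = new SpanTermQuery(new Term("f", "t2"));
SpanQuery t3 = new SpanTermQuery(new Term("f", "t3"));
Spans spans = new SpanNearQuery(new SpanQuery[] { t1, t2, t3 }, 1, true).GetSpans(reader);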

+ public class NearSpansOrdered:Spans + { + internal class AnonymousClassComparator : System.Collections.IComparer + { + public AnonymousClassComparator(NearSpansOrdered enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(NearSpansOrdered enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private NearSpansOrdered enclosingInstance; + public NearSpansOrdered Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public virtual int Compare(System.Object o1, System.Object o2) + { + return ((Spans) o1).Doc() - ((Spans) o2).Doc(); + } + } + private void InitBlock() + { + spanDocComparator = new AnonymousClassComparator(this); + } + private int allowedSlop; + private bool firstTime = true; + private bool more = false; + + /// The spans in the same order as the SpanNearQuery + private Spans[] subSpans; + + /// Indicates that all subSpans have same doc() + private bool inSameDoc = false; + + private int matchDoc = - 1; + private int matchStart = - 1; + private int matchEnd = - 1; + private System.Collections.Generic.List matchPayload; + + private Spans[] subSpansByDoc; + private System.Collections.IComparer spanDocComparator; + + private SpanNearQuery query; + private bool collectPayloads = true; + + public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader):this(spanNearQuery, reader, true) + { + } + + public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader, bool collectPayloads) + { + InitBlock(); + if (spanNearQuery.GetClauses().Length < 2) + { + throw new System.ArgumentException("Less than 2 clauses: " + spanNearQuery); + } + this.collectPayloads = collectPayloads; + allowedSlop = spanNearQuery.Slop; + SpanQuery[] clauses = spanNearQuery.GetClauses(); + subSpans = new Spans[clauses.Length]; + matchPayload = new System.Collections.Generic.List(); + subSpansByDoc = new Spans[clauses.Length]; + for (int i = 0; i < clauses.Length; i++) + { + subSpans[i] = clauses[i].GetSpans(reader); + subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() + } + query = spanNearQuery; // kept for toString() only. 
+ } + + // inherit javadocs + public override int Doc() + { + return matchDoc; + } + + // inherit javadocs + public override int Start() + { + return matchStart; + } + + // inherit javadocs + public override int End() + { + return matchEnd; + } + + public virtual Spans[] GetSubSpans() + { + return subSpans; + } + + // TODO: Remove warning after API has been finalized + // TODO: Would be nice to be able to lazy load payloads + + public override ICollection GetPayload() + { + return matchPayload; + } + + // TODO: Remove warning after API has been finalized + + public override bool IsPayloadAvailable() + { + return (matchPayload.Count == 0) == false; + } + + // inherit javadocs + public override bool Next() + { + if (firstTime) + { + firstTime = false; + for (int i = 0; i < subSpans.Length; i++) + { + if (!subSpans[i].Next()) + { + more = false; + return false; + } + } + more = true; + } + if (collectPayloads) + { + matchPayload.Clear(); + } + return AdvanceAfterOrdered(); + } + + // inherit javadocs + public override bool SkipTo(int target) + { + if (firstTime) + { + firstTime = false; + for (int i = 0; i < subSpans.Length; i++) + { + if (!subSpans[i].SkipTo(target)) + { + more = false; + return false; + } + } + more = true; + } + else if (more && (subSpans[0].Doc() < target)) + { + if (subSpans[0].SkipTo(target)) + { + inSameDoc = false; + } + else + { + more = false; + return false; + } + } + if (collectPayloads) + { + matchPayload.Clear(); + } + return AdvanceAfterOrdered(); + } + + /// Advances the subSpans to just after an ordered match with a minimum slop + /// that is smaller than the slop allowed by the SpanNearQuery. + /// + /// true iff there is such a match. + /// + private bool AdvanceAfterOrdered() + { + while (more && (inSameDoc || ToSameDoc())) + { + if (StretchToOrder() && ShrinkToAfterShortestMatch()) + { + return true; + } + } + return false; // no more matches + } + + + /// Advance the subSpans to the same document + private bool ToSameDoc() + { + System.Array.Sort(subSpansByDoc, spanDocComparator); + int firstIndex = 0; + int maxDoc = subSpansByDoc[subSpansByDoc.Length - 1].Doc(); + while (subSpansByDoc[firstIndex].Doc() != maxDoc) + { + if (!subSpansByDoc[firstIndex].SkipTo(maxDoc)) + { + more = false; + inSameDoc = false; + return false; + } + maxDoc = subSpansByDoc[firstIndex].Doc(); + if (++firstIndex == subSpansByDoc.Length) + { + firstIndex = 0; + } + } + for (int i = 0; i < subSpansByDoc.Length; i++) + { + System.Diagnostics.Debug.Assert((subSpansByDoc [i].Doc() == maxDoc) + , "NearSpansOrdered.toSameDoc() spans " + subSpansByDoc [0] + + "\n at doc " + subSpansByDoc [i].Doc() + + ", but should be at " + maxDoc); + } + inSameDoc = true; + return true; + } + + /// Check whether two Spans in the same document are ordered. + /// + /// + /// + /// + /// true iff spans1 starts before spans2 + /// or the spans start at the same position, + /// and spans1 ends before spans2. + /// + internal static bool DocSpansOrdered(Spans spans1, Spans spans2) + { + System.Diagnostics.Debug.Assert(spans1.Doc() == spans2.Doc(), "doc1 " + spans1.Doc() + " != doc2 " + spans2.Doc()); + int start1 = spans1.Start(); + int start2 = spans2.Start(); + /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */ + return (start1 == start2)?(spans1.End() < spans2.End()):(start1 < start2); + } + + /// Like , but use the spans + /// starts and ends as parameters. 
+ /// + private static bool DocSpansOrdered(int start1, int end1, int start2, int end2) + { + return (start1 == start2)?(end1 < end2):(start1 < start2); + } + + /// Order the subSpans within the same document by advancing all later spans + /// after the previous one. + /// + private bool StretchToOrder() + { + matchDoc = subSpans[0].Doc(); + for (int i = 1; inSameDoc && (i < subSpans.Length); i++) + { + while (!DocSpansOrdered(subSpans[i - 1], subSpans[i])) + { + if (!subSpans[i].Next()) + { + inSameDoc = false; + more = false; + break; + } + else if (matchDoc != subSpans[i].Doc()) + { + inSameDoc = false; + break; + } + } + } + return inSameDoc; + } + + /// The subSpans are ordered in the same doc, so there is a possible match. + /// Compute the slop while making the match as short as possible by advancing + /// all subSpans except the last one in reverse order. + /// + private bool ShrinkToAfterShortestMatch() + { + matchStart = subSpans[subSpans.Length - 1].Start(); + matchEnd = subSpans[subSpans.Length - 1].End(); + System.Collections.Generic.Dictionary possibleMatchPayloads = new System.Collections.Generic.Dictionary(); + if (subSpans[subSpans.Length - 1].IsPayloadAvailable()) + { + System.Collections.Generic.ICollection payload = subSpans[subSpans.Length - 1].GetPayload(); + foreach(byte[] pl in payload) + { + if (!possibleMatchPayloads.ContainsKey(pl)) + { + possibleMatchPayloads.Add(pl, pl); + } + } + } + + System.Collections.Generic.List possiblePayload = null; + + int matchSlop = 0; + int lastStart = matchStart; + int lastEnd = matchEnd; + for (int i = subSpans.Length - 2; i >= 0; i--) + { + Spans prevSpans = subSpans[i]; + if (collectPayloads && prevSpans.IsPayloadAvailable()) + { + System.Collections.Generic.ICollection payload = prevSpans.GetPayload(); + possiblePayload = new System.Collections.Generic.List(payload.Count); + possiblePayload.AddRange(payload); + } + + int prevStart = prevSpans.Start(); + int prevEnd = prevSpans.End(); + while (true) + { + // Advance prevSpans until after (lastStart, lastEnd) + if (!prevSpans.Next()) + { + inSameDoc = false; + more = false; + break; // Check remaining subSpans for final match. + } + else if (matchDoc != prevSpans.Doc()) + { + inSameDoc = false; // The last subSpans is not advanced here. + break; // Check remaining subSpans for last match in this document. + } + else + { + int ppStart = prevSpans.Start(); + int ppEnd = prevSpans.End(); // Cannot avoid invoking .end() + if (!DocSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) + { + break; // Check remaining subSpans. + } + else + { + // prevSpans still before (lastStart, lastEnd) + prevStart = ppStart; + prevEnd = ppEnd; + if (collectPayloads && prevSpans.IsPayloadAvailable()) + { + System.Collections.Generic.ICollection payload = prevSpans.GetPayload(); + possiblePayload = new System.Collections.Generic.List(payload.Count); + possiblePayload.AddRange(payload); + } + } + } + } + + if (collectPayloads && possiblePayload != null) + { + foreach (byte[] pl in possiblePayload) + { + if (!possibleMatchPayloads.ContainsKey(pl)) + { + possibleMatchPayloads.Add(pl, pl); + } + } + } + + System.Diagnostics.Debug.Assert(prevStart <= matchStart); + if (matchStart > prevEnd) + { + // Only non overlapping spans add to slop. + matchSlop += (matchStart - prevEnd); + } + + /* Do not break on (matchSlop > allowedSlop) here to make sure + * that subSpans[0] is advanced after the match, if any. 
+ */ + matchStart = prevStart; + lastStart = prevStart; + lastEnd = prevEnd; + } + + bool match = matchSlop <= allowedSlop; + + if (collectPayloads && match && possibleMatchPayloads.Count > 0) + { + matchPayload.AddRange(possibleMatchPayloads.Keys); + } + + return match; // ordered and allowed slop + } + + public override System.String ToString() + { + return GetType().FullName + "(" + query.ToString() + ")@" + (firstTime?"START":(more?(Doc() + ":" + Start() + "-" + End()):"END")); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/NearSpansUnordered.cs b/external/Lucene.Net.Light/src/core/Search/Spans/NearSpansUnordered.cs new file mode 100644 index 0000000000..e926827811 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/NearSpansUnordered.cs @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Util; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search.Spans +{ + + /// Similar to , but for the unordered case. + /// + /// Expert: + /// Only public for subclassing. Most implementations should not need this class + /// + public class NearSpansUnordered : Spans + { + private SpanNearQuery query; + + private System.Collections.Generic.IList ordered = new System.Collections.Generic.List(); // spans in query order + private Spans[] subSpans; + private int slop; // from query + + private SpansCell first; // linked list of spans + private SpansCell last; // sorted by doc only + + private int totalLength; // sum of current lengths + + private CellQueue queue; // sorted queue of spans + private SpansCell max; // max element in queue + + private bool more = true; // true iff not done + private bool firstTime = true; // true before first next() + + private class CellQueue : PriorityQueue + { + private void InitBlock(NearSpansUnordered enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private NearSpansUnordered enclosingInstance; + public NearSpansUnordered Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public CellQueue(NearSpansUnordered enclosingInstance, int size) + { + InitBlock(enclosingInstance); + Initialize(size); + } + + public override bool LessThan(SpansCell spans1, SpansCell spans2) + { + if (spans1.Doc() == spans2.Doc()) + { + return NearSpansOrdered.DocSpansOrdered(spans1, spans2); + } + else + { + return spans1.Doc() < spans2.Doc(); + } + } + } + + + /// Wraps a Spans, and can be used to form a linked list. 
+ private class SpansCell:Spans + { + private void InitBlock(NearSpansUnordered enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private NearSpansUnordered enclosingInstance; + public NearSpansUnordered Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal /*private*/ Spans spans; + internal /*private*/ SpansCell next; + private int length = - 1; + private int index; + + public SpansCell(NearSpansUnordered enclosingInstance, Spans spans, int index) + { + InitBlock(enclosingInstance); + this.spans = spans; + this.index = index; + } + + public override bool Next() + { + return Adjust(spans.Next()); + } + + public override bool SkipTo(int target) + { + return Adjust(spans.SkipTo(target)); + } + + private bool Adjust(bool condition) + { + if (length != - 1) + { + Enclosing_Instance.totalLength -= length; // subtract old length + } + if (condition) + { + length = End() - Start(); + Enclosing_Instance.totalLength += length; // add new length + + if (Enclosing_Instance.max == null || Doc() > Enclosing_Instance.max.Doc() || (Doc() == Enclosing_Instance.max.Doc()) && (End() > Enclosing_Instance.max.End())) + { + Enclosing_Instance.max = this; + } + } + Enclosing_Instance.more = condition; + return condition; + } + + public override int Doc() + { + return spans.Doc(); + } + public override int Start() + { + return spans.Start(); + } + public override int End() + { + return spans.End(); + } + // TODO: Remove warning after API has been finalized + + public override ICollection GetPayload() + { + return spans.GetPayload().ToArray(); + } + + // TODO: Remove warning after API has been finalized + + public override bool IsPayloadAvailable() + { + return spans.IsPayloadAvailable(); + } + + public override System.String ToString() + { + return spans.ToString() + "#" + index; + } + } + + + public NearSpansUnordered(SpanNearQuery query, IndexReader reader) + { + this.query = query; + this.slop = query.Slop; + + SpanQuery[] clauses = query.GetClauses(); + queue = new CellQueue(this, clauses.Length); + subSpans = new Spans[clauses.Length]; + for (int i = 0; i < clauses.Length; i++) + { + SpansCell cell = new SpansCell(this, clauses[i].GetSpans(reader), i); + ordered.Add(cell); + subSpans[i] = cell.spans; + } + } + public virtual Spans[] GetSubSpans() + { + return subSpans; + } + public override bool Next() + { + if (firstTime) + { + InitList(true); + ListToQueue(); // initialize queue + firstTime = false; + } + else if (more) + { + if (Min().Next()) + { + // trigger further scanning + queue.UpdateTop(); // maintain queue + } + else + { + more = false; + } + } + + while (more) + { + + bool queueStale = false; + + if (Min().Doc() != max.Doc()) + { + // maintain list + QueueToList(); + queueStale = true; + } + + // skip to doc w/ all clauses + + while (more && first.Doc() < last.Doc()) + { + more = first.SkipTo(last.Doc()); // skip first upto last + FirstToLast(); // and move it to the end + queueStale = true; + } + + if (!more) + return false; + + // found doc w/ all clauses + + if (queueStale) + { + // maintain the queue + ListToQueue(); + queueStale = false; + } + + if (AtMatch()) + { + return true; + } + + more = Min().Next(); + if (more) + { + queue.UpdateTop(); // maintain queue + } + } + return false; // no more matches + } + + public override bool SkipTo(int target) + { + if (firstTime) + { + // initialize + InitList(false); + for (SpansCell cell = first; more && cell != null; cell = cell.next) + { + more = cell.SkipTo(target); // skip all + } + if 
(more) + { + ListToQueue(); + } + firstTime = false; + } + else + { + // normal case + while (more && Min().Doc() < target) + { + // skip as needed + if (Min().SkipTo(target)) + { + queue.UpdateTop(); + } + else + { + more = false; + } + } + } + return more && (AtMatch() || Next()); + } + + private SpansCell Min() + { + return queue.Top(); + } + + public override int Doc() + { + return Min().Doc(); + } + public override int Start() + { + return Min().Start(); + } + public override int End() + { + return max.End(); + } + + // TODO: Remove warning after API has been finalized + + /// WARNING: The List is not necessarily in order of the the positions + /// Collection of &lt;c&gt;byte[]&lt;/c&gt; payloads + /// IOException + public override ICollection GetPayload() + { + System.Collections.Generic.ISet matchPayload = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); + for (SpansCell cell = first; cell != null; cell = cell.next) + { + if (cell.IsPayloadAvailable()) + { + matchPayload.UnionWith(cell.GetPayload()); + } + } + return matchPayload; + } + + // TODO: Remove warning after API has been finalized + + public override bool IsPayloadAvailable() + { + SpansCell pointer = Min(); + while (pointer != null) + { + if (pointer.IsPayloadAvailable()) + { + return true; + } + pointer = pointer.next; + } + + return false; + } + + public override System.String ToString() + { + return GetType().FullName + "(" + query.ToString() + ")@" + (firstTime?"START":(more?(Doc() + ":" + Start() + "-" + End()):"END")); + } + + private void InitList(bool next) + { + for (int i = 0; more && i < ordered.Count; i++) + { + SpansCell cell = ordered[i]; + if (next) + more = cell.Next(); // move to first entry + if (more) + { + AddToList(cell); // add to list + } + } + } + + private void AddToList(SpansCell cell) + { + if (last != null) + { + // add next to end of list + last.next = cell; + } + else + first = cell; + last = cell; + cell.next = null; + } + + private void FirstToLast() + { + last.next = first; // move first to end of list + last = first; + first = first.next; + last.next = null; + } + + private void QueueToList() + { + last = first = null; + while (queue.Top() != null) + { + AddToList(queue.Pop()); + } + } + + private void ListToQueue() + { + queue.Clear(); // rebuild queue + for (SpansCell cell = first; cell != null; cell = cell.next) + { + queue.Add(cell); // add to queue from list + } + } + + private bool AtMatch() + { + return (Min().Doc() == max.Doc()) && ((max.End() - Min().Start() - totalLength) <= slop); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanFirstQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanFirstQuery.cs new file mode 100644 index 0000000000..ff39b28be8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanFirstQuery.cs @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Index; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Query = Lucene.Net.Search.Query; + +namespace Lucene.Net.Search.Spans +{ + + /// Matches spans near the beginning of a field. + [Serializable] + public class SpanFirstQuery : SpanQuery, System.ICloneable + { + private class AnonymousClassSpans : Spans + { + public AnonymousClassSpans(Lucene.Net.Index.IndexReader reader, SpanFirstQuery enclosingInstance) + { + InitBlock(reader, enclosingInstance); + } + private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanFirstQuery enclosingInstance) + { + this.reader = reader; + this.enclosingInstance = enclosingInstance; + spans = Enclosing_Instance.match.GetSpans(reader); + } + private Lucene.Net.Index.IndexReader reader; + private SpanFirstQuery enclosingInstance; + public SpanFirstQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Spans spans; + + public override bool Next() + { + while (spans.Next()) + { + // scan to next match + if (End() <= Enclosing_Instance.end) + return true; + } + return false; + } + + public override bool SkipTo(int target) + { + if (!spans.SkipTo(target)) + return false; + + return spans.End() <= Enclosing_Instance.end || Next(); + } + + public override int Doc() + { + return spans.Doc(); + } + public override int Start() + { + return spans.Start(); + } + public override int End() + { + return spans.End(); + } + + // TODO: Remove warning after API has been finalized + + public override ICollection GetPayload() + { + System.Collections.Generic.ICollection result = null; + if (spans.IsPayloadAvailable()) + { + result = spans.GetPayload(); + } + return result; //TODO: any way to avoid the new construction? + } + + // TODO: Remove warning after API has been finalized + + public override bool IsPayloadAvailable() + { + return spans.IsPayloadAvailable(); + } + + public override System.String ToString() + { + return "spans(" + Enclosing_Instance.ToString() + ")"; + } + } + private SpanQuery match; + private int end; + + /// Construct a SpanFirstQuery matching spans in match whose end + /// position is less than or equal to end. + /// + public SpanFirstQuery(SpanQuery match, int end) + { + this.match = match; + this.end = end; + } + + /// Return the SpanQuery whose matches are filtered. + public virtual SpanQuery Match + { + get { return match; } + } + + /// Return the maximum end position permitted in a match. 
+ public virtual int End + { + get { return end; } + } + + public override string Field + { + get { return match.Field; } + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("spanFirst("); + buffer.Append(match.ToString(field)); + buffer.Append(", "); + buffer.Append(end); + buffer.Append(")"); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override System.Object Clone() + { + SpanFirstQuery spanFirstQuery = new SpanFirstQuery((SpanQuery) match.Clone(), end); + spanFirstQuery.Boost = Boost; + return spanFirstQuery; + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + match.ExtractTerms(terms); + } + + public override Spans GetSpans(IndexReader reader) + { + return new AnonymousClassSpans(reader, this); + } + + public override Query Rewrite(IndexReader reader) + { + SpanFirstQuery clone = null; + + SpanQuery rewritten = (SpanQuery) match.Rewrite(reader); + if (rewritten != match) + { + clone = (SpanFirstQuery) this.Clone(); + clone.match = rewritten; + } + + if (clone != null) + { + return clone; // some clauses rewrote + } + else + { + return this; // no clauses rewrote + } + } + + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is SpanFirstQuery)) + return false; + + SpanFirstQuery other = (SpanFirstQuery) o; + return this.end == other.end && this.match.Equals(other.match) && this.Boost == other.Boost; + } + + public override int GetHashCode() + { + int h = match.GetHashCode(); + h ^= ((h << 8) | (Number.URShift(h, 25))); // reversible + h ^= System.Convert.ToInt32(Boost) ^ end; + return h; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanNearQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanNearQuery.cs new file mode 100644 index 0000000000..ddcac4fc93 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanNearQuery.cs @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Linq; +using Lucene.Net.Index; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Query = Lucene.Net.Search.Query; + +namespace Lucene.Net.Search.Spans +{ + + /// Matches spans which are near one another. One can specify slop, the + /// maximum number of intervening unmatched positions, as well as whether + /// matches are required to be in-order. 
+ /// + [Serializable] + public class SpanNearQuery : SpanQuery, System.ICloneable + { + protected internal System.Collections.Generic.IList clauses; + protected internal int internalSlop; + protected internal bool inOrder; + + protected internal System.String internalField; + private readonly bool collectPayloads; + + /// Construct a SpanNearQuery. Matches spans matching a span from each + /// clause, with up to slop total unmatched positions between + /// them. * When inOrder is true, the spans from each clause + /// must be * ordered as in clauses. + /// + public SpanNearQuery(SpanQuery[] clauses, int slop, bool inOrder):this(clauses, slop, inOrder, true) + { + } + + public SpanNearQuery(SpanQuery[] clauses, int slop, bool inOrder, bool collectPayloads) + { + + // copy clauses array into an ArrayList + this.clauses = new System.Collections.Generic.List(clauses.Length); + for (int i = 0; i < clauses.Length; i++) + { + SpanQuery clause = clauses[i]; + if (i == 0) + { + // check field + internalField = clause.Field; + } + else if (!clause.Field.Equals(internalField)) + { + throw new System.ArgumentException("Clauses must have same field."); + } + this.clauses.Add(clause); + } + this.collectPayloads = collectPayloads; + this.internalSlop = slop; + this.inOrder = inOrder; + } + + /// Return the clauses whose spans are matched. + public virtual SpanQuery[] GetClauses() + { + // Return a copy + return clauses.ToArray(); + } + + /// Return the maximum number of intervening unmatched positions permitted. + public virtual int Slop + { + get { return internalSlop; } + } + + /// Return true if matches are required to be in-order. + public virtual bool IsInOrder + { + get { return inOrder; } + } + + public override string Field + { + get { return internalField; } + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + foreach (SpanQuery clause in clauses) + { + clause.ExtractTerms(terms); + } + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("spanNear(["); + System.Collections.Generic.IEnumerator i = clauses.GetEnumerator(); + while (i.MoveNext()) + { + SpanQuery clause = i.Current; + buffer.Append(clause.ToString(field)); + buffer.Append(", "); + } + if (clauses.Count > 0) buffer.Length -= 2; + buffer.Append("], "); + buffer.Append(internalSlop); + buffer.Append(", "); + buffer.Append(inOrder); + buffer.Append(")"); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override Spans GetSpans(IndexReader reader) + { + if (clauses.Count == 0) + // optimize 0-clause case + return new SpanOrQuery(GetClauses()).GetSpans(reader); + + if (clauses.Count == 1) + // optimize 1-clause case + return clauses[0].GetSpans(reader); + + return inOrder?(Spans) new NearSpansOrdered(this, reader, collectPayloads):(Spans) new NearSpansUnordered(this, reader); + } + + public override Query Rewrite(IndexReader reader) + { + SpanNearQuery clone = null; + for (int i = 0; i < clauses.Count; i++) + { + SpanQuery c = clauses[i]; + SpanQuery query = (SpanQuery) c.Rewrite(reader); + if (query != c) + { + // clause rewrote: must clone + if (clone == null) + clone = (SpanNearQuery) this.Clone(); + clone.clauses[i] = query; + } + } + if (clone != null) + { + return clone; // some clauses rewrote + } + else + { + return this; // no clauses rewrote + } + } + + public override System.Object Clone() + { + int sz = clauses.Count; + SpanQuery[] newClauses = new 
SpanQuery[sz]; + + for (int i = 0; i < sz; i++) + { + SpanQuery clause = clauses[i]; + newClauses[i] = (SpanQuery) clause.Clone(); + } + SpanNearQuery spanNearQuery = new SpanNearQuery(newClauses, internalSlop, inOrder); + spanNearQuery.Boost = Boost; + return spanNearQuery; + } + + /// Returns true iff o is equal to this. + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is SpanNearQuery)) + return false; + + SpanNearQuery spanNearQuery = (SpanNearQuery) o; + + if (inOrder != spanNearQuery.inOrder) + return false; + if (internalSlop != spanNearQuery.internalSlop) + return false; + if (clauses.Count != spanNearQuery.clauses.Count) + return false; + System.Collections.IEnumerator iter1 = clauses.GetEnumerator(); + System.Collections.IEnumerator iter2 = spanNearQuery.clauses.GetEnumerator(); + while (iter1.MoveNext() && iter2.MoveNext()) + { + SpanQuery item1 = (SpanQuery)iter1.Current; + SpanQuery item2 = (SpanQuery)iter2.Current; + if (!item1.Equals(item2)) + return false; + } + + return Boost == spanNearQuery.Boost; + } + + public override int GetHashCode() + { + long result = 0; + //mgarski .NET uses the arraylist's location, not contents to calculate the hash + // need to start with result being the hash of the contents. + foreach (SpanQuery sq in clauses) + { + result += sq.GetHashCode(); + } + // Mix bits before folding in things like boost, since it could cancel the + // last element of clauses. This particular mix also serves to + // differentiate SpanNearQuery hashcodes from others. + result ^= ((result << 14) | (Number.URShift(result, 19))); // reversible + result += System.Convert.ToInt32(Boost); + result += internalSlop; + result ^= (inOrder ? (long) 0x99AFD3BD : 0); + return (int) result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanNotQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanNotQuery.cs new file mode 100644 index 0000000000..3f69080067 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanNotQuery.cs @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Index; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Query = Lucene.Net.Search.Query; + +namespace Lucene.Net.Search.Spans +{ + + /// Removes matches which overlap with another SpanQuery. 
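As an illustrative aside (not from the patch; the "body" field is an assumption), the class declared just below filters one span source by another:

// Spans of "apache" that do not overlap any span of "license"; both clauses must use the same field.
SpanQuery apacheNotLicense = new SpanNotQuery(
    new SpanTermQuery(new Term("body", "apache")),    // include: the spans that are kept
    new SpanTermQuery(new Term("body", "license")));  // exclude: spans that veto overlapping matches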
+ [Serializable] + public class SpanNotQuery:SpanQuery, System.ICloneable + { + private class AnonymousClassSpans : Spans + { + public AnonymousClassSpans(Lucene.Net.Index.IndexReader reader, SpanNotQuery enclosingInstance) + { + InitBlock(reader, enclosingInstance); + } + private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanNotQuery enclosingInstance) + { + this.reader = reader; + this.enclosingInstance = enclosingInstance; + includeSpans = Enclosing_Instance.include.GetSpans(reader); + excludeSpans = Enclosing_Instance.exclude.GetSpans(reader); + moreExclude = excludeSpans.Next(); + } + private Lucene.Net.Index.IndexReader reader; + private SpanNotQuery enclosingInstance; + public SpanNotQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Spans includeSpans; + private bool moreInclude = true; + + private Spans excludeSpans; + private bool moreExclude; + + public override bool Next() + { + if (moreInclude) + // move to next include + moreInclude = includeSpans.Next(); + + while (moreInclude && moreExclude) + { + + if (includeSpans.Doc() > excludeSpans.Doc()) + // skip exclude + moreExclude = excludeSpans.SkipTo(includeSpans.Doc()); + + while (moreExclude && includeSpans.Doc() == excludeSpans.Doc() && excludeSpans.End() <= includeSpans.Start()) + { + moreExclude = excludeSpans.Next(); // increment exclude + } + + if (!moreExclude || includeSpans.Doc() != excludeSpans.Doc() || includeSpans.End() <= excludeSpans.Start()) + break; // we found a match + + moreInclude = includeSpans.Next(); // intersected: keep scanning + } + return moreInclude; + } + + public override bool SkipTo(int target) + { + if (moreInclude) + // skip include + moreInclude = includeSpans.SkipTo(target); + + if (!moreInclude) + return false; + + if (moreExclude && includeSpans.Doc() > excludeSpans.Doc()) + moreExclude = excludeSpans.SkipTo(includeSpans.Doc()); + + while (moreExclude && includeSpans.Doc() == excludeSpans.Doc() && excludeSpans.End() <= includeSpans.Start()) + { + moreExclude = excludeSpans.Next(); // increment exclude + } + + if (!moreExclude || includeSpans.Doc() != excludeSpans.Doc() || includeSpans.End() <= excludeSpans.Start()) + return true; // we found a match + + return Next(); // scan to next match + } + + public override int Doc() + { + return includeSpans.Doc(); + } + public override int Start() + { + return includeSpans.Start(); + } + public override int End() + { + return includeSpans.End(); + } + + // TODO: Remove warning after API has been finalizedb + + public override ICollection GetPayload() + { + System.Collections.Generic.ICollection result = null; + if (includeSpans.IsPayloadAvailable()) + { + result = includeSpans.GetPayload(); + } + return result; + } + + // TODO: Remove warning after API has been finalized + + public override bool IsPayloadAvailable() + { + return includeSpans.IsPayloadAvailable(); + } + + public override System.String ToString() + { + return "spans(" + Enclosing_Instance.ToString() + ")"; + } + } + private SpanQuery include; + private SpanQuery exclude; + + /// Construct a SpanNotQuery matching spans from include which + /// have no overlap with spans from exclude. + /// + public SpanNotQuery(SpanQuery include, SpanQuery exclude) + { + this.include = include; + this.exclude = exclude; + + if (!include.Field.Equals(exclude.Field)) + throw new System.ArgumentException("Clauses must have same field."); + } + + /// Return the SpanQuery whose matches are filtered. 
+ public virtual SpanQuery Include + { + get { return include; } + } + + /// Return the SpanQuery whose matches must not overlap those returned. + public virtual SpanQuery Exclude + { + get { return exclude; } + } + + public override string Field + { + get { return include.Field; } + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + include.ExtractTerms(terms); + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("spanNot("); + buffer.Append(include.ToString(field)); + buffer.Append(", "); + buffer.Append(exclude.ToString(field)); + buffer.Append(")"); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override System.Object Clone() + { + SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery) include.Clone(), (SpanQuery) exclude.Clone()); + spanNotQuery.Boost = Boost; + return spanNotQuery; + } + + public override Spans GetSpans(IndexReader reader) + { + return new AnonymousClassSpans(reader, this); + } + + public override Query Rewrite(IndexReader reader) + { + SpanNotQuery clone = null; + + SpanQuery rewrittenInclude = (SpanQuery) include.Rewrite(reader); + if (rewrittenInclude != include) + { + clone = (SpanNotQuery) this.Clone(); + clone.include = rewrittenInclude; + } + SpanQuery rewrittenExclude = (SpanQuery) exclude.Rewrite(reader); + if (rewrittenExclude != exclude) + { + if (clone == null) + clone = (SpanNotQuery) this.Clone(); + clone.exclude = rewrittenExclude; + } + + if (clone != null) + { + return clone; // some clauses rewrote + } + else + { + return this; // no clauses rewrote + } + } + + /// Returns true iff o is equal to this. + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is SpanNotQuery)) + return false; + + SpanNotQuery other = (SpanNotQuery) o; + return this.include.Equals(other.include) && this.exclude.Equals(other.exclude) && this.Boost == other.Boost; + } + + public override int GetHashCode() + { + int h = include.GetHashCode(); + h = (h << 1) | (Number.URShift(h, 31)); // rotate left + h ^= exclude.GetHashCode(); + h = (h << 1) | (Number.URShift(h, 31)); // rotate left + h ^= System.Convert.ToInt32(Boost); + return h; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanOrQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanOrQuery.cs new file mode 100644 index 0000000000..cf99b8eb23 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanOrQuery.cs @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Index; +using Lucene.Net.Support; +using Lucene.Net.Util; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using Query = Lucene.Net.Search.Query; + +namespace Lucene.Net.Search.Spans +{ + + /// Matches the union of its clauses. + [Serializable] + public class SpanOrQuery : SpanQuery, System.ICloneable + { + private class AnonymousClassSpans : Spans + { + public AnonymousClassSpans(Lucene.Net.Index.IndexReader reader, SpanOrQuery enclosingInstance) + { + InitBlock(reader, enclosingInstance); + } + private void InitBlock(Lucene.Net.Index.IndexReader reader, SpanOrQuery enclosingInstance) + { + this.reader = reader; + this.enclosingInstance = enclosingInstance; + } + private Lucene.Net.Index.IndexReader reader; + private SpanOrQuery enclosingInstance; + public SpanOrQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private SpanQueue queue = null; + + private bool InitSpanQueue(int target) + { + queue = new SpanQueue(enclosingInstance, Enclosing_Instance.clauses.Count); + System.Collections.Generic.IEnumerator i = Enclosing_Instance.clauses.GetEnumerator(); + while (i.MoveNext()) + { + Spans spans = i.Current.GetSpans(reader); + if (((target == - 1) && spans.Next()) || ((target != - 1) && spans.SkipTo(target))) + { + queue.Add(spans); + } + } + return queue.Size() != 0; + } + + public override bool Next() + { + if (queue == null) + { + return InitSpanQueue(- 1); + } + + if (queue.Size() == 0) + { + // all done + return false; + } + + if (Top().Next()) + { + // move to next + queue.UpdateTop(); + return true; + } + + queue.Pop(); // exhausted a clause + return queue.Size() != 0; + } + + private Spans Top() + { + return queue.Top(); + } + + public override bool SkipTo(int target) + { + if (queue == null) + { + return InitSpanQueue(target); + } + + bool skipCalled = false; + while (queue.Size() != 0 && Top().Doc() < target) + { + if (Top().SkipTo(target)) + { + queue.UpdateTop(); + } + else + { + queue.Pop(); + } + skipCalled = true; + } + + if (skipCalled) + { + return queue.Size() != 0; + } + return Next(); + } + + public override int Doc() + { + return Top().Doc(); + } + public override int Start() + { + return Top().Start(); + } + public override int End() + { + return Top().End(); + } + + public override ICollection GetPayload() + { + System.Collections.Generic.ICollection result = null; + Spans theTop = Top(); + if (theTop != null && theTop.IsPayloadAvailable()) + { + result = theTop.GetPayload(); + } + return result; + } + + public override bool IsPayloadAvailable() + { + Spans top = Top(); + return top != null && top.IsPayloadAvailable(); + } + + public override System.String ToString() + { + return "spans(" + Enclosing_Instance + ")@" + ((queue == null)?"START":(queue.Size() > 0?(Doc() + ":" + Start() + "-" + End()):"END")); + } + } + + private EquatableList clauses; + private System.String field; + + /// Construct a SpanOrQuery merging the provided clauses. 
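A small sketch (illustrative only; the terms are made up) of the params constructor declared just below:

// Union of two single-term spans on one field; mixing fields would throw ArgumentException.
SpanQuery monoOrMonodoc = new SpanOrQuery(
    new SpanTermQuery(new Term("body", "mono")),
    new SpanTermQuery(new Term("body", "monodoc")));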
+ public SpanOrQuery(params SpanQuery[] clauses) + { + + // copy clauses array into an ArrayList + this.clauses = new EquatableList(clauses.Length); + for (int i = 0; i < clauses.Length; i++) + { + SpanQuery clause = clauses[i]; + if (i == 0) + { + // check field + field = clause.Field; + } + else if (!clause.Field.Equals(field)) + { + throw new System.ArgumentException("Clauses must have same field."); + } + this.clauses.Add(clause); + } + } + + /// Return the clauses whose spans are matched. + public virtual SpanQuery[] GetClauses() + { + return clauses.ToArray(); + } + + public override string Field + { + get { return field; } + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + foreach(SpanQuery clause in clauses) + { + clause.ExtractTerms(terms); + } + } + + public override System.Object Clone() + { + int sz = clauses.Count; + SpanQuery[] newClauses = new SpanQuery[sz]; + + for (int i = 0; i < sz; i++) + { + newClauses[i] = (SpanQuery) clauses[i].Clone(); + } + SpanOrQuery soq = new SpanOrQuery(newClauses); + soq.Boost = Boost; + return soq; + } + + public override Query Rewrite(IndexReader reader) + { + SpanOrQuery clone = null; + for (int i = 0; i < clauses.Count; i++) + { + SpanQuery c = clauses[i]; + SpanQuery query = (SpanQuery) c.Rewrite(reader); + if (query != c) + { + // clause rewrote: must clone + if (clone == null) + clone = (SpanOrQuery) this.Clone(); + clone.clauses[i] = query; + } + } + if (clone != null) + { + return clone; // some clauses rewrote + } + else + { + return this; // no clauses rewrote + } + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + buffer.Append("spanOr(["); + System.Collections.Generic.IEnumerator i = clauses.GetEnumerator(); + int j = 0; + while (i.MoveNext()) + { + j++; + SpanQuery clause = i.Current; + buffer.Append(clause.ToString(field)); + if (j < clauses.Count) + { + buffer.Append(", "); + } + } + buffer.Append("])"); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (o == null || GetType() != o.GetType()) + return false; + + SpanOrQuery that = (SpanOrQuery) o; + + if (!clauses.Equals(that.clauses)) + return false; + if (!(clauses.Count == 0) && !field.Equals(that.field)) + return false; + + return Boost == that.Boost; + } + + public override int GetHashCode() + { + int h = clauses.GetHashCode(); + h ^= ((h << 10) | (Number.URShift(h, 23))); + h ^= System.Convert.ToInt32(Boost); + return h; + } + + + private class SpanQueue : PriorityQueue + { + private void InitBlock(SpanOrQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SpanOrQuery enclosingInstance; + public SpanOrQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public SpanQueue(SpanOrQuery enclosingInstance, int size) + { + InitBlock(enclosingInstance); + Initialize(size); + } + + public override bool LessThan(Spans spans1, Spans spans2) + { + if (spans1.Doc() == spans2.Doc()) + { + if (spans1.Start() == spans2.Start()) + { + return spans1.End() < spans2.End(); + } + else + { + return spans1.Start() < spans2.Start(); + } + } + else + { + return spans1.Doc() < spans2.Doc(); + } + } + } + + public override Spans GetSpans(IndexReader reader) + { + if (clauses.Count == 1) + // optimize 1-clause case + return (clauses[0]).GetSpans(reader); + + return new AnonymousClassSpans(reader, 
this); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanQuery.cs new file mode 100644 index 0000000000..03a615a4d2 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanQuery.cs @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Query = Lucene.Net.Search.Query; +using Searcher = Lucene.Net.Search.Searcher; +using Weight = Lucene.Net.Search.Weight; + +namespace Lucene.Net.Search.Spans +{ + + /// Base class for span-based queries. + [Serializable] + public abstract class SpanQuery:Query + { + /// Expert: Returns the matches for this query in an index. Used internally + /// to search for spans. + /// + public abstract Spans GetSpans(IndexReader reader); + + /// Returns the name of the field matched by this query. + public abstract string Field { get; } + + public override Weight CreateWeight(Searcher searcher) + { + return new SpanWeight(this, searcher); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanScorer.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanScorer.cs new file mode 100644 index 0000000000..a44ce08c09 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanScorer.cs @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Explanation = Lucene.Net.Search.Explanation; +using Scorer = Lucene.Net.Search.Scorer; +using Similarity = Lucene.Net.Search.Similarity; +using Weight = Lucene.Net.Search.Weight; + +namespace Lucene.Net.Search.Spans +{ + /// Public for extension only. 
+ public class SpanScorer:Scorer + { + protected internal Spans spans; + protected internal Weight weight; + protected internal byte[] norms; + protected internal float value_Renamed; + + protected internal bool more = true; + + protected internal int doc; + protected internal float freq; + + protected internal SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms):base(similarity) + { + this.spans = spans; + this.norms = norms; + this.weight = weight; + this.value_Renamed = weight.Value; + if (this.spans.Next()) + { + doc = - 1; + } + else + { + doc = NO_MORE_DOCS; + more = false; + } + } + + public override int NextDoc() + { + if (!SetFreqCurrentDoc()) + { + doc = NO_MORE_DOCS; + } + return doc; + } + + public override int Advance(int target) + { + if (!more) + { + return doc = NO_MORE_DOCS; + } + if (spans.Doc() < target) + { + // setFreqCurrentDoc() leaves spans.doc() ahead + more = spans.SkipTo(target); + } + if (!SetFreqCurrentDoc()) + { + doc = NO_MORE_DOCS; + } + return doc; + } + + public /*protected internal*/ virtual bool SetFreqCurrentDoc() + { + if (!more) + { + return false; + } + doc = spans.Doc(); + freq = 0.0f; + do + { + int matchLength = spans.End() - spans.Start(); + freq += Similarity.SloppyFreq(matchLength); + more = spans.Next(); + } + while (more && (doc == spans.Doc())); + return true; + } + + public override int DocID() + { + return doc; + } + + public override float Score() + { + float raw = Similarity.Tf(freq) * value_Renamed; // raw score + return norms == null?raw:raw * Similarity.DecodeNorm(norms[doc]); // normalize + } + + /// + /// This method is no longer an official member of + /// but it is needed by SpanWeight to build an explanation. + /// + protected internal virtual Explanation Explain(int doc) + { + Explanation tfExplanation = new Explanation(); + + int expDoc = Advance(doc); + + float phraseFreq = (expDoc == doc)?freq:0.0f; + tfExplanation.Value = Similarity.Tf(phraseFreq); + tfExplanation.Description = "tf(phraseFreq=" + phraseFreq + ")"; + + return tfExplanation; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanTermQuery.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanTermQuery.cs new file mode 100644 index 0000000000..d6fa72118e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanTermQuery.cs @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search.Spans +{ + + /// Matches spans containing a term. 
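Usage sketch (not part of the patch; reader stands for an IndexReader assumed to be open elsewhere) showing the simplest span query and how its Spans enumeration is typically consumed:

SpanQuery indexTerm = new SpanTermQuery(new Term("body", "index"));
Spans spans = indexTerm.GetSpans(reader);          // reader: an open IndexReader (assumed)
while (spans.Next())
{
    int doc = spans.Doc();                         // document number of the current match
    int start = spans.Start(), end = spans.End();  // position range of the match
}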
+ [Serializable] + public class SpanTermQuery:SpanQuery + { + protected internal Term internalTerm; + + /// Construct a SpanTermQuery matching the named term's spans. + public SpanTermQuery(Term term) + { + this.internalTerm = term; + } + + /// Return the term whose spans are matched. + public virtual Term Term + { + get { return internalTerm; } + } + + public override string Field + { + get { return internalTerm.Field; } + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + terms.Add(internalTerm); + } + + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (internalTerm.Field.Equals(field)) + buffer.Append(internalTerm.Text); + else + { + buffer.Append(internalTerm.ToString()); + } + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((internalTerm == null)?0:internalTerm.GetHashCode()); + return result; + } + + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + SpanTermQuery other = (SpanTermQuery) obj; + if (internalTerm == null) + { + if (other.internalTerm != null) + return false; + } + else if (!internalTerm.Equals(other.internalTerm)) + return false; + return true; + } + + public override Spans GetSpans(IndexReader reader) + { + return new TermSpans(reader.TermPositions(internalTerm), internalTerm); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/SpanWeight.cs b/external/Lucene.Net.Light/src/core/Search/Spans/SpanWeight.cs new file mode 100644 index 0000000000..3590049f56 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/SpanWeight.cs @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Lucene.Net.Index; +using IndexReader = Lucene.Net.Index.IndexReader; +using Lucene.Net.Search; +using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation; + +namespace Lucene.Net.Search.Spans +{ + + /// Expert-only. 
Public for use by other weight implementations + [Serializable] + public class SpanWeight:Weight + { + protected internal Similarity similarity; + protected internal float value_Renamed; + protected internal float idf; + protected internal float queryNorm; + protected internal float queryWeight; + + protected internal ISet terms; + protected internal SpanQuery internalQuery; + private IDFExplanation idfExp; + + public SpanWeight(SpanQuery query, Searcher searcher) + { + this.similarity = query.GetSimilarity(searcher); + this.internalQuery = query; + + terms = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(); + query.ExtractTerms(terms); + + idfExp = similarity.IdfExplain(terms, searcher); + idf = idfExp.Idf; + } + + public override Query Query + { + get { return internalQuery; } + } + + public override float Value + { + get { return value_Renamed; } + } + + public override float GetSumOfSquaredWeights() + { + queryWeight = idf*internalQuery.Boost; // compute query weight + return queryWeight*queryWeight; // square it + } + + public override void Normalize(float queryNorm) + { + this.queryNorm = queryNorm; + queryWeight *= queryNorm; // normalize query weight + value_Renamed = queryWeight * idf; // idf for document + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + return new SpanScorer(internalQuery.GetSpans(reader), this, similarity, reader.Norms(internalQuery.Field)); + } + + public override Explanation Explain(IndexReader reader, int doc) + { + + ComplexExplanation result = new ComplexExplanation(); + result.Description = "weight(" + Query + " in " + doc + "), product of:"; + System.String field = ((SpanQuery) Query).Field; + + Explanation idfExpl = new Explanation(idf, "idf(" + field + ": " + idfExp.Explain() + ")"); + + // explain query weight + Explanation queryExpl = new Explanation(); + queryExpl.Description = "queryWeight(" + Query + "), product of:"; + + Explanation boostExpl = new Explanation(Query.Boost, "boost"); + if (Query.Boost != 1.0f) + queryExpl.AddDetail(boostExpl); + queryExpl.AddDetail(idfExpl); + + Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); + queryExpl.AddDetail(queryNormExpl); + + queryExpl.Value = boostExpl.Value * idfExpl.Value * queryNormExpl.Value; + + result.AddDetail(queryExpl); + + // explain field weight + ComplexExplanation fieldExpl = new ComplexExplanation(); + fieldExpl.Description = "fieldWeight(" + field + ":" + internalQuery.ToString(field) + " in " + doc + "), product of:"; + + Explanation tfExpl = ((SpanScorer)Scorer(reader, true, false)).Explain(doc); + fieldExpl.AddDetail(tfExpl); + fieldExpl.AddDetail(idfExpl); + + Explanation fieldNormExpl = new Explanation(); + byte[] fieldNorms = reader.Norms(field); + float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f; + fieldNormExpl.Value = fieldNorm; + fieldNormExpl.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")"; + fieldExpl.AddDetail(fieldNormExpl); + + fieldExpl.Match = tfExpl.IsMatch; + fieldExpl.Value = tfExpl.Value * idfExpl.Value * fieldNormExpl.Value; + + result.AddDetail(fieldExpl); + System.Boolean? 
tempAux = fieldExpl.Match; + result.Match = tempAux; + + // combine them + result.Value = queryExpl.Value * fieldExpl.Value; + + if (queryExpl.Value == 1.0f) + return fieldExpl; + + return result; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/Spans.cs b/external/Lucene.Net.Light/src/core/Search/Spans/Spans.cs new file mode 100644 index 0000000000..dad803f206 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/Spans.cs @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Search.Spans +{ + + /// Expert: an enumeration of span matches. Used to implement span searching. + /// Each span represents a range of term positions within a document. Matches + /// are enumerated in order, by increasing document number, within that by + /// increasing start position and finally by increasing end position. + /// + public abstract class Spans + { + /// Move to the next match, returning true iff any such exists. + public abstract bool Next(); + + /// Skips to the first match beyond the current, whose document number is + /// greater than or equal to target.

Returns true iff there is such + /// a match.

Behaves as if written: + /// boolean skipTo(int target) { + /// do { + /// if (!next()) + /// return false; + /// } while (target > doc()); + /// return true; + /// } + /// + /// Most implementations are considerably more efficient than that. + ///

+ public abstract bool SkipTo(int target); + + /// Returns the document number of the current match. Initially invalid. + public abstract int Doc(); + + /// Returns the start position of the current match. Initially invalid. + public abstract int Start(); + + /// Returns the end position of the current match. Initially invalid. + public abstract int End(); + + /// Returns the payload data for the current span. + /// This is invalid until is called for + /// the first time. + /// This method must not be called more than once after each call + /// of . However, most payloads are loaded lazily, + /// so if the payload data for the current position is not needed, + /// this method may not be called at all for performance reasons. An ordered + /// SpanQuery does not lazy load, so if you have payloads in your index and + /// you do not want ordered SpanNearQuerys to collect payloads, you can + /// disable collection with a constructor option.
+ /// + /// Note that the return type is a collection, thus the ordering should not be relied upon. + ///
+ ///

+ /// WARNING: The status of the Payloads feature is experimental. + /// The APIs introduced here might change in the future and will not be + /// supported anymore in such a case.

+ /// + ///

+ /// a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false + /// java.io.IOException + // TODO: Remove warning after API has been finalized + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public abstract ICollection GetPayload(); + + /// Checks if a payload can be loaded at this position. + ///

+ /// Payloads can only be loaded once per call to + /// Next(). + /// + ///

+ /// true if there is a payload available at this position that can be loaded + public abstract bool IsPayloadAvailable(); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Spans/TermSpans.cs b/external/Lucene.Net.Light/src/core/Search/Spans/TermSpans.cs new file mode 100644 index 0000000000..3e9a3bbae0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Spans/TermSpans.cs @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using Term = Lucene.Net.Index.Term; +using TermPositions = Lucene.Net.Index.TermPositions; + +namespace Lucene.Net.Search.Spans +{ + + /// Expert: + /// Public for extension only + /// + public class TermSpans:Spans + { + protected internal TermPositions internalPositions; + protected internal Term term; + protected internal int internalDoc; + protected internal int freq; + protected internal int count; + protected internal int position; + + + public TermSpans(TermPositions positions, Term term) + { + + this.internalPositions = positions; + this.term = term; + internalDoc = - 1; + } + + public override bool Next() + { + if (count == freq) + { + if (!internalPositions.Next()) + { + internalDoc = int.MaxValue; + return false; + } + internalDoc = internalPositions.Doc; + freq = internalPositions.Freq; + count = 0; + } + position = internalPositions.NextPosition(); + count++; + return true; + } + + public override bool SkipTo(int target) + { + if (!internalPositions.SkipTo(target)) + { + internalDoc = int.MaxValue; + return false; + } + + internalDoc = internalPositions.Doc; + freq = internalPositions.Freq; + count = 0; + + position = internalPositions.NextPosition(); + count++; + + return true; + } + + public override int Doc() + { + return internalDoc; + } + + public override int Start() + { + return position; + } + + public override int End() + { + return position + 1; + } + + // TODO: Remove warning after API has been finalized + + public override ICollection GetPayload() + { + byte[] bytes = new byte[internalPositions.PayloadLength]; + bytes = internalPositions.GetPayload(bytes, 0); + var val = new System.Collections.Generic.List(); + val.Add(bytes); + return val; + } + + // TODO: Remove warning after API has been finalized + + public override bool IsPayloadAvailable() + { + return internalPositions.IsPayloadAvailable; + } + + public override System.String ToString() + { + return "spans(" + term.ToString() + ")@" + (internalDoc == - 1?"START":((internalDoc == System.Int32.MaxValue)?"END":internalDoc + "-" + position)); + } + + public virtual TermPositions Positions + { + get { return internalPositions; } + } + } +} \ No newline at end of file diff --git 
a/external/Lucene.Net.Light/src/core/Search/TermQuery.cs b/external/Lucene.Net.Light/src/core/Search/TermQuery.cs new file mode 100644 index 0000000000..a04ec7a423 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TermQuery.cs @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using TermDocs = Lucene.Net.Index.TermDocs; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; +using IDFExplanation = Lucene.Net.Search.Explanation.IDFExplanation; + +namespace Lucene.Net.Search +{ + + /// A Query that matches documents containing a term. + /// This may be combined with other terms with a . + /// + [Serializable] + public class TermQuery:Query + { + private Term term; + + [Serializable] + private class TermWeight:Weight + { + private void InitBlock(TermQuery enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private TermQuery enclosingInstance; + public TermQuery Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Similarity similarity; + private float value_Renamed; + private float idf; + private float queryNorm; + private float queryWeight; + private IDFExplanation idfExp; + + public TermWeight(TermQuery enclosingInstance, Searcher searcher) + { + InitBlock(enclosingInstance); + this.similarity = Enclosing_Instance.GetSimilarity(searcher); + idfExp = similarity.IdfExplain(Enclosing_Instance.term, searcher); + idf = idfExp.Idf; + } + + public override System.String ToString() + { + return "weight(" + Enclosing_Instance + ")"; + } + + public override Query Query + { + get { return Enclosing_Instance; } + } + + public override float Value + { + get { return value_Renamed; } + } + + public override float GetSumOfSquaredWeights() + { + queryWeight = idf*Enclosing_Instance.Boost; // compute query weight + return queryWeight*queryWeight; // square it + } + + public override void Normalize(float queryNorm) + { + this.queryNorm = queryNorm; + queryWeight *= queryNorm; // normalize query weight + value_Renamed = queryWeight * idf; // idf for document + } + + public override Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer) + { + TermDocs termDocs = reader.TermDocs(Enclosing_Instance.term); + + if (termDocs == null) + return null; + + return new TermScorer(this, termDocs, similarity, reader.Norms(Enclosing_Instance.term.Field)); + } + + public override Explanation Explain(IndexReader reader, int doc) + { + + ComplexExplanation result = new ComplexExplanation(); + result.Description = "weight(" + Query + " in " + doc + "), product of:"; + + Explanation expl = new Explanation(idf, idfExp.Explain()); + + // explain query weight + Explanation queryExpl 
= new Explanation(); + queryExpl.Description = "queryWeight(" + Query + "), product of:"; + + Explanation boostExpl = new Explanation(Enclosing_Instance.Boost, "boost"); + if (Enclosing_Instance.Boost != 1.0f) + queryExpl.AddDetail(boostExpl); + queryExpl.AddDetail(expl); + + Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm"); + queryExpl.AddDetail(queryNormExpl); + + queryExpl.Value = boostExpl.Value * expl.Value * queryNormExpl.Value; + + result.AddDetail(queryExpl); + + // explain field weight + System.String field = Enclosing_Instance.term.Field; + ComplexExplanation fieldExpl = new ComplexExplanation(); + fieldExpl.Description = "fieldWeight(" + Enclosing_Instance.term + " in " + doc + "), product of:"; + + Explanation tfExplanation = new Explanation(); + int tf = 0; + TermDocs termDocs = reader.TermDocs(enclosingInstance.term); + if (termDocs != null) + { + try + { + if (termDocs.SkipTo(doc) && termDocs.Doc == doc) + { + tf = termDocs.Freq; + } + } + finally + { + termDocs.Close(); + } + tfExplanation.Value = similarity.Tf(tf); + tfExplanation.Description = "tf(termFreq(" + enclosingInstance.term + ")=" + tf + ")"; + } + else + { + tfExplanation.Value = 0.0f; + tfExplanation.Description = "no matching term"; + } + fieldExpl.AddDetail(tfExplanation); + fieldExpl.AddDetail(expl); + + Explanation fieldNormExpl = new Explanation(); + byte[] fieldNorms = reader.Norms(field); + float fieldNorm = fieldNorms != null?Similarity.DecodeNorm(fieldNorms[doc]):1.0f; + fieldNormExpl.Value = fieldNorm; + fieldNormExpl.Description = "fieldNorm(field=" + field + ", doc=" + doc + ")"; + fieldExpl.AddDetail(fieldNormExpl); + + fieldExpl.Match = tfExplanation.IsMatch; + fieldExpl.Value = tfExplanation.Value * expl.Value * fieldNormExpl.Value; + + result.AddDetail(fieldExpl); + System.Boolean? tempAux = fieldExpl.Match; + result.Match = tempAux; + + // combine them + result.Value = queryExpl.Value * fieldExpl.Value; + + if (queryExpl.Value == 1.0f) + return fieldExpl; + + return result; + } + } + + /// Constructs a query for the term t. + public TermQuery(Term t) + { + term = t; + } + + /// Returns the term of this query. + public virtual Term Term + { + get { return term; } + } + + public override Weight CreateWeight(Searcher searcher) + { + return new TermWeight(this, searcher); + } + + public override void ExtractTerms(System.Collections.Generic.ISet terms) + { + terms.Add(Term); + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (!term.Field.Equals(field)) + { + buffer.Append(term.Field); + buffer.Append(":"); + } + buffer.Append(term.Text); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + /// Returns true iff o is equal to this. + public override bool Equals(System.Object o) + { + if (!(o is TermQuery)) + return false; + TermQuery other = (TermQuery) o; + return (this.Boost == other.Boost) && this.term.Equals(other.term); + } + + /// Returns a hash code value for this object. 
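For contrast with the span queries earlier in this patch, a hedged sketch (field and term are assumptions) of the plain exact-term query this file adds:

Query monoInTitle = new TermQuery(new Term("title", "mono"));
// Printed against its own field, only the term text (plus any boost) is emitted, e.g. "mono".
System.Console.WriteLine(monoInTitle.ToString("title"));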
+ public override int GetHashCode() + { + return BitConverter.ToInt32(BitConverter.GetBytes(Boost), 0) ^ term.GetHashCode(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TermRangeFilter.cs b/external/Lucene.Net.Light/src/core/Search/TermRangeFilter.cs new file mode 100644 index 0000000000..ed157bd282 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TermRangeFilter.cs @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Globalization; + +namespace Lucene.Net.Search +{ + + /// A Filter that restricts search results to a range of values in a given + /// field. + /// + ///

This filter matches the documents looking for terms that fall into the + /// supplied range according to String.CompareTo(String). It is not intended + /// for numerical ranges; use NumericRangeFilter instead. + /// + ///

If you construct a large number of range filters with different ranges but on the + /// same field, FieldCacheRangeFilter may have significantly better performance. + ///

+ /// 2.9 + /// + [Serializable] + public class TermRangeFilter:MultiTermQueryWrapperFilter + { + + /// The field this range applies to + /// + /// The lower bound on this range + /// + /// The upper bound on this range + /// + /// Does this range include the lower bound? + /// + /// Does this range include the upper bound? + /// + /// IllegalArgumentException if both terms are null or if + /// lowerTerm is null and includeLower is true (similar for upperTerm + /// and includeUpper) + /// + public TermRangeFilter(System.String fieldName, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper):base(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper)) + { + } + + /// WARNING: Using this constructor and supplying a non-null + /// value in the collator parameter will cause every single + /// index Term in the Field referenced by lowerTerm and/or upperTerm to be + /// examined. Depending on the number of index Terms in this Field, the + /// operation could be very slow. + /// + /// + /// + /// The lower bound on this range + /// + /// The upper bound on this range + /// + /// Does this range include the lower bound? + /// + /// Does this range include the upper bound? + /// + /// The collator to use when determining range inclusion; set + /// to null to use Unicode code point ordering instead of collation. + /// + /// IllegalArgumentException if both terms are null or if + /// lowerTerm is null and includeLower is true (similar for upperTerm + /// and includeUpper) + /// + public TermRangeFilter(System.String fieldName, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper, System.Globalization.CompareInfo collator):base(new TermRangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)) + { + } + + /// Constructs a filter for field fieldName matching + /// less than or equal to upperTerm. + /// + public static TermRangeFilter Less(System.String fieldName, System.String upperTerm) + { + return new TermRangeFilter(fieldName, null, upperTerm, false, true); + } + + /// Constructs a filter for field fieldName matching + /// greater than or equal to lowerTerm. + /// + public static TermRangeFilter More(System.String fieldName, System.String lowerTerm) + { + return new TermRangeFilter(fieldName, lowerTerm, null, true, false); + } + + /// Returns the field name for this filter + public virtual string Field + { + get { return query.Field; } + } + + /// Returns the lower value of this range filter + public virtual string LowerTerm + { + get { return query.LowerTerm; } + } + + /// Returns the upper value of this range filter + public virtual string UpperTerm + { + get { return query.UpperTerm; } + } + + /// Returns true if the lower endpoint is inclusive + public virtual bool IncludesLower + { + get { return query.IncludesLower; } + } + + /// Returns true if the upper endpoint is inclusive + public virtual bool IncludesUpper + { + get { return query.IncludesUpper; } + } + + /// Returns the collator used to determine range inclusion, if any. 
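A construction sketch (illustrative; the "date" field and its values are assumptions) using the constructor and factory helpers defined above:

TermRangeFilter in2010 = new TermRangeFilter("date", "20100101", "20101231", true, true); // both ends inclusive
TermRangeFilter upTo = TermRangeFilter.Less("date", "20101231");  // open lower bound, upper bound inclusive
TermRangeFilter from = TermRangeFilter.More("date", "20100101");  // lower bound inclusive, open upper bound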
+ public virtual CompareInfo Collator + { + get { return query.Collator; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TermRangeQuery.cs b/external/Lucene.Net.Light/src/core/Search/TermRangeQuery.cs new file mode 100644 index 0000000000..a27b18ec27 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TermRangeQuery.cs @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Globalization; +using IndexReader = Lucene.Net.Index.IndexReader; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + /// A Query that matches documents within an exclusive range of terms. + /// + ///

This query matches the documents looking for terms that fall into the + /// supplied range according to String.CompareTo(String). It is not intended + /// for numerical ranges; use NumericRangeQuery instead. + /// + ///

This query uses the + /// MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT + /// rewrite method. + ///
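A usage sketch (not part of the patch; the field and bounds are invented) of the range query defined just below:

// All "author" terms from "a" (inclusive) up to, but excluding, "m"; a null bound would leave that end open.
Query authorsAtoM = new TermRangeQuery("author", "a", "m", true, false);
// Renders as: author:[a TO m}
System.Console.WriteLine(authorsAtoM.ToString(""));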

+ /// 2.9 + /// + + [Serializable] + public class TermRangeQuery:MultiTermQuery + { + private System.String lowerTerm; + private System.String upperTerm; + private System.Globalization.CompareInfo collator; + private System.String field; + private bool includeLower; + private bool includeUpper; + + + /// Constructs a query selecting all terms greater/equal than lowerTerm + /// but less/equal than upperTerm. + /// + ///

+ /// If an endpoint is null, it is said + /// to be "open". Either or both endpoints may be open. Open endpoints may not + /// be exclusive (you can't select all but the first or last term without + /// explicitly specifying the term to exclude.) + /// + ///

+ /// The field that holds both lower and upper terms. + /// + /// The term text at the lower end of the range + /// + /// The term text at the upper end of the range + /// + /// If true, the lowerTerm is + /// included in the range. + /// + /// If true, the upperTerm is + /// included in the range. + /// + public TermRangeQuery(System.String field, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper):this(field, lowerTerm, upperTerm, includeLower, includeUpper, null) + { + } + + /// Constructs a query selecting all terms greater/equal than + /// lowerTerm but less/equal than upperTerm. + ///

+ /// If an endpoint is null, it is said + /// to be "open". Either or both endpoints may be open. Open endpoints may not + /// be exclusive (you can't select all but the first or last term without + /// explicitly specifying the term to exclude.) + ///

+ /// If collator is not null, it will be used to decide whether + /// index terms are within the given range, rather than using the Unicode code + /// point order in which index terms are stored. + ///

+ /// WARNING: Using this constructor and supplying a non-null + /// value in the collator parameter will cause every single + /// index Term in the Field referenced by lowerTerm and/or upperTerm to be + /// examined. Depending on the number of index Terms in this Field, the + /// operation could be very slow. + /// + ///

+ /// + /// The Term text at the lower end of the range + /// + /// The Term text at the upper end of the range + /// + /// If true, the lowerTerm is + /// included in the range. + /// + /// If true, the upperTerm is + /// included in the range. + /// + /// The collator to use to collate index Terms, to determine + /// their membership in the range bounded by lowerTerm and + /// upperTerm. + /// + public TermRangeQuery(System.String field, System.String lowerTerm, System.String upperTerm, bool includeLower, bool includeUpper, System.Globalization.CompareInfo collator) + { + this.field = field; + this.lowerTerm = lowerTerm; + this.upperTerm = upperTerm; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.collator = collator; + } + + /// Returns the field name for this query + public virtual string Field + { + get { return field; } + } + + /// Returns the lower value of this range query + public virtual string LowerTerm + { + get { return lowerTerm; } + } + + /// Returns the upper value of this range query + public virtual string UpperTerm + { + get { return upperTerm; } + } + + /// Returns true if the lower endpoint is inclusive + public virtual bool IncludesLower + { + get { return includeLower; } + } + + /// Returns true if the upper endpoint is inclusive + public virtual bool IncludesUpper + { + get { return includeUpper; } + } + + /// Returns the collator used to determine range inclusion, if any. + public virtual CompareInfo Collator + { + get { return collator; } + } + + protected internal override FilteredTermEnum GetEnum(IndexReader reader) + { + return new TermRangeTermEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (!Field.Equals(field)) + { + buffer.Append(Field); + buffer.Append(":"); + } + buffer.Append(includeLower?'[':'{'); + buffer.Append(lowerTerm != null?lowerTerm:"*"); + buffer.Append(" TO "); + buffer.Append(upperTerm != null?upperTerm:"*"); + buffer.Append(includeUpper?']':'}'); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + //@Override + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((collator == null)?0:collator.GetHashCode()); + result = prime * result + ((field == null)?0:field.GetHashCode()); + result = prime * result + (includeLower?1231:1237); + result = prime * result + (includeUpper?1231:1237); + result = prime * result + ((lowerTerm == null)?0:lowerTerm.GetHashCode()); + result = prime * result + ((upperTerm == null)?0:upperTerm.GetHashCode()); + return result; + } + + //@Override + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + TermRangeQuery other = (TermRangeQuery) obj; + if (collator == null) + { + if (other.collator != null) + return false; + } + else if (!collator.Equals(other.collator)) + return false; + if (field == null) + { + if (other.field != null) + return false; + } + else if (!field.Equals(other.field)) + return false; + if (includeLower != other.includeLower) + return false; + if (includeUpper != other.includeUpper) + return false; + if (lowerTerm == null) + { + if (other.lowerTerm != null) + return false; + } + else if (!lowerTerm.Equals(other.lowerTerm)) 
+ return false; + if (upperTerm == null) + { + if (other.upperTerm != null) + return false; + } + else if (!upperTerm.Equals(other.upperTerm)) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TermRangeTermEnum.cs b/external/Lucene.Net.Light/src/core/Search/TermRangeTermEnum.cs new file mode 100644 index 0000000000..fa03ff2b9e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TermRangeTermEnum.cs @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using StringHelper = Lucene.Net.Util.StringHelper; + +namespace Lucene.Net.Search +{ + + /// Subclass of FilteredTermEnum for enumerating all terms that match the + /// specified range parameters. + ///

+ /// Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + ///

+ /// 2.9 + /// + public class TermRangeTermEnum:FilteredTermEnum + { + + private System.Globalization.CompareInfo collator = null; + private bool endEnum = false; + private System.String field; + private System.String upperTermText; + private System.String lowerTermText; + private bool includeLower; + private bool includeUpper; + + /// Enumerates all terms greater/equal than lowerTerm + /// but less/equal than upperTerm. + /// + /// If an endpoint is null, it is said to be "open". Either or both + /// endpoints may be open. Open endpoints may not be exclusive + /// (you can't select all but the first or last term without + /// explicitly specifying the term to exclude.) + /// + /// + /// + /// + /// An interned field that holds both lower and upper terms. + /// + /// The term text at the lower end of the range + /// + /// The term text at the upper end of the range + /// + /// If true, the lowerTerm is included in the range. + /// + /// If true, the upperTerm is included in the range. + /// + /// The collator to use to collate index Terms, to determine their + /// membership in the range bounded by lowerTerm and + /// upperTerm. + /// + /// + /// IOException + public TermRangeTermEnum(IndexReader reader, System.String field, System.String lowerTermText, System.String upperTermText, bool includeLower, bool includeUpper, System.Globalization.CompareInfo collator) + { + this.collator = collator; + this.upperTermText = upperTermText; + this.lowerTermText = lowerTermText; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + this.field = StringHelper.Intern(field); + + // do a little bit of normalization... + // open ended range queries should always be inclusive. + if (this.lowerTermText == null) + { + this.lowerTermText = ""; + this.includeLower = true; + } + + if (this.upperTermText == null) + { + this.includeUpper = true; + } + + System.String startTermText = collator == null?this.lowerTermText:""; + SetEnum(reader.Terms(new Term(this.field, startTermText))); + } + + public override float Difference() + { + return 1.0f; + } + + public override bool EndEnum() + { + return endEnum; + } + + protected internal override bool TermCompare(Term term) + { + if (collator == null) + { + // Use Unicode code point ordering + bool checkLower = !includeLower; + if (term != null && (System.Object) term.Field == (System.Object) field) + { + // interned comparison + if (!checkLower || null == lowerTermText || String.CompareOrdinal(term.Text, lowerTermText) > 0) + { + checkLower = false; + if (upperTermText != null) + { + int compare = String.CompareOrdinal(upperTermText, term.Text); + /* + * if beyond the upper term, or is exclusive and this is equal to + * the upper term, break out + */ + if ((compare < 0) || (!includeUpper && compare == 0)) + { + endEnum = true; + return false; + } + } + return true; + } + } + else + { + // break + endEnum = true; + return false; + } + return false; + } + else + { + if (term != null && (System.Object) term.Field == (System.Object) field) + { + // interned comparison + if ((lowerTermText == null || (includeLower?collator.Compare(term.Text.ToString(), lowerTermText.ToString()) >= 0:collator.Compare(term.Text.ToString(), lowerTermText.ToString()) > 0)) && (upperTermText == null || (includeUpper?collator.Compare(term.Text.ToString(), upperTermText.ToString()) <= 0:collator.Compare(term.Text.ToString(), upperTermText.ToString()) < 0))) + { + return true; + } + return false; + } + endEnum = true; + return false; + } + } + } +} \ No newline at end of file 
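The two classes above work as a pair: TermRangeQuery describes the term range and, through GetEnum, hands the walk over matching index terms to TermRangeTermEnum. A minimal usage sketch follows; the "title" field name and the use of the invariant culture are illustrative assumptions, not part of this commit.

using System;
using System.Globalization;
using Lucene.Net.Search;

static class TermRangeQueryExample
{
    static void Main()
    {
        // Terms of the hypothetical "title" field in ["m", "t"): lower bound
        // inclusive, upper bound exclusive. A null collator means the term
        // enum falls back to ordinal (code point) comparison of term text.
        TermRangeQuery byCodePoint = new TermRangeQuery("title", "m", "t", true, false, null);

        // ToString renders '[' / ']' for inclusive and '{' / '}' for exclusive
        // endpoints, so this prints "title:[m TO t}".
        Console.WriteLine(byCodePoint.ToString(""));

        // Passing a CompareInfo makes TermRangeTermEnum decide range membership
        // by collation instead of code point order (slower, but locale aware).
        TermRangeQuery byCollation = new TermRangeQuery("title", "m", "t", true, false,
            CultureInfo.InvariantCulture.CompareInfo);
        Console.WriteLine(byCollation.ToString(""));
    }
}

Note the trade-off visible in the TermRangeTermEnum constructor above: when a collator is supplied the enumeration starts from the empty string rather than seeking to lowerTerm, so collated ranges scan more terms than code-point ranges.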
diff --git a/external/Lucene.Net.Light/src/core/Search/TermScorer.cs b/external/Lucene.Net.Light/src/core/Search/TermScorer.cs new file mode 100644 index 0000000000..88863bb171 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TermScorer.cs @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using TermDocs = Lucene.Net.Index.TermDocs; + +namespace Lucene.Net.Search +{ + + /// Expert: A Scorer for documents matching a Term. + public sealed class TermScorer:Scorer + { + + private static readonly float[] SIM_NORM_DECODER; + + private Weight weight; + private TermDocs termDocs; + private byte[] norms; + private float weightValue; + private int doc = - 1; + + private int[] docs = new int[32]; // buffered doc numbers + private int[] freqs = new int[32]; // buffered term freqs + private int pointer; + private int pointerMax; + + private const int SCORE_CACHE_SIZE = 32; + private float[] scoreCache = new float[SCORE_CACHE_SIZE]; + + /// Construct a TermScorer. + /// + /// + /// The weight of the Term in the query. + /// + /// An iterator over the documents matching the Term. + /// + /// The Similarity implementation to be used for score + /// computations. + /// + /// The field norms of the document fields for the Term. + /// + public /*internal*/ TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms):base(similarity) + { + this.weight = weight; + this.termDocs = td; + this.norms = norms; + this.weightValue = weight.Value; + + for (int i = 0; i < SCORE_CACHE_SIZE; i++) + scoreCache[i] = Similarity.Tf(i) * weightValue; + } + + public override void Score(Collector c) + { + Score(c, System.Int32.MaxValue, NextDoc()); + } + + // firstDocID is ignored since nextDoc() sets 'doc' + public /*protected internal*/ override bool Score(Collector c, int end, int firstDocID) + { + c.SetScorer(this); + while (doc < end) + { + // for docs in window + c.Collect(doc); // collect score + + if (++pointer >= pointerMax) + { + pointerMax = termDocs.Read(docs, freqs); // refill buffers + if (pointerMax != 0) + { + pointer = 0; + } + else + { + termDocs.Close(); // close stream + doc = System.Int32.MaxValue; // set to sentinel value + return false; + } + } + doc = docs[pointer]; + } + return true; + } + + public override int DocID() + { + return doc; + } + + /// Advances to the next document matching the query.
+ /// The iterator over the matching documents is buffered using + /// . + /// + ///
+ /// the document matching the query or -1 if there are no more documents. + /// + public override int NextDoc() + { + pointer++; + if (pointer >= pointerMax) + { + pointerMax = termDocs.Read(docs, freqs); // refill buffer + if (pointerMax != 0) + { + pointer = 0; + } + else + { + termDocs.Close(); // close stream + return doc = NO_MORE_DOCS; + } + } + doc = docs[pointer]; + return doc; + } + + public override float Score() + { + System.Diagnostics.Debug.Assert(doc != - 1); + int f = freqs[pointer]; + float raw = f < SCORE_CACHE_SIZE?scoreCache[f]:Similarity.Tf(f) * weightValue; // cache miss + + return norms == null?raw:raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field + } + + /// Advances to the first match beyond the current whose document number is + /// greater than or equal to a given target.
+ /// The implementation uses . + /// + ///
+ /// The target document number. + /// + /// the matching document or -1 if none exist. + /// + public override int Advance(int target) + { + // first scan in cache + for (pointer++; pointer < pointerMax; pointer++) + { + if (docs[pointer] >= target) + { + return doc = docs[pointer]; + } + } + + // not found in cache, seek underlying stream + bool result = termDocs.SkipTo(target); + if (result) + { + pointerMax = 1; + pointer = 0; + docs[pointer] = doc = termDocs.Doc; + freqs[pointer] = termDocs.Freq; + } + else + { + doc = NO_MORE_DOCS; + } + return doc; + } + + /// Returns a string representation of this TermScorer. + public override System.String ToString() + { + return "scorer(" + weight + ")"; + } + static TermScorer() + { + SIM_NORM_DECODER = Search.Similarity.GetNormDecoder(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TimeLimitingCollector.cs b/external/Lucene.Net.Light/src/core/Search/TimeLimitingCollector.cs new file mode 100644 index 0000000000..2917df3a0a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TimeLimitingCollector.cs @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// The is used to timeout search requests that + /// take longer than the maximum allowed search time limit. After this time is + /// exceeded, the search thread is stopped by throwing a + /// . + /// + public class TimeLimitingCollector:Collector + { + private void InitBlock() + { + greedy = DEFAULT_GREEDY; + } + + /// Default timer resolution. + /// + /// + public const int DEFAULT_RESOLUTION = 20; + + /// Default for . + /// + /// + public bool DEFAULT_GREEDY = false; + + private static uint resolution = DEFAULT_RESOLUTION; + + private bool greedy; + + private sealed class TimerThread:ThreadClass + { + + // NOTE: we can avoid explicit synchronization here for several reasons: + // * updates to volatile long variables are atomic + // * only single thread modifies this value + // * use of volatile keyword ensures that it does not reside in + // a register, but in main memory (so that changes are visible to + // other threads). + // * visibility of changes does not need to be instantanous, we can + // afford losing a tick or two. + // + // See section 17 of the Java Language Specification for details. + private volatile uint time = 0; + + /// TimerThread provides a pseudo-clock service to all searching + /// threads, so that they can count elapsed time with less overhead + /// than repeatedly calling System.currentTimeMillis. A single + /// thread should be created to be used for all searches. 
+ /// + internal TimerThread():base("TimeLimitedCollector timer thread") + { + this.IsBackground = true; + } + + override public void Run() + { + while (true) + { + // TODO: Use System.nanoTime() when Lucene moves to Java SE 5. + time += Lucene.Net.Search.TimeLimitingCollector.resolution; + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * Lucene.Net.Search.TimeLimitingCollector.resolution)); + + } + } + + /// Get the timer value in milliseconds. + public long Milliseconds + { + get { return time; } + } + } + + /// Thrown when elapsed search time exceeds allowed search time. + [Serializable] + public class TimeExceededException:System.SystemException + { + private long timeAllowed; + private long timeElapsed; + private int lastDocCollected; + internal TimeExceededException(long timeAllowed, long timeElapsed, int lastDocCollected):base("Elapsed time: " + timeElapsed + "Exceeded allowed search time: " + timeAllowed + " ms.") + { + this.timeAllowed = timeAllowed; + this.timeElapsed = timeElapsed; + this.lastDocCollected = lastDocCollected; + } + + /// Returns allowed time (milliseconds). + public virtual long TimeAllowed + { + get { return timeAllowed; } + } + + /// Returns elapsed time (milliseconds). + public virtual long TimeElapsed + { + get { return timeElapsed; } + } + + /// Returns last doc(absolute doc id) that was collected when the search time exceeded. + public virtual int LastDocCollected + { + get { return lastDocCollected; } + } + } + + // Declare and initialize a single static timer thread to be used by + // all TimeLimitedCollector instances. The JVM assures that + // this only happens once. + private static readonly TimerThread TIMER_THREAD = new TimerThread(); + + private long t0; + private long timeout; + private Collector collector; + + private int docBase; + + /// Create a TimeLimitedCollector wrapper over another with a specified timeout. + /// the wrapped + /// + /// max time allowed for collecting hits after which is thrown + /// + public TimeLimitingCollector(Collector collector, long timeAllowed) + { + InitBlock(); + this.collector = collector; + t0 = TIMER_THREAD.Milliseconds; + this.timeout = t0 + timeAllowed; + } + + /// + /// Gets or sets the timer resolution. + /// The default timer resolution is 20 milliseconds. + /// This means that a search required to take no longer than + /// 800 milliseconds may be stopped after 780 to 820 milliseconds. + ///
Note that: + /// + /// Finer (smaller) resolution is more accurate but less efficient. + /// Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds. + /// Setting resolution smaller than current resolution might take effect only after current + /// resolution. (Assume current resolution of 20 milliseconds is modified to 5 milliseconds, + /// then it can take up to 20 milliseconds for the change to have effect.) + /// + ///
+ public static long Resolution + { + get { return resolution; } + set + { + // 5 milliseconds is about the minimum reasonable time for a Object.wait(long) call. + resolution = (uint)System.Math.Max(value, 5); + } + } + + /// Checks if this time limited collector is greedy in collecting the last hit. + /// A non greedy collector, upon a timeout, would throw a + /// without allowing the wrapped collector to collect current doc. A greedy one would + /// first allow the wrapped hit collector to collect current doc and only then + /// throw a . + /// + public virtual bool IsGreedy + { + get { return greedy; } + set { this.greedy = value; } + } + + /// Calls on the decorated + /// unless the allowed time has passed, in which case it throws an exception. + /// + /// + /// TimeExceededException + /// if the time allowed has exceeded. + /// + public override void Collect(int doc) + { + long time = TIMER_THREAD.Milliseconds; + if (timeout < time) + { + if (greedy) + { + //System.out.println(this+" greedy: before failing, collecting doc: "+doc+" "+(time-t0)); + collector.Collect(doc); + } + //System.out.println(this+" failing on: "+doc+" "+(time-t0)); + throw new TimeExceededException(timeout - t0, time - t0, docBase + doc); + } + //System.out.println(this+" collecting: "+doc+" "+(time-t0)); + collector.Collect(doc); + } + + public override void SetNextReader(IndexReader reader, int base_Renamed) + { + collector.SetNextReader(reader, base_Renamed); + this.docBase = base_Renamed; + } + + public override void SetScorer(Scorer scorer) + { + collector.SetScorer(scorer); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return collector.AcceptsDocsOutOfOrder; } + } + + static TimeLimitingCollector() + { + { + TIMER_THREAD.Start(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TopDocs.cs b/external/Lucene.Net.Light/src/core/Search/TopDocs.cs new file mode 100644 index 0000000000..142e7037de --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TopDocs.cs @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// Represents hits returned by + /// and + /// + [Serializable] + public class TopDocs + { + private int _totalHits; + private ScoreDoc[] _scoreDocs; + private float _maxScore; + + /// The total number of hits for the query. + public int TotalHits + { + get { return _totalHits; } + set { _totalHits = value; } + } + + /// The top hits for the query. 
+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1819:PropertiesShouldNotReturnArrays")] + public ScoreDoc[] ScoreDocs + { + get { return _scoreDocs; } + set { _scoreDocs = value; } + } + + /// + /// Gets or sets the maximum score value encountered, needed for normalizing. + /// Note that in case scores are not tracked, this returns . + /// + public float MaxScore + { + get { return _maxScore; } + set { _maxScore = value; } + } + + /// Constructs a TopDocs with a default maxScore=Float.NaN. + internal TopDocs(int totalHits, ScoreDoc[] scoreDocs):this(totalHits, scoreDocs, float.NaN) + { + } + + /// + public TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) + { + this.TotalHits = totalHits; + this.ScoreDocs = scoreDocs; + this.MaxScore = maxScore; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TopDocsCollector.cs b/external/Lucene.Net.Light/src/core/Search/TopDocsCollector.cs new file mode 100644 index 0000000000..f947e5c8dc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TopDocsCollector.cs @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; + +namespace Lucene.Net.Search +{ + /// A base class for all collectors that return a output. This + /// collector allows easy extension by providing a single constructor which + /// accepts a as well as protected members for that + /// priority queue and a counter of the number of total hits.
+ /// Extending classes can override and + /// in order to provide their own implementation. + ///
+ public abstract class TopDocsCollector : Collector where T : ScoreDoc + { + + // This is used in case topDocs() is called with illegal parameters, or there + // simply aren't (enough) results. + protected internal static readonly TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], System.Single.NaN); + + /// The priority queue which holds the top documents. Note that different + /// implementations of PriorityQueue give different meaning to 'top documents'. + /// HitQueue for example aggregates the top scoring documents, while other PQ + /// implementations may hold documents sorted by other criteria. + /// + protected internal PriorityQueue pq; + + /// The total number of documents that the collector encountered. + protected internal int internalTotalHits; + + protected internal TopDocsCollector(PriorityQueue pq) + { + this.pq = pq; + } + + /// Populates the results array with the ScoreDoc instaces. This can be + /// overridden in case a different ScoreDoc type should be returned. + /// + protected internal virtual void PopulateResults(ScoreDoc[] results, int howMany) + { + for (int i = howMany - 1; i >= 0; i--) + { + results[i] = pq.Pop(); + } + } + + /// Returns a instance containing the given results. If + /// results is null it means there are no results to return, + /// either because there were 0 calls to collect() or because the arguments to + /// topDocs were invalid. + /// + public /*protected internal*/ virtual TopDocs NewTopDocs(ScoreDoc[] results, int start) + { + return results == null?EMPTY_TOPDOCS:new TopDocs(internalTotalHits, results); + } + + /// The total number of documents that matched this query. + public virtual int TotalHits + { + get { return internalTotalHits; } + } + + /// Returns the top docs that were collected by this collector. + public TopDocs TopDocs() + { + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + return TopDocs(0, internalTotalHits < pq.Size()?internalTotalHits:pq.Size()); + } + + /// Returns the documents in the rage [start .. pq.size()) that were collected + /// by this collector. Note that if start >= pq.size(), an empty TopDocs is + /// returned.
+ /// This method is convenient to call if the application always asks for the + /// last results, starting from the last 'page'.
+ /// NOTE: you cannot call this method more than once for each search + /// execution. If you need to call it more than once, passing each time a + /// different start, you should call and work + /// with the returned object, which will contain all the + /// results this search execution collected. + ///
+ public TopDocs TopDocs(int start) + { + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + return TopDocs(start, internalTotalHits < pq.Size()?internalTotalHits:pq.Size()); + } + + /// Returns the documents in the range [start .. start+howMany) that were + /// collected by this collector. Note that if start >= pq.size(), an empty + /// TopDocs is returned, and if pq.size() - start < howMany, then only the + /// available documents in [start .. pq.size()) are returned.
+ /// This method is useful when the search application allows pagination of + /// search results; it also attempts to optimize memory use by allocating + /// only as much as is requested by howMany.
+ /// NOTE: you cannot call this method more than once for each search + /// execution. If you need to call it more than once, passing each time a + /// different range, you should call and work with the + /// returned object, which will contain all the results this + /// search execution collected. + ///
+ public TopDocs TopDocs(int start, int howMany) + { + + // In case pq was populated with sentinel values, there might be less + // results than pq.size(). Therefore return all results until either + // pq.size() or totalHits. + int size = internalTotalHits < pq.Size()?internalTotalHits:pq.Size(); + + // Don't bother to throw an exception, just return an empty TopDocs in case + // the parameters are invalid or out of range. + if (start < 0 || start >= size || howMany <= 0) + { + return NewTopDocs(null, start); + } + + // We know that start < pqsize, so just fix howMany. + howMany = System.Math.Min(size - start, howMany); + ScoreDoc[] results = new ScoreDoc[howMany]; + + // pq's pop() returns the 'least' element in the queue, therefore need + // to discard the first ones, until we reach the requested range. + // Note that this loop will usually not be executed, since the common usage + // should be that the caller asks for the last howMany results. However it's + // needed here for completeness. + for (int i = pq.Size() - start - howMany; i > 0; i--) + { + pq.Pop(); + } + + // Get the requested results from pq. + PopulateResults(results, howMany); + + return NewTopDocs(results, start); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TopFieldCollector.cs b/external/Lucene.Net.Light/src/core/Search/TopFieldCollector.cs new file mode 100644 index 0000000000..1cfc4d3f30 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TopFieldCollector.cs @@ -0,0 +1,1137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Util; +using IndexReader = Lucene.Net.Index.IndexReader; +using Entry = Lucene.Net.Search.FieldValueHitQueue.Entry; + +namespace Lucene.Net.Search +{ + + /// A that sorts by using + /// s. + ///

+ /// See the method + /// for instantiating a TopFieldCollector. + /// + ///

NOTE: This API is experimental and might change in + /// incompatible ways in the next release.

+ ///

+ public abstract class TopFieldCollector : TopDocsCollector + { + // TODO: one optimization we could do is to pre-fill + // the queue with sentinel value that guaranteed to + // always compare lower than a real hit; this would + // save having to check queueFull on each insert + + // + // Implements a TopFieldCollector over one SortField criteria, without + // tracking document scores and maxScore. + // + private class OneComparatorNonScoringCollector : TopFieldCollector + { + internal FieldComparator comparator; + internal int reverseMul; + + public OneComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + comparator = queue.GetComparators()[0]; + reverseMul = queue.GetReverseMul()[0]; + } + + internal void UpdateBottom(int doc) + { + // bottom.score is already set to Float.NaN in add(). + bottom.Doc = docBase + doc; + bottom = pq.UpdateTop(); + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + if ((reverseMul * comparator.CompareBottom(doc)) <= 0) + { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is largest than anything else in the queue, and + // therefore not competitive. + return ; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.Copy(bottom.slot, doc); + UpdateBottom(doc); + comparator.SetBottom(bottom.slot); + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + comparator.Copy(slot, doc); + Add(slot, doc, System.Single.NaN); + if (queueFull) + { + comparator.SetBottom(bottom.slot); + } + } + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + this.docBase = docBase; + comparator.SetNextReader(reader, docBase); + } + + public override void SetScorer(Scorer scorer) + { + comparator.SetScorer(scorer); + } + } + + // + // Implements a TopFieldCollector over one SortField criteria, without + // tracking document scores and maxScore, and assumes out of orderness in doc + // Ids collection. + // + private class OutOfOrderOneComparatorNonScoringCollector:OneComparatorNonScoringCollector + { + + public OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + int cmp = reverseMul * comparator.CompareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.Doc)) + { + return ; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.Copy(bottom.slot, doc); + UpdateBottom(doc); + comparator.SetBottom(bottom.slot); + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + comparator.Copy(slot, doc); + Add(slot, doc, System.Single.NaN); + if (queueFull) + { + comparator.SetBottom(bottom.slot); + } + } + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + /* + * Implements a TopFieldCollector over one SortField criteria, while tracking + * document scores but no maxScore. 
+ */ + private class OneComparatorScoringNoMaxScoreCollector : OneComparatorNonScoringCollector + { + + internal Scorer scorer; + + public OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + internal void UpdateBottom(int doc, float score) + { + bottom.Doc = docBase + doc; + bottom.Score = score; + bottom = pq.UpdateTop(); + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + if ((reverseMul * comparator.CompareBottom(doc)) <= 0) + { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is largest than anything else in the queue, and + // therefore not competitive. + return ; + } + + // Compute the score only if the hit is competitive. + float score = scorer.Score(); + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.Copy(bottom.slot, doc); + UpdateBottom(doc, score); + comparator.SetBottom(bottom.slot); + } + else + { + // Compute the score only if the hit is competitive. + float score = scorer.Score(); + + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + comparator.Copy(slot, doc); + Add(slot, doc, score); + if (queueFull) + { + comparator.SetBottom(bottom.slot); + } + } + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + comparator.SetScorer(scorer); + } + } + + /* + * Implements a TopFieldCollector over one SortField criteria, while tracking + * document scores but no maxScore, and assumes out of orderness in doc Ids + * collection. + */ + private class OutOfOrderOneComparatorScoringNoMaxScoreCollector : OneComparatorScoringNoMaxScoreCollector + { + + public OutOfOrderOneComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + int cmp = reverseMul * comparator.CompareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.Doc)) + { + return ; + } + + // Compute the score only if the hit is competitive. + float score = scorer.Score(); + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.Copy(bottom.slot, doc); + UpdateBottom(doc, score); + comparator.SetBottom(bottom.slot); + } + else + { + // Compute the score only if the hit is competitive. + float score = scorer.Score(); + + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + comparator.Copy(slot, doc); + Add(slot, doc, score); + if (queueFull) + { + comparator.SetBottom(bottom.slot); + } + } + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + // + // Implements a TopFieldCollector over one SortField criteria, with tracking + // document scores and maxScore. + // + private class OneComparatorScoringMaxScoreCollector:OneComparatorNonScoringCollector + { + + internal Scorer scorer; + + public OneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. 
+ maxScore = System.Single.NegativeInfinity; + } + + internal void UpdateBottom(int doc, float score) + { + bottom.Doc = docBase + doc; + bottom.Score = score; + bottom = pq.UpdateTop(); + } + + public override void Collect(int doc) + { + float score = scorer.Score(); + if (score > maxScore) + { + maxScore = score; + } + ++internalTotalHits; + if (queueFull) + { + if ((reverseMul * comparator.CompareBottom(doc)) <= 0) + { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is largest than anything else in the queue, and + // therefore not competitive. + return ; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.Copy(bottom.slot, doc); + UpdateBottom(doc, score); + comparator.SetBottom(bottom.slot); + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + comparator.Copy(slot, doc); + Add(slot, doc, score); + if (queueFull) + { + comparator.SetBottom(bottom.slot); + } + } + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + base.SetScorer(scorer); + } + } + + // + // Implements a TopFieldCollector over one SortField criteria, with tracking + // document scores and maxScore, and assumes out of orderness in doc Ids + // collection. + // + private class OutOfOrderOneComparatorScoringMaxScoreCollector : OneComparatorScoringMaxScoreCollector + { + + public OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + public override void Collect(int doc) + { + float score = scorer.Score(); + if (score > maxScore) + { + maxScore = score; + } + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + int cmp = reverseMul * comparator.CompareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.Doc)) + { + return ; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.Copy(bottom.slot, doc); + UpdateBottom(doc, score); + comparator.SetBottom(bottom.slot); + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + comparator.Copy(slot, doc); + Add(slot, doc, score); + if (queueFull) + { + comparator.SetBottom(bottom.slot); + } + } + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, without + * tracking document scores and maxScore. + */ + private class MultiComparatorNonScoringCollector:TopFieldCollector + { + internal FieldComparator[] comparators; + internal int[] reverseMul; + + public MultiComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + comparators = queue.GetComparators(); + reverseMul = queue.GetReverseMul(); + } + + internal void UpdateBottom(int doc) + { + // bottom.score is already set to Float.NaN in add(). + bottom.Doc = docBase + doc; + bottom = pq.UpdateTop(); + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + for (int i = 0; ; i++) + { + int c = reverseMul[i] * comparators[i].CompareBottom(doc); + if (c < 0) + { + // Definitely not competitive. + return ; + } + else if (c > 0) + { + // Definitely competitive. 
+ break; + } + else if (i == comparators.Length - 1) + { + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. + return ; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(bottom.slot, doc); + } + + UpdateBottom(doc); + + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(slot, doc); + } + Add(slot, doc, System.Single.NaN); + if (queueFull) + { + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + } + } + + public override void SetNextReader(IndexReader reader, int docBase) + { + this.docBase = docBase; + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetNextReader(reader, docBase); + } + } + + public override void SetScorer(Scorer scorer) + { + // set the scorer on all comparators + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetScorer(scorer); + } + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, without + * tracking document scores and maxScore, and assumes out of orderness in doc + * Ids collection. + */ + private class OutOfOrderMultiComparatorNonScoringCollector:MultiComparatorNonScoringCollector + { + + public OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + for (int i = 0; ; i++) + { + int c = reverseMul[i] * comparators[i].CompareBottom(doc); + if (c < 0) + { + // Definitely not competitive. + return ; + } + else if (c > 0) + { + // Definitely competitive. + break; + } + else if (i == comparators.Length - 1) + { + // This is the equals case. + if (doc + docBase > bottom.Doc) + { + // Definitely not competitive + return ; + } + break; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(bottom.slot, doc); + } + + UpdateBottom(doc); + + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(slot, doc); + } + Add(slot, doc, System.Single.NaN); + if (queueFull) + { + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + } + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, with + * tracking document scores and maxScore. 
+ */ + private class MultiComparatorScoringMaxScoreCollector : MultiComparatorNonScoringCollector + { + + internal Scorer scorer; + + public MultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN. + maxScore = System.Single.NegativeInfinity; + } + + internal void UpdateBottom(int doc, float score) + { + bottom.Doc = docBase + doc; + bottom.Score = score; + bottom = pq.UpdateTop(); + } + + public override void Collect(int doc) + { + float score = scorer.Score(); + if (score > maxScore) + { + maxScore = score; + } + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + for (int i = 0; ; i++) + { + int c = reverseMul[i] * comparators[i].CompareBottom(doc); + if (c < 0) + { + // Definitely not competitive. + return ; + } + else if (c > 0) + { + // Definitely competitive. + break; + } + else if (i == comparators.Length - 1) + { + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. + return ; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(bottom.slot, doc); + } + + UpdateBottom(doc, score); + + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(slot, doc); + } + Add(slot, doc, score); + if (queueFull) + { + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + } + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + base.SetScorer(scorer); + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, with + * tracking document scores and maxScore, and assumes out of orderness in doc + * Ids collection. + */ + private sealed class OutOfOrderMultiComparatorScoringMaxScoreCollector:MultiComparatorScoringMaxScoreCollector + { + + public OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + public override void Collect(int doc) + { + float score = scorer.Score(); + if (score > maxScore) + { + maxScore = score; + } + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + for (int i = 0; ; i++) + { + int c = reverseMul[i] * comparators[i].CompareBottom(doc); + if (c < 0) + { + // Definitely not competitive. + return ; + } + else if (c > 0) + { + // Definitely competitive. + break; + } + else if (i == comparators.Length - 1) + { + // This is the equals case. 
+ if (doc + docBase > bottom.Doc) + { + // Definitely not competitive + return ; + } + break; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(bottom.slot, doc); + } + + UpdateBottom(doc, score); + + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(slot, doc); + } + Add(slot, doc, score); + if (queueFull) + { + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + } + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, with + * tracking document scores and maxScore. + */ + private class MultiComparatorScoringNoMaxScoreCollector:MultiComparatorNonScoringCollector + { + + internal Scorer scorer; + + public MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + internal void UpdateBottom(int doc, float score) + { + bottom.Doc = docBase + doc; + bottom.Score = score; + bottom = pq.UpdateTop(); + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + for (int i = 0; ; i++) + { + int c = reverseMul[i] * comparators[i].CompareBottom(doc); + if (c < 0) + { + // Definitely not competitive. + return ; + } + else if (c > 0) + { + // Definitely competitive. + break; + } + else if (i == comparators.Length - 1) + { + // Here c=0. If we're at the last comparator, this doc is not + // competitive, since docs are visited in doc Id order, which means + // this doc cannot compete with any other document in the queue. + return ; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(bottom.slot, doc); + } + + // Compute score only if it is competitive. + float score = scorer.Score(); + UpdateBottom(doc, score); + + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(slot, doc); + } + + // Compute score only if it is competitive. + float score = scorer.Score(); + Add(slot, doc, score); + if (queueFull) + { + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + } + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + base.SetScorer(scorer); + } + } + + /* + * Implements a TopFieldCollector over multiple SortField criteria, with + * tracking document scores and maxScore, and assumes out of orderness in doc + * Ids collection. 
+ */ + private sealed class OutOfOrderMultiComparatorScoringNoMaxScoreCollector:MultiComparatorScoringNoMaxScoreCollector + { + + public OutOfOrderMultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueue queue, int numHits, bool fillFields):base(queue, numHits, fillFields) + { + } + + public override void Collect(int doc) + { + ++internalTotalHits; + if (queueFull) + { + // Fastmatch: return if this hit is not competitive + for (int i = 0; ; i++) + { + int c = reverseMul[i] * comparators[i].CompareBottom(doc); + if (c < 0) + { + // Definitely not competitive. + return ; + } + else if (c > 0) + { + // Definitely competitive. + break; + } + else if (i == comparators.Length - 1) + { + // This is the equals case. + if (doc + docBase > bottom.Doc) + { + // Definitely not competitive + return ; + } + break; + } + } + + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(bottom.slot, doc); + } + + // Compute score only if it is competitive. + float score = scorer.Score(); + UpdateBottom(doc, score); + + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + else + { + // Startup transient: queue hasn't gathered numHits yet + int slot = internalTotalHits - 1; + // Copy hit into queue + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].Copy(slot, doc); + } + + // Compute score only if it is competitive. + float score = scorer.Score(); + Add(slot, doc, score); + if (queueFull) + { + for (int i = 0; i < comparators.Length; i++) + { + comparators[i].SetBottom(bottom.slot); + } + } + } + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + base.SetScorer(scorer); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + private static readonly ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0]; + + private bool fillFields; + + /* + * Stores the maximum score value encountered, needed for normalizing. If + * document scores are not tracked, this value is initialized to NaN. + */ + internal float maxScore = System.Single.NaN; + + internal int numHits; + internal FieldValueHitQueue.Entry bottom = null; + internal bool queueFull; + internal int docBase; + + // Declaring the constructor private prevents extending this class by anyone + // else. Note that the class cannot be final since it's extended by the + // internal versions. If someone will define a constructor with any other + // visibility, then anyone will be able to extend the class, which is not what + // we want. + private TopFieldCollector(PriorityQueue pq, int numHits, bool fillFields) + : base(pq) + { + this.numHits = numHits; + this.fillFields = fillFields; + } + + /// Creates a new from the given + /// arguments. + /// + ///

NOTE: The instances returned by this method + /// pre-allocate a full array of length + /// numHits. + /// + ///

+ /// the sort criteria (SortFields). + /// + /// the number of results to collect. + /// + /// specifies whether the actual field values should be returned on + /// the results (FieldDoc). + /// + /// specifies whether document scores should be tracked and set on the + /// results. Note that if set to false, then the results' scores will + /// be set to Float.NaN. Setting this to true affects performance, as + /// it incurs the score computation on each competitive result. + /// Therefore if document scores are not required by the application, + /// it is recommended to set it to false. + /// + /// specifies whether the query's maxScore should be tracked and set + /// on the resulting . Note that if set to false, + /// returns Float.NaN. Setting this to + /// true affects performance as it incurs the score computation on + /// each result. Also, setting this true automatically sets + /// trackDocScores to true as well. + /// + /// specifies whether documents are scored in doc Id order or not by + /// the given in . + /// + /// a instance which will sort the results by + /// the sort criteria. + /// + /// IOException + public static TopFieldCollector Create(Sort sort, int numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder) + { + if (sort.fields.Length == 0) + { + throw new System.ArgumentException("Sort must contain at least one field"); + } + + FieldValueHitQueue queue = FieldValueHitQueue.Create(sort.fields, numHits); + if (queue.GetComparators().Length == 1) + { + if (docsScoredInOrder) + { + if (trackMaxScore) + { + return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields); + } + else if (trackDocScores) + { + return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields); + } + else + { + return new OneComparatorNonScoringCollector(queue, numHits, fillFields); + } + } + else + { + if (trackMaxScore) + { + return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields); + } + else if (trackDocScores) + { + return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields); + } + else + { + return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields); + } + } + } + + // multiple comparators. + if (docsScoredInOrder) + { + if (trackMaxScore) + { + return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields); + } + else if (trackDocScores) + { + return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields); + } + else + { + return new MultiComparatorNonScoringCollector(queue, numHits, fillFields); + } + } + else + { + if (trackMaxScore) + { + return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields); + } + else if (trackDocScores) + { + return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields); + } + else + { + return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields); + } + } + } + + internal void Add(int slot, int doc, float score) + { + bottom = pq.Add(new Entry(slot, docBase + doc, score)); + queueFull = internalTotalHits == numHits; + } + + /* + * Only the following callback methods need to be overridden since + * topDocs(int, int) calls them to return the results. + */ + + protected internal override void PopulateResults(ScoreDoc[] results, int howMany) + { + if (fillFields) + { + // avoid casting if unnecessary. 
+ FieldValueHitQueue queue = (FieldValueHitQueue) pq; + for (int i = howMany - 1; i >= 0; i--) + { + results[i] = queue.FillFields(queue.Pop()); + } + } + else + { + for (int i = howMany - 1; i >= 0; i--) + { + Entry entry = pq.Pop(); + results[i] = new FieldDoc(entry.Doc, entry.Score); + } + } + } + + public /*protected internal*/ override TopDocs NewTopDocs(ScoreDoc[] results, int start) + { + if (results == null) + { + results = EMPTY_SCOREDOCS; + // Set maxScore to NaN, in case this is a maxScore tracking collector. + maxScore = System.Single.NaN; + } + + // If this is a maxScoring tracking collector and there were no results, + return new TopFieldDocs(internalTotalHits, results, ((FieldValueHitQueue) pq).GetFields(), maxScore); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return false; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TopFieldDocs.cs b/external/Lucene.Net.Light/src/core/Search/TopFieldDocs.cs new file mode 100644 index 0000000000..4d8662f7aa --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TopFieldDocs.cs @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Search +{ + + /// + /// Represents hits returned by . + /// + [Serializable] + public class TopFieldDocs:TopDocs + { + + /// The fields which were used to sort results by. + public SortField[] fields; + + /// Creates one of these objects. + /// Total number of hits for the query. + /// + /// The top hits for the query. + /// + /// The sort criteria used to find the top hits. + /// + /// The maximum score encountered. + /// + public TopFieldDocs(int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore):base(totalHits, scoreDocs, maxScore) + { + this.fields = fields; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/TopScoreDocCollector.cs b/external/Lucene.Net.Light/src/core/Search/TopScoreDocCollector.cs new file mode 100644 index 0000000000..e5a1234b2b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/TopScoreDocCollector.cs @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// A implementation that collects the top-scoring hits, + /// returning them as a . This is used by to + /// implement -based search. Hits are sorted by score descending + /// and then (when the scores are tied) docID ascending. When you create an + /// instance of this collector you should know in advance whether documents are + /// going to be collected in doc Id order or not. + /// + ///

NOTE: The values float.NaN and + /// float.NegativeInfinity are not valid scores. This + /// collector will not properly collect hits with such + /// scores. + ///
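A hedged usage sketch of this collector follows; TopScoreDocCollector.Create and TopDocs are declared in this file, while Searcher.Search(Query, Collector) and the TopDocs.ScoreDocs property are assumed from the rest of the library, and the hit count is arbitrary:

    using Lucene.Net.Search;

    internal static class TopScoreDocCollectorExample   // hypothetical helper, not part of the diff
    {
        internal static ScoreDoc[] TopTenHits(Searcher searcher, Query query)
        {
            // Collect the 10 best-scoring hits; 'true' because most scorers visit docs in increasing doc id order.
            TopScoreDocCollector collector = TopScoreDocCollector.Create(10, true);
            searcher.Search(query, collector);
            // TopDocs() drains the priority queue, sorted by score (ties broken by ascending doc id).
            return collector.TopDocs().ScoreDocs;
        }
    }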

+ public abstract class TopScoreDocCollector : TopDocsCollector + { + + // Assumes docs are scored in order. + private class InOrderTopScoreDocCollector:TopScoreDocCollector + { + internal InOrderTopScoreDocCollector(int numHits):base(numHits) + { + } + + public override void Collect(int doc) + { + float score = scorer.Score(); + + // This collector cannot handle these scores: + System.Diagnostics.Debug.Assert(score != float.NegativeInfinity); + System.Diagnostics.Debug.Assert(!float.IsNaN(score)); + + internalTotalHits++; + if (score <= pqTop.Score) + { + // Since docs are returned in-order (i.e., increasing doc Id), a document + // with equal score to pqTop.score cannot compete since HitQueue favors + // documents with lower doc Ids. Therefore reject those docs too. + return ; + } + pqTop.Doc = doc + docBase; + pqTop.Score = score; + pqTop = pq.UpdateTop(); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return false; } + } + } + + // Assumes docs are scored out of order. + private class OutOfOrderTopScoreDocCollector:TopScoreDocCollector + { + internal OutOfOrderTopScoreDocCollector(int numHits):base(numHits) + { + } + + public override void Collect(int doc) + { + float score = scorer.Score(); + + // This collector cannot handle NaN + System.Diagnostics.Debug.Assert(!float.IsNaN(score)); + + internalTotalHits++; + doc += docBase; + if (score < pqTop.Score || (score == pqTop.Score && doc > pqTop.Doc)) + { + return ; + } + pqTop.Doc = doc; + pqTop.Score = score; + pqTop = pq.UpdateTop(); + } + + public override bool AcceptsDocsOutOfOrder + { + get { return true; } + } + } + + /// Creates a new given the number of hits to + /// collect and whether documents are scored in order by the input + /// to . + /// + ///

NOTE: The instances returned by this method + /// pre-allocate a full array of length + /// numHits, and fill the array with sentinel + /// objects. + ///

+ public static TopScoreDocCollector Create(int numHits, bool docsScoredInOrder) + { + + if (docsScoredInOrder) + { + return new InOrderTopScoreDocCollector(numHits); + } + else + { + return new OutOfOrderTopScoreDocCollector(numHits); + } + } + + internal ScoreDoc pqTop; + internal int docBase = 0; + internal Scorer scorer; + + // prevents instantiation + private TopScoreDocCollector(int numHits):base(new HitQueue(numHits, true)) + { + // HitQueue implements getSentinelObject to return a ScoreDoc, so we know + // that at this point top() is already initialized. + pqTop = pq.Top(); + } + + public /*protected internal*/ override TopDocs NewTopDocs(ScoreDoc[] results, int start) + { + if (results == null) + { + return EMPTY_TOPDOCS; + } + + // We need to compute maxScore in order to set it in TopDocs. If start == 0, + // it means the largest element is already in results, use its score as + // maxScore. Otherwise pop everything else, until the largest element is + // extracted and use its score as maxScore. + float maxScore = System.Single.NaN; + if (start == 0) + { + maxScore = results[0].Score; + } + else + { + for (int i = pq.Size(); i > 1; i--) + { + pq.Pop(); + } + maxScore = pq.Pop().Score; + } + + return new TopDocs(internalTotalHits, results, maxScore); + } + + public override void SetNextReader(IndexReader reader, int base_Renamed) + { + docBase = base_Renamed; + } + + public override void SetScorer(Scorer scorer) + { + this.scorer = scorer; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/Weight.cs b/external/Lucene.Net.Light/src/core/Search/Weight.cs new file mode 100644 index 0000000000..7e274a1bdc --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/Weight.cs @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Search +{ + + /// Expert: Calculate query weights and build query scorers. + ///

+ /// The purpose of Weight is to ensure searching does not + /// modify a Query, so that a Query instance can be reused.
+ /// Searcher dependent state of the query should reside in the + /// Weight.
+ /// IndexReader dependent state should reside in the Scorer. + ///

+ /// A Weight is used in the following way: + /// A Weight is constructed by a top-level query, given a + /// Searcher (Query.CreateWeight(Searcher)). + /// The GetSumOfSquaredWeights() method is called on the + /// Weight to compute the query normalization factor + /// Similarity.QueryNorm(float) of the query clauses contained in the + /// query. + /// The query normalization factor is passed to Normalize(float). At + /// this point the weighting is complete. + /// A Scorer is constructed by Scorer(IndexReader, bool, bool). + /// + ///
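A minimal sketch of that lifecycle, assuming a Query, Searcher and IndexReader are already in hand; Query.CreateWeight and Similarity.QueryNorm are assumed to be available in this vendored subset, while the other members are declared below in this file:

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    internal static class WeightLifecycleExample   // hypothetical helper, not part of the diff
    {
        internal static Scorer BuildScorer(Query query, Searcher searcher, IndexReader reader)
        {
            Weight weight = query.CreateWeight(searcher);      // 1. the top-level query builds the Weight
            float sum = weight.GetSumOfSquaredWeights();       // 2. gather the normalization input
            float norm = searcher.Similarity.QueryNorm(sum);   // 3. compute the query norm...
            weight.Normalize(norm);                            //    ...and apply it; weighting is now complete
            return weight.Scorer(reader, true, false);         // 4. build a Scorer over one reader
        }
    }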

+ /// 2.9 + /// + [Serializable] + public abstract class Weight + { + + /// An explanation of the score computation for the named document. + /// + /// + /// sub-reader containing the give doc + /// + /// + /// + /// an Explanation for the score + /// + /// IOException + public abstract Explanation Explain(IndexReader reader, int doc); + + /// The query that this concerns. + public abstract Query Query { get; } + + /// The weight for this query. + public abstract float Value { get; } + + /// Assigns the query normalization factor to this. + public abstract void Normalize(float norm); + + /// Returns a which scores documents in/out-of order according + /// to scoreDocsInOrder. + ///

NOTE: even if scoreDocsInOrder is false, it is + /// recommended to check whether the returned Scorer indeed scores + /// documents out of order (i.e., call GetScoresDocsOutOfOrder()), as + /// some Scorer implementations will always return documents + /// in-order.
+ /// NOTE: null can be returned if no documents will be scored by this + /// query. + /// + ///

+ /// + /// the for which to return the . + /// + /// specifies whether in-order scoring of documents is required. Note + /// that if set to false (i.e., out-of-order scoring is required), + /// this method can return whatever scoring mode it supports, as every + /// in-order scorer is also an out-of-order one. However, an + /// out-of-order scorer may not support + /// and/or , therefore it is recommended to + /// request an in-order scorer if use of these methods is required. + /// + /// + /// if true, will be called; if false, + /// and/or will + /// be called. + /// + /// a which scores documents in/out-of order. + /// + /// IOException + public abstract Scorer Scorer(IndexReader reader, bool scoreDocsInOrder, bool topScorer); + + /// The sum of squared weights of contained query clauses. + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public abstract float GetSumOfSquaredWeights(); + + /// Returns true iff this implementation scores docs only out of order. This + /// method is used in conjunction with 's + /// AcceptsDocsOutOfOrder and + /// to + /// create a matching instance for a given , or + /// vice versa. + ///

+ /// NOTE: the default implementation returns false, i.e. + /// the Scorer scores documents in-order. + ///

+ [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual bool GetScoresDocsOutOfOrder() + { + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/WildcardQuery.cs b/external/Lucene.Net.Light/src/core/Search/WildcardQuery.cs new file mode 100644 index 0000000000..fe024275d0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/WildcardQuery.cs @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; +using ToStringUtils = Lucene.Net.Util.ToStringUtils; + +namespace Lucene.Net.Search +{ + + /// Implements the wildcard search query. Supported wildcards are *, which + /// matches any character sequence (including the empty one), and ?, + /// which matches any single character. Note this query can be slow, as it + /// needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, + /// a Wildcard term should not start with one of the wildcards * or + /// ?. + /// + ///

This query uses the + /// MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT + /// rewrite method. + ///
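An illustrative use of the wildcard syntax described above; the field name and pattern are made up, and Searcher.Search(Query, int) is assumed from the surrounding library:

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    internal static class WildcardQueryExample   // hypothetical helper, not part of the diff
    {
        internal static TopDocs FindMonoTitles(Searcher searcher)
        {
            // Matches "mono", "monodoc", "monotone", ... in the "title" field.
            // Avoid a leading '*' or '?', which forces enumeration of every term.
            Query query = new WildcardQuery(new Term("title", "mono*"));
            return searcher.Search(query, 20);
        }
    }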

+ /// + /// + [Serializable] + public class WildcardQuery : MultiTermQuery + { + private readonly bool _termContainsWildcard; + private readonly bool _termIsPrefix; + protected internal Term internalTerm; + + public WildcardQuery(Term term) + { + this.internalTerm = term; + string text = term.Text; + _termContainsWildcard = (term.Text.IndexOf('*') != -1) + || (term.Text.IndexOf('?') != -1); + _termIsPrefix = _termContainsWildcard + && (text.IndexOf('?') == -1) + && (text.IndexOf('*') == text.Length - 1); + } + + protected internal override FilteredTermEnum GetEnum(IndexReader reader) + { + if (_termContainsWildcard) + { + return new WildcardTermEnum(reader, Term); + } + else + { + return new SingleTermEnum(reader, Term); + } + } + + /// Returns the pattern term. + public Term Term + { + get { return internalTerm; } + } + + public override Query Rewrite(IndexReader reader) + { + if (_termIsPrefix) + { + MultiTermQuery rewritten = + new PrefixQuery(internalTerm.CreateTerm(internalTerm.Text.Substring(0, internalTerm.Text.IndexOf('*')))); + rewritten.Boost = Boost; + rewritten.RewriteMethod = RewriteMethod; + return rewritten; + } + else + { + return base.Rewrite(reader); + } + } + + /// Prints a user-readable version of this query. + public override System.String ToString(System.String field) + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + if (!internalTerm.Field.Equals(field)) + { + buffer.Append(internalTerm.Field); + buffer.Append(":"); + } + buffer.Append(internalTerm.Text); + buffer.Append(ToStringUtils.Boost(Boost)); + return buffer.ToString(); + } + + //@Override + public override int GetHashCode() + { + int prime = 31; + int result = base.GetHashCode(); + result = prime * result + ((internalTerm == null)?0:internalTerm.GetHashCode()); + return result; + } + + //@Override + public override bool Equals(System.Object obj) + { + if (this == obj) + return true; + if (!base.Equals(obj)) + return false; + if (GetType() != obj.GetType()) + return false; + WildcardQuery other = (WildcardQuery) obj; + if (internalTerm == null) + { + if (other.internalTerm != null) + return false; + } + else if (!internalTerm.Equals(other.internalTerm)) + return false; + return true; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Search/WildcardTermEnum.cs b/external/Lucene.Net.Light/src/core/Search/WildcardTermEnum.cs new file mode 100644 index 0000000000..f2d555f5e0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Search/WildcardTermEnum.cs @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +using IndexReader = Lucene.Net.Index.IndexReader; +using Term = Lucene.Net.Index.Term; + +namespace Lucene.Net.Search +{ + + /// Subclass of FilteredTermEnum for enumerating all terms that match the + /// specified wildcard filter term. + ///

+ /// Term enumerations are always ordered by Term.compareTo(). Each term in + /// the enumeration is greater than all that precede it. + ///

+ public class WildcardTermEnum : FilteredTermEnum + { + internal Term searchTerm; + internal System.String field; + internal System.String text; + internal System.String pre; + internal int preLen; + internal bool endEnum = false; + + /// Creates a new WildcardTermEnum. + ///

+ /// After calling the constructor the enumeration is already pointing to the first + /// valid term if such a term exists. + ///
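A hedged sketch of walking this enumeration; the Term property, Next() and Close() are assumed from the TermEnum base class, and the reader, field and pattern are illustrative:

    using Lucene.Net.Index;
    using Lucene.Net.Search;

    internal static class WildcardTermEnumExample   // hypothetical helper, not part of the diff
    {
        internal static int CountMatchingTerms(IndexReader reader)
        {
            // The constructor leaves the enum positioned on the first matching term, if any.
            WildcardTermEnum termEnum = new WildcardTermEnum(reader, new Term("title", "mono*"));
            int count = 0;
            try
            {
                while (termEnum.Term != null)
                {
                    count++;
                    if (!termEnum.Next())
                        break;
                }
            }
            finally
            {
                termEnum.Close();
            }
            return count;
        }
    }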

+ public WildcardTermEnum(IndexReader reader, Term term):base() + { + searchTerm = term; + field = searchTerm.Field; + System.String searchTermText = searchTerm.Text; + + int sidx = searchTermText.IndexOf((System.Char) WILDCARD_STRING); + int cidx = searchTermText.IndexOf((System.Char) WILDCARD_CHAR); + int idx = sidx; + if (idx == - 1) + { + idx = cidx; + } + else if (cidx >= 0) + { + idx = System.Math.Min(idx, cidx); + } + pre = idx != - 1?searchTerm.Text.Substring(0, (idx) - (0)):""; + + preLen = pre.Length; + text = searchTermText.Substring(preLen); + SetEnum(reader.Terms(new Term(searchTerm.Field, pre))); + } + + /*protected internal*/ protected internal override bool TermCompare(Term term) + { + if ((System.Object) field == (System.Object) term.Field) + { + System.String searchText = term.Text; + if (searchText.StartsWith(pre)) + { + return WildcardEquals(text, 0, searchText, preLen); + } + } + endEnum = true; + return false; + } + + public override float Difference() + { + return 1.0f; + } + + public override bool EndEnum() + { + return endEnum; + } + + /// ***************************************** + /// String equality with support for wildcards + /// ****************************************** + /// + + public const char WILDCARD_STRING = '*'; + public const char WILDCARD_CHAR = '?'; + + /// Determines if a word matches a wildcard pattern. + /// Work released by Granta Design Ltd after originally being done on + /// company time. + /// + public static bool WildcardEquals(System.String pattern, int patternIdx, System.String string_Renamed, int stringIdx) + { + int p = patternIdx; + + for (int s = stringIdx; ; ++p, ++s) + { + // End of string yet? + bool sEnd = (s >= string_Renamed.Length); + // End of pattern yet? + bool pEnd = (p >= pattern.Length); + + // If we're looking at the end of the string... + if (sEnd) + { + // Assume the only thing left on the pattern is/are wildcards + bool justWildcardsLeft = true; + + // Current wildcard position + int wildcardSearchPos = p; + // While we haven't found the end of the pattern, + // and haven't encountered any non-wildcard characters + while (wildcardSearchPos < pattern.Length && justWildcardsLeft) + { + // Check the character at the current position + char wildchar = pattern[wildcardSearchPos]; + + // If it's not a wildcard character, then there is more + // pattern information after this/these wildcards. + if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING) + { + justWildcardsLeft = false; + } + else + { + // to prevent "cat" matches "ca??" + if (wildchar == WILDCARD_CHAR) + { + return false; + } + + // Look at the next character + wildcardSearchPos++; + } + } + + // This was a prefix wildcard search, and we've matched, so + // return true. + if (justWildcardsLeft) + { + return true; + } + } + + // If we've gone past the end of the string, or the pattern, + // return false. + if (sEnd || pEnd) + { + break; + } + + // Match a single character, so continue. + if (pattern[p] == WILDCARD_CHAR) + { + continue; + } + + // + if (pattern[p] == WILDCARD_STRING) + { + // Look at the character beyond the '*' characters. + while (p < pattern.Length && pattern[p] == WILDCARD_STRING) + ++p; + // Examine the string, starting at the last character. 
+ for (int i = string_Renamed.Length; i >= s; --i) + { + if (WildcardEquals(pattern, p, string_Renamed, i)) + { + return true; + } + } + break; + } + if (pattern[p] != string_Renamed[s]) + { + break; + } + } + return false; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/AlreadyClosedException.cs b/external/Lucene.Net.Light/src/core/Store/AlreadyClosedException.cs new file mode 100644 index 0000000000..15188bfd4d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/AlreadyClosedException.cs @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Runtime.Serialization; + +namespace Lucene.Net.Store +{ + /// This exception is thrown when there is an attempt to + /// access something that has already been closed. + /// + [Serializable] + public class AlreadyClosedException : SystemException + { + public AlreadyClosedException() + { + } + + public AlreadyClosedException(string message) : base(message) + { + } + + public AlreadyClosedException(string message, Exception inner) : base(message, inner) + { + } + + protected AlreadyClosedException( + SerializationInfo info, + StreamingContext context) : base(info, context) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/BufferedIndexInput.cs b/external/Lucene.Net.Light/src/core/Store/BufferedIndexInput.cs new file mode 100644 index 0000000000..55c8fdc362 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/BufferedIndexInput.cs @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Base implementation class for buffered . 
+ public abstract class BufferedIndexInput : IndexInput, System.ICloneable + { + + /// Default buffer size + public const int BUFFER_SIZE = 1024; + + private int _bufferSize = BUFFER_SIZE; + + protected internal byte[] buffer; + + private long bufferStart = 0; // position in file of buffer + private int bufferLength = 0; // end of valid bytes + private int bufferPosition = 0; // next byte to read + + public override byte ReadByte() + { + if (bufferPosition >= bufferLength) + Refill(); + return buffer[bufferPosition++]; + } + + protected BufferedIndexInput() + { + } + + /// Inits BufferedIndexInput with a specific bufferSize + protected BufferedIndexInput(int bufferSize) + { + CheckBufferSize(bufferSize); + this._bufferSize = bufferSize; + } + + /// Change the buffer size used by this IndexInput + public virtual void SetBufferSize(int newSize) + { + System.Diagnostics.Debug.Assert(buffer == null || _bufferSize == buffer.Length, "buffer=" + buffer + " bufferSize=" + _bufferSize + " buffer.length=" +(buffer != null ? buffer.Length: 0)); + if (newSize != _bufferSize) + { + CheckBufferSize(newSize); + _bufferSize = newSize; + if (buffer != null) + { + // Resize the existing buffer and carefully save as + // many bytes as possible starting from the current + // bufferPosition + byte[] newBuffer = new byte[newSize]; + int leftInBuffer = bufferLength - bufferPosition; + int numToCopy; + if (leftInBuffer > newSize) + numToCopy = newSize; + else + numToCopy = leftInBuffer; + Array.Copy(buffer, bufferPosition, newBuffer, 0, numToCopy); + bufferStart += bufferPosition; + bufferPosition = 0; + bufferLength = numToCopy; + NewBuffer(newBuffer); + } + } + } + + protected internal virtual void NewBuffer(byte[] newBuffer) + { + // Subclasses can do something here + buffer = newBuffer; + } + + /// + /// + public virtual int BufferSize + { + get { return _bufferSize; } + } + + private void CheckBufferSize(int bufferSize) + { + if (bufferSize <= 0) + throw new System.ArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")"); + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + ReadBytes(b, offset, len, true); + } + + public override void ReadBytes(byte[] b, int offset, int len, bool useBuffer) + { + + if (len <= (bufferLength - bufferPosition)) + { + // the buffer contains enough data to satisfy this request + if (len > 0) + // to allow b to be null if len is 0... + Array.Copy(buffer, bufferPosition, b, offset, len); + bufferPosition += len; + } + else + { + // the buffer does not have enough data. First serve all we've got. + int available = bufferLength - bufferPosition; + if (available > 0) + { + Array.Copy(buffer, bufferPosition, b, offset, available); + offset += available; + len -= available; + bufferPosition += available; + } + // and now, read the remaining 'len' bytes: + if (useBuffer && len < _bufferSize) + { + // If the amount left to read is small enough, and + // we are allowed to use our buffer, do it in the usual + // buffered way: fill the buffer and copy from it: + Refill(); + if (bufferLength < len) + { + // Throw an exception when refill() could not read len bytes: + Array.Copy(buffer, 0, b, offset, bufferLength); + throw new System.IO.IOException("read past EOF"); + } + else + { + Array.Copy(buffer, 0, b, offset, len); + bufferPosition = len; + } + } + else + { + // The amount left to read is larger than the buffer + // or we've been asked to not use our buffer - + // there's no performance reason not to read it all + // at once. 
Note that unlike the previous code of + // this function, there is no need to do a seek + // here, because there's no need to reread what we + // had in the buffer. + long after = bufferStart + bufferPosition + len; + if (after > Length()) + throw new System.IO.IOException("read past EOF"); + ReadInternal(b, offset, len); + bufferStart = after; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read + } + } + } + + private void Refill() + { + long start = bufferStart + bufferPosition; + long end = start + _bufferSize; + if (end > Length()) + // don't read past EOF + end = Length(); + int newLength = (int) (end - start); + if (newLength <= 0) + throw new System.IO.IOException("read past EOF"); + + if (buffer == null) + { + NewBuffer(new byte[_bufferSize]); // allocate buffer lazily + SeekInternal(bufferStart); + } + ReadInternal(buffer, 0, newLength); + bufferLength = newLength; + bufferStart = start; + bufferPosition = 0; + } + + /// Expert: implements buffer refill. Reads bytes from the current position + /// in the input. + /// + /// the array to read bytes into + /// + /// the offset in the array to start storing bytes + /// + /// the number of bytes to read + /// + public abstract void ReadInternal(byte[] b, int offset, int length); + + public override long FilePointer + { + get { return bufferStart + bufferPosition; } + } + + public override void Seek(long pos) + { + if (pos >= bufferStart && pos < (bufferStart + bufferLength)) + bufferPosition = (int) (pos - bufferStart); + // seek within buffer + else + { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + SeekInternal(pos); + } + } + + /// Expert: implements seek. Sets current position in this file, where the + /// next will occur. + /// + /// + /// + public abstract void SeekInternal(long pos); + + public override System.Object Clone() + { + BufferedIndexInput clone = (BufferedIndexInput) base.Clone(); + + clone.buffer = null; + clone.bufferLength = 0; + clone.bufferPosition = 0; + clone.bufferStart = FilePointer; + + return clone; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/BufferedIndexOutput.cs b/external/Lucene.Net.Light/src/core/Store/BufferedIndexOutput.cs new file mode 100644 index 0000000000..af805a121b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/BufferedIndexOutput.cs @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Base implementation class for buffered . 
+ public abstract class BufferedIndexOutput:IndexOutput + { + internal const int BUFFER_SIZE = 16384; + + private byte[] buffer = new byte[BUFFER_SIZE]; + private long bufferStart = 0; // position in file of buffer + private int bufferPosition = 0; // position in buffer + + private bool isDisposed; + + /// Writes a single byte. + /// + /// + public override void WriteByte(byte b) + { + if (bufferPosition >= BUFFER_SIZE) + Flush(); + buffer[bufferPosition++] = b; + } + + /// Writes an array of bytes. + /// the bytes to write + /// + /// the number of bytes to write + /// + /// + /// + public override void WriteBytes(byte[] b, int offset, int length) + { + int bytesLeft = BUFFER_SIZE - bufferPosition; + // is there enough space in the buffer? + if (bytesLeft >= length) + { + // we add the data to the end of the buffer + Array.Copy(b, offset, buffer, bufferPosition, length); + bufferPosition += length; + // if the buffer is full, flush it + if (BUFFER_SIZE - bufferPosition == 0) + Flush(); + } + else + { + // is data larger then buffer? + if (length > BUFFER_SIZE) + { + // we flush the buffer + if (bufferPosition > 0) + Flush(); + // and write data at once + FlushBuffer(b, offset, length); + bufferStart += length; + } + else + { + // we fill/flush the buffer (until the input is written) + int pos = 0; // position in the input data + int pieceLength; + while (pos < length) + { + pieceLength = (length - pos < bytesLeft)?length - pos:bytesLeft; + Array.Copy(b, pos + offset, buffer, bufferPosition, pieceLength); + pos += pieceLength; + bufferPosition += pieceLength; + // if the buffer is full, flush it + bytesLeft = BUFFER_SIZE - bufferPosition; + if (bytesLeft == 0) + { + Flush(); + bytesLeft = BUFFER_SIZE; + } + } + } + } + } + + /// Forces any buffered output to be written. + public override void Flush() + { + FlushBuffer(buffer, bufferPosition); + bufferStart += bufferPosition; + bufferPosition = 0; + } + + /// Expert: implements buffer write. Writes bytes at the current position in + /// the output. + /// + /// the bytes to write + /// + /// the number of bytes to write + /// + private void FlushBuffer(byte[] b, int len) + { + FlushBuffer(b, 0, len); + } + + /// Expert: implements buffer write. Writes bytes at the current position in + /// the output. + /// + /// the bytes to write + /// + /// the offset in the byte array + /// + /// the number of bytes to write + /// + public abstract void FlushBuffer(byte[] b, int offset, int len); + + /// Closes this stream to further operations. + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + Flush(); + } + + isDisposed = true; + } + + /// Returns the current position in this file, where the next write will + /// occur. + /// + /// + /// + public override long FilePointer + { + get { return bufferStart + bufferPosition; } + } + + /// Sets current position in this file, where the next write will occur. + /// + /// + public override void Seek(long pos) + { + Flush(); + bufferStart = pos; + } + + /// The number of bytes in the file. 
+ public abstract override long Length { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/CheckSumIndexInput.cs b/external/Lucene.Net.Light/src/core/Store/CheckSumIndexInput.cs new file mode 100644 index 0000000000..d8cfaf621a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/CheckSumIndexInput.cs @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Store +{ + + /// Writes bytes through to a primary IndexOutput, computing + /// checksum as it goes. Note that you cannot use seek(). + /// + public class ChecksumIndexInput : IndexInput + { + internal IndexInput main; + internal IChecksum digest; + + private bool isDisposed; + + public ChecksumIndexInput(IndexInput main) + { + this.main = main; + digest = new CRC32(); + } + + public override byte ReadByte() + { + byte b = main.ReadByte(); + digest.Update(b); + return b; + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + main.ReadBytes(b, offset, len); + digest.Update(b, offset, len); + } + + public virtual long Checksum + { + get { return digest.Value; } + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (main != null) + { + main.Dispose(); + } + } + + main = null; + isDisposed = true; + } + + public override long FilePointer + { + get { return main.FilePointer; } + } + + public override void Seek(long pos) + { + throw new System.SystemException("not allowed"); + } + + public override long Length() + { + return main.Length(); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/CheckSumIndexOutput.cs b/external/Lucene.Net.Light/src/core/Store/CheckSumIndexOutput.cs new file mode 100644 index 0000000000..9abe54f0a6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/CheckSumIndexOutput.cs @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; +using Lucene.Net.Support; +using CRC32 = Lucene.Net.Support.CRC32; + +namespace Lucene.Net.Store +{ + + /// Writes bytes through to a primary IndexOutput, computing + /// checksum. Note that you cannot use seek(). + /// + public class ChecksumIndexOutput:IndexOutput + { + internal IndexOutput main; + internal IChecksum digest; + + private bool isDisposed; + + public ChecksumIndexOutput(IndexOutput main) + { + this.main = main; + digest = new CRC32(); + } + + public override void WriteByte(byte b) + { + digest.Update(b); + main.WriteByte(b); + } + + public override void WriteBytes(byte[] b, int offset, int length) + { + digest.Update(b, offset, length); + main.WriteBytes(b, offset, length); + } + + public virtual long Checksum + { + get { return digest.Value; } + } + + public override void Flush() + { + main.Flush(); + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + main.Close(); + } + + isDisposed = true; + } + + public override long FilePointer + { + get { return main.FilePointer; } + } + + public override void Seek(long pos) + { + throw new System.SystemException("not allowed"); + } + + /// Starts but does not complete the commit of this file (= + /// writing of the final checksum at the end). After this + /// is called must call and the + /// to complete the commit. + /// + public virtual void PrepareCommit() + { + long checksum = Checksum; + // Intentionally write a mismatched checksum. This is + // because we want to 1) test, as best we can, that we + // are able to write a long to the file, but 2) not + // actually "commit" the file yet. This (prepare + // commit) is phase 1 of a two-phase commit. + long pos = main.FilePointer; + main.WriteLong(checksum - 1); + main.Flush(); + main.Seek(pos); + } + + /// See + public virtual void FinishCommit() + { + main.WriteLong(Checksum); + } + + public override long Length + { + get { return main.Length; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/Directory.cs b/external/Lucene.Net.Light/src/core/Store/Directory.cs new file mode 100644 index 0000000000..4ec45a2402 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/Directory.cs @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using IndexFileNameFilter = Lucene.Net.Index.IndexFileNameFilter; + +namespace Lucene.Net.Store +{ + + /// A Directory is a flat list of files. Files may be written once, when they + /// are created. Once a file is created it may only be opened for read, or + /// deleted. Random access is permitted both when reading and writing. + /// + ///

Java's i/o APIs are not used directly, but rather all i/o is + /// through this API. This permits things such as: + /// implementation of RAM-based indices; + /// implementation of indices stored in a database, via JDBC; + /// implementation of an index as a single file; + /// + /// + /// Directory locking is implemented by an instance of + /// LockFactory, and can be changed for each Directory + /// instance using SetLockFactory. + /// + ///
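A small sketch of the write-once contract described above, using only members declared on this class plus ReadByte/WriteByte/Close from IndexInput and IndexOutput; the file name and payload are made up:

    using Lucene.Net.Store;

    internal static class DirectoryExample   // hypothetical helper, not part of the diff
    {
        internal static byte RoundTrip(Directory dir)
        {
            IndexOutput output = dir.CreateOutput("example.bin");   // a file is written once, at creation...
            output.WriteByte((byte) 42);
            output.Close();

            IndexInput input = dir.OpenInput("example.bin");        // ...afterwards it may only be read...
            byte value = input.ReadByte();
            input.Close();

            dir.DeleteFile("example.bin");                           // ...or deleted
            return value;
        }
    }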

+ [Serializable] + public abstract class Directory : System.IDisposable + { + protected internal volatile bool isOpen = true; + + /// Holds the LockFactory instance (implements locking for + /// this Directory instance). + /// + [NonSerialized] + protected internal LockFactory interalLockFactory; + + /// Returns an array of strings, one for each file in the directory. + /// + public abstract System.String[] ListAll(); + + /// Returns true iff a file with the given name exists. + public abstract bool FileExists(System.String name); + + /// Returns the time the named file was last modified. + public abstract long FileModified(System.String name); + + /// Set the modified time of an existing file to now. + public abstract void TouchFile(System.String name); + + /// Removes an existing file in the directory. + public abstract void DeleteFile(System.String name); + + /// Returns the length of a file in the directory. + public abstract long FileLength(System.String name); + + + /// Creates a new, empty file in the directory with the given name. + /// Returns a stream writing this file. + /// + public abstract IndexOutput CreateOutput(System.String name); + + /// Ensure that any writes to this file are moved to + /// stable storage. Lucene uses this to properly commit + /// changes to the index, to prevent a machine/OS crash + /// from corrupting the index. + /// + public virtual void Sync(System.String name) + { + } + + /// Returns a stream reading an existing file. + public abstract IndexInput OpenInput(System.String name); + + /// Returns a stream reading an existing file, with the + /// specified read buffer size. The particular Directory + /// implementation may ignore the buffer size. Currently + /// the only Directory implementations that respect this + /// parameter are and + ///. + /// + public virtual IndexInput OpenInput(System.String name, int bufferSize) + { + return OpenInput(name); + } + + /// Construct a . + /// the name of the lock file + /// + public virtual Lock MakeLock(System.String name) + { + return interalLockFactory.MakeLock(name); + } + /// Attempt to clear (forcefully unlock and remove) the + /// specified lock. Only call this at a time when you are + /// certain this lock is no longer in use. + /// + /// name of the lock to be cleared. + /// + public virtual void ClearLock(System.String name) + { + if (interalLockFactory != null) + { + interalLockFactory.ClearLock(name); + } + } + + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + /// Closes the store. + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + + /// Set the LockFactory that this Directory instance should + /// use for its locking implementation. Each * instance of + /// LockFactory should only be used for one directory (ie, + /// do not share a single instance across multiple + /// Directories). + /// + /// + /// instance of . + /// + public virtual void SetLockFactory(LockFactory lockFactory) + { + System.Diagnostics.Debug.Assert(lockFactory != null); + this.interalLockFactory = lockFactory; + lockFactory.LockPrefix = this.GetLockId(); + } + + /// Get the LockFactory that this Directory instance is + /// using for its locking implementation. Note that this + /// may be null for Directory implementations that provide + /// their own locking implementation. 
+ /// + public virtual LockFactory LockFactory + { + get { return this.interalLockFactory; } + } + + /// Return a string identifier that uniquely differentiates + /// this Directory instance from other Directory instances. + /// This ID should be the same if two Directory instances + /// (even in different JVMs and/or on different machines) + /// are considered "the same index". This is how locking + /// "scopes" to the right index. + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual string GetLockId() + { + return ToString(); + } + + public override string ToString() + { + return base.ToString() + " lockFactory=" + LockFactory; + } + + /// Copy contents of a directory src to a directory dest. + /// If a file in src already exists in dest then the + /// one in dest will be blindly overwritten. + /// + ///

NOTE: the source directory cannot change + /// while this method is running. Otherwise the results + /// are undefined and you could easily hit a + /// FileNotFoundException. + /// + ///

NOTE: this method only copies files that look + /// like index files (ie, have extensions matching the + /// known extensions of index files). + /// + ///

+ /// source directory + /// + /// destination directory + /// + /// if true, call method on source directory + /// + /// IOException + public static void Copy(Directory src, Directory dest, bool closeDirSrc) + { + System.String[] files = src.ListAll(); + + IndexFileNameFilter filter = IndexFileNameFilter.Filter; + + byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE]; + for (int i = 0; i < files.Length; i++) + { + + if (!filter.Accept(null, files[i])) + continue; + + IndexOutput os = null; + IndexInput is_Renamed = null; + try + { + // create file in dest directory + os = dest.CreateOutput(files[i]); + // read current file + is_Renamed = src.OpenInput(files[i]); + // and copy to dest directory + long len = is_Renamed.Length(); + long readCount = 0; + while (readCount < len) + { + int toRead = readCount + BufferedIndexOutput.BUFFER_SIZE > len?(int) (len - readCount):BufferedIndexOutput.BUFFER_SIZE; + is_Renamed.ReadBytes(buf, 0, toRead); + os.WriteBytes(buf, toRead); + readCount += toRead; + } + } + finally + { + // graceful cleanup + try + { + if (os != null) + os.Close(); + } + finally + { + if (is_Renamed != null) + is_Renamed.Close(); + } + } + } + if (closeDirSrc) + src.Close(); + } + + /// AlreadyClosedException if this Directory is closed + public /*protected internal*/ void EnsureOpen() + { + if (!isOpen) + throw new AlreadyClosedException("this Directory is closed"); + } + + public bool isOpen_ForNUnit + { + get { return isOpen; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/FSDirectory.cs b/external/Lucene.Net.Light/src/core/Store/FSDirectory.cs new file mode 100644 index 0000000000..04c8c0adfb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/FSDirectory.cs @@ -0,0 +1,533 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +// Used only for WRITE_LOCK_NAME in deprecated create=true case: +using System.IO; +using Lucene.Net.Support; +using IndexFileNameFilter = Lucene.Net.Index.IndexFileNameFilter; +using IndexWriter = Lucene.Net.Index.IndexWriter; +using Constants = Lucene.Net.Util.Constants; + +namespace Lucene.Net.Store +{ + + /// + /// Base class for Directory implementations that store index + /// files in the file system. There are currently three core + /// subclasses: + /// + /// + /// + /// is a straightforward + /// implementation using java.io.RandomAccessFile. + /// However, it has poor concurrent performance + /// (multiple threads will bottleneck) as it + /// synchronizes when multiple threads read from the + /// same file. + /// + /// uses java.nio's + /// FileChannel's positional io when reading to avoid + /// synchronization when reading from the same file. 
+ /// Unfortunately, due to a Windows-only Sun + /// JRE bug this is a poor choice for Windows, but + /// on all other platforms this is the preferred + /// choice. Applications using or + /// Future#cancel(boolean) (on Java 1.5) should use + /// instead. See java doc + /// for details. + /// + /// + /// + /// uses memory-mapped IO when + /// reading. This is a good choice if you have plenty + /// of virtual memory relative to your index size, eg + /// if you are running on a 64 bit JRE, or you are + /// running on a 32 bit JRE but your index sizes are + /// small enough to fit into the virtual memory space. + /// Java has currently the limitation of not being able to + /// unmap files from user code. The files are unmapped, when GC + /// releases the byte buffers. Due to + /// + /// this bug in Sun's JRE, MMapDirectory's + /// is unable to close the underlying OS file handle. Only when + /// GC finally collects the underlying objects, which could be + /// quite some time later, will the file handle be closed. + /// This will consume additional transient disk usage: on Windows, + /// attempts to delete or overwrite the files will result in an + /// exception; on other platforms, which typically have a "delete on + /// last close" semantics, while such operations will succeed, the bytes + /// are still consuming space on disk. For many applications this + /// limitation is not a problem (e.g. if you have plenty of disk space, + /// and you don't rely on overwriting files on Windows) but it's still + /// an important limitation to be aware of. This class supplies a + /// (possibly dangerous) workaround mentioned in the bug report, + /// which may fail on non-Sun JVMs. + /// + /// Applications using or + /// Future#cancel(boolean) (on Java 1.5) should use + /// instead. See + /// java doc for details. + /// + /// + /// Unfortunately, because of system peculiarities, there is + /// no single overall best implementation. Therefore, we've + /// added the method, to allow Lucene to choose + /// the best FSDirectory implementation given your + /// environment, and the known limitations of each + /// implementation. For users who have no reason to prefer a + /// specific implementation, it's best to simply use + ///. For all others, you should instantiate the + /// desired implementation directly. + /// + ///

The locking implementation is by default + /// NativeFSLockFactory, but can be changed by + /// passing in a custom LockFactory instance. + ///

+ public abstract class FSDirectory : Directory + { + private static System.Security.Cryptography.HashAlgorithm DIGESTER; + + static FSDirectory() + { + try + { + DIGESTER = Cryptography.HashAlgorithm; + } + catch (System.Exception e) + { + throw new System.SystemException(e.ToString(), e); + } + } + + private bool checked_Renamed; + + internal void CreateDir() + { + if (!checked_Renamed) + { + if (!this.internalDirectory.Exists) + { + try + { + this.internalDirectory.Create(); + } + catch (Exception) + { + throw new System.IO.IOException("Cannot create directory: " + internalDirectory); + } + this.internalDirectory.Refresh(); // need to see the creation + } + + checked_Renamed = true; + } + } + + /// Initializes the directory to create a new file with the given name. + /// This method should be used in . + /// + protected internal void InitOutput(System.String name) + { + EnsureOpen(); + CreateDir(); + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + if (file.Exists) // delete existing, if any + { + try + { + file.Delete(); + } + catch (Exception) + { + throw new System.IO.IOException("Cannot overwrite: " + file); + } + } + } + + /// The underlying filesystem directory + protected internal System.IO.DirectoryInfo internalDirectory = null; + + /// Create a new FSDirectory for the named location (ctor for subclasses). + /// the path of the directory + /// + /// the lock factory to use, or null for the default + /// (); + /// + /// IOException + protected internal FSDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory) + { + // new ctors use always NativeFSLockFactory as default: + if (lockFactory == null) + { + lockFactory = new NativeFSLockFactory(); + } + // Set up lockFactory with cascaded defaults: if an instance was passed in, + // use that; else if locks are disabled, use NoLockFactory; else if the + // system property Lucene.Net.Store.FSDirectoryLockFactoryClass is set, + // instantiate that; else, use SimpleFSLockFactory: + + internalDirectory = path; + + // due to differences in how Java & .NET refer to files, the checks are a bit different + if (!internalDirectory.Exists && System.IO.File.Exists(internalDirectory.FullName)) + { + throw new NoSuchDirectoryException("file '" + internalDirectory.FullName + "' exists but is not a directory"); + } + SetLockFactory(lockFactory); + + // for filesystem based LockFactory, delete the lockPrefix, if the locks are placed + // in index dir. If no index dir is given, set ourselves + if (lockFactory is FSLockFactory) + { + FSLockFactory lf = (FSLockFactory)lockFactory; + System.IO.DirectoryInfo dir = lf.LockDir; + // if the lock factory has no lockDir set, use the this directory as lockDir + if (dir == null) + { + lf.LockDir = this.internalDirectory; + lf.LockPrefix = null; + } + else if (dir.FullName.Equals(this.internalDirectory.FullName)) + { + lf.LockPrefix = null; + } + } + } + + /// Creates an FSDirectory instance, trying to pick the + /// best implementation given the current environment. + /// The directory returned uses the . + /// + ///

Currently this returns SimpleFSDirectory, as + /// NIOFSDirectory is currently not supported. + /// + ///

NOTE: this method may suddenly change which + /// implementation is returned from release to release, in + /// the event that higher performance defaults become + /// possible; if the precise implementation is important to + /// your application, please instantiate it directly, + /// instead. On 64 bit systems, it may also be good to + /// return MMapDirectory, but this is disabled + /// because of officially missing unmap support in Java. + /// For optimal performance you should consider using + /// this implementation on 64 bit JVMs. + /// + ///

See above + ///
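A short sketch of using this factory; the index path is hypothetical, and on this port Open currently hands back a SimpleFSDirectory, as noted above:

    using Lucene.Net.Store;

    internal static class FSDirectoryExample   // hypothetical helper, not part of the diff
    {
        internal static void UseIndex()
        {
            // Let the factory pick the implementation; a custom LockFactory could be
            // passed via the Open(DirectoryInfo, LockFactory) overload declared below.
            Directory dir = FSDirectory.Open("/srv/monodoc/search_index");
            try
            {
                // ... open an IndexReader / IndexSearcher over 'dir' here ...
            }
            finally
            {
                dir.Dispose();
            }
        }
    }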

+ public static FSDirectory Open(string path) + { + return Open(new DirectoryInfo(path), null); + } + + /// Creates an FSDirectory instance, trying to pick the + /// best implementation given the current environment. + /// The directory returned uses the . + /// + ///

Currently this returns SimpleFSDirectory, as + /// NIOFSDirectory is currently not supported. + /// + ///

NOTE: this method may suddenly change which + /// implementation is returned from release to release, in + /// the event that higher performance defaults become + /// possible; if the precise implementation is important to + /// your application, please instantiate it directly, + /// instead. On 64 bit systems, it may also be good to + /// return MMapDirectory, but this is disabled + /// because of officially missing unmap support in Java. + /// For optimal performance you should consider using + /// this implementation on 64 bit JVMs. + /// + ///

See above + ///

+ public static FSDirectory Open(System.IO.DirectoryInfo path) + { + return Open(path, null); + } + + /// Just like , but allows you to + /// also specify a custom . + /// + public static FSDirectory Open(System.IO.DirectoryInfo path, LockFactory lockFactory) + { + /* For testing: + MMapDirectory dir=new MMapDirectory(path, lockFactory); + dir.setUseUnmap(true); + return dir; + */ + + if (Constants.WINDOWS) + { + return new SimpleFSDirectory(path, lockFactory); + } + else + { + //NIOFSDirectory is not implemented in Lucene.Net + //return new NIOFSDirectory(path, lockFactory); + return new SimpleFSDirectory(path, lockFactory); + } + } + + /// Lists all files (not subdirectories) in the + /// directory. This method never returns null (throws + /// instead). + /// + /// + /// NoSuchDirectoryException if the directory + /// does not exist, or does exist but is not a + /// directory. + /// + /// IOException if list() returns null + public static System.String[] ListAll(System.IO.DirectoryInfo dir) + { + if (!dir.Exists) + { + throw new NoSuchDirectoryException("directory '" + dir.FullName + "' does not exist"); + } + else if (System.IO.File.Exists(dir.FullName)) + { + throw new NoSuchDirectoryException("File '" + dir.FullName + "' does not exist"); + } + + // Exclude subdirs, only the file names, not the paths + System.IO.FileInfo[] files = dir.GetFiles(); + System.String[] result = new System.String[files.Length]; + for (int i = 0; i < files.Length; i++) + { + result[i] = files[i].Name; + } + + // no reason to return null, if the directory cannot be listed, an exception + // will be thrown on the above call to dir.GetFiles() + // use of LINQ to create the return value array may be a bit more efficient + + return result; + } + + /// Lists all files (not subdirectories) in the + /// directory. + /// + /// + /// + public override System.String[] ListAll() + { + EnsureOpen(); + return ListAll(internalDirectory); + } + + /// Returns true iff a file with the given name exists. + public override bool FileExists(System.String name) + { + EnsureOpen(); + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + return file.Exists; + } + + /// Returns the time the named file was last modified. + public override long FileModified(System.String name) + { + EnsureOpen(); + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + return (long)file.LastWriteTime.ToUniversalTime().Subtract(new DateTime(1970, 1, 1, 0, 0, 0)).TotalMilliseconds; //{{LUCENENET-353}} + } + + /// Returns the time the named file was last modified. + public static long FileModified(System.IO.FileInfo directory, System.String name) + { + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, name)); + return (long)file.LastWriteTime.ToUniversalTime().Subtract(new DateTime(1970, 1, 1, 0, 0, 0)).TotalMilliseconds; //{{LUCENENET-353}} + } + + /// Set the modified time of an existing file to now. + public override void TouchFile(System.String name) + { + EnsureOpen(); + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + file.LastWriteTime = System.DateTime.Now; + } + + /// Returns the length in bytes of a file in the directory. + public override long FileLength(System.String name) + { + EnsureOpen(); + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + return file.Exists ? 
file.Length : 0; + } + + /// Removes an existing file in the directory. + public override void DeleteFile(System.String name) + { + EnsureOpen(); + System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + try + { + file.Delete(); + } + catch (Exception) + { + throw new System.IO.IOException("Cannot delete " + file); + } + } + + public override void Sync(System.String name) + { + EnsureOpen(); + System.IO.FileInfo fullFile = new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name)); + bool success = false; + int retryCount = 0; + System.IO.IOException exc = null; + while (!success && retryCount < 5) + { + retryCount++; + System.IO.FileStream file = null; + try + { + try + { + file = new System.IO.FileStream(fullFile.FullName, System.IO.FileMode.OpenOrCreate, System.IO.FileAccess.Write, System.IO.FileShare.ReadWrite); + FileSupport.Sync(file); + success = true; + } + finally + { + if (file != null) + file.Close(); + } + } + catch (System.IO.IOException ioe) + { + if (exc == null) + exc = ioe; + + // Pause 5 msec + System.Threading.Thread.Sleep(5); + + } + } + + if (!success && exc != null) + // Throw original exception + throw exc; + } + + // Inherit javadoc + public override IndexInput OpenInput(System.String name) + { + EnsureOpen(); + return OpenInput(name, BufferedIndexInput.BUFFER_SIZE); + } + + /// So we can do some byte-to-hexchar conversion below + private static readonly char[] HEX_DIGITS = new char[]{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + + + public override string GetLockId() + { + EnsureOpen(); + System.String dirName; // name to be hashed + try + { + dirName = internalDirectory.FullName; + } + catch (System.IO.IOException e) + { + throw new System.SystemException(e.ToString(), e); + } + + byte[] digest; + lock (DIGESTER) + { + digest = DIGESTER.ComputeHash(System.Text.Encoding.UTF8.GetBytes(dirName)); + } + System.Text.StringBuilder buf = new System.Text.StringBuilder(); + buf.Append("lucene-"); + for (int i = 0; i < digest.Length; i++) + { + int b = digest[i]; + buf.Append(HEX_DIGITS[(b >> 4) & 0xf]); + buf.Append(HEX_DIGITS[b & 0xf]); + } + + return buf.ToString(); + } + + protected override void Dispose(bool disposing) + { + lock (this) + { + isOpen = false; + } + } + + // Java Lucene implements GetFile() which returns a FileInfo. + // For Lucene.Net, GetDirectory() is more appropriate + + public virtual DirectoryInfo Directory + { + get + { + EnsureOpen(); + return internalDirectory; + } + } + + /// For debug output. + public override System.String ToString() + { + return this.GetType().FullName + "@" + internalDirectory + " lockFactory=" + LockFactory; + } + + /// Default read chunk size. This is a conditional + /// default: on 32bit JVMs, it defaults to 100 MB. On + /// 64bit JVMs, it's Integer.MAX_VALUE. + /// + /// + /// + public static readonly int DEFAULT_READ_CHUNK_SIZE = Constants.JRE_IS_64BIT ? int.MaxValue: 100 * 1024 * 1024; + + // LUCENE-1566 + private int chunkSize = DEFAULT_READ_CHUNK_SIZE; + + /// The maximum number of bytes to read at once from the + /// underlying file during . 
+ /// + /// + /// + public int ReadChunkSize + { + get + { + // LUCENE-1566 + return chunkSize; + } + set + { + // LUCENE-1566 + if (value <= 0) + { + throw new System.ArgumentException("chunkSize must be positive"); + } + if (!Constants.JRE_IS_64BIT) + { + this.chunkSize = value; + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/FSLockFactory.cs b/external/Lucene.Net.Light/src/core/Store/FSLockFactory.cs new file mode 100644 index 0000000000..47e8acfb9c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/FSLockFactory.cs @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.IO; + +namespace Lucene.Net.Store +{ + + /// Base class for file system based locking implementation. + + public abstract class FSLockFactory:LockFactory + { + + /// Directory for the lock files. + protected internal System.IO.DirectoryInfo internalLockDir = null; + + /// Gets the lock directory. + /// Subclasses can use this to set the lock directory. + /// This method can be only called + /// once to initialize the lock directory. It is used by + /// to set the lock directory to itsself. + /// Subclasses can also use this method to set the directory + /// in the constructor. + /// + /// + public virtual DirectoryInfo LockDir + { + get { return internalLockDir; } + protected internal set + { + if (this.internalLockDir != null) + throw new System.SystemException("You can set the lock directory for this factory only once."); + this.internalLockDir = value; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/FileSwitchDirectory.cs b/external/Lucene.Net.Light/src/core/Store/FileSwitchDirectory.cs new file mode 100644 index 0000000000..e5d1f40011 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/FileSwitchDirectory.cs @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Expert: A Directory instance that switches files between + /// two other Directory instances. + ///

Files with the specified extensions are placed in the + /// primary directory; others are placed in the secondary + /// directory. The provided Set must not change once passed + /// to this class, and must allow multiple threads to call + /// contains at once.

+ /// + ///

NOTE: this API is new and experimental and is + /// subject to sudden change in the next release. + ///
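// Illustrative sketch, not part of this patch: splitting one logical index
// across two directories by file extension, as described above. The
// extension set, the paths and the HashSet<string> type argument are
// assumptions made for the example.
class FileSwitchDirectoryExample
{
    static Lucene.Net.Store.FileSwitchDirectory Create(string primaryPath, string secondaryPath)
    {
        // Extensions routed to the primary directory; everything else goes
        // to the secondary one (compare GetDirectory/GetExtension below).
        var primaryExtensions = new System.Collections.Generic.HashSet<string>();
        primaryExtensions.Add("frq");
        primaryExtensions.Add("prx");

        Lucene.Net.Store.Directory primary =
            Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(primaryPath));
        Lucene.Net.Store.Directory secondary =
            Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(secondaryPath));

        // doClose = true: disposing the switch directory also closes both wrapped directories.
        return new Lucene.Net.Store.FileSwitchDirectory(primaryExtensions, primary, secondary, true);
    }
}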

+ + public class FileSwitchDirectory:Directory + { + private Directory secondaryDir; + private Directory primaryDir; + private System.Collections.Generic.HashSet primaryExtensions; + private bool doClose; + private bool isDisposed; + + public FileSwitchDirectory(System.Collections.Generic.HashSet primaryExtensions, + Directory primaryDir, + Directory secondaryDir, + bool doClose) + { + this.primaryExtensions = primaryExtensions; + this.primaryDir = primaryDir; + this.secondaryDir = secondaryDir; + this.doClose = doClose; + this.interalLockFactory = primaryDir.LockFactory; + } + + /// Return the primary directory + public virtual Directory PrimaryDir + { + get { return primaryDir; } + } + + /// Return the secondary directory + public virtual Directory SecondaryDir + { + get { return secondaryDir; } + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (doClose) + { + try + { + if (secondaryDir != null) + { + secondaryDir.Close(); + } + } + finally + { + if (primaryDir != null) + { + primaryDir.Close(); + } + } + doClose = false; + } + + secondaryDir = null; + primaryDir = null; + isDisposed = true; + } + + public override System.String[] ListAll() + { + var files = new System.Collections.Generic.List(); + files.AddRange(primaryDir.ListAll()); + files.AddRange(secondaryDir.ListAll()); + return files.ToArray(); + } + + /// Utility method to return a file's extension. + public static System.String GetExtension(System.String name) + { + int i = name.LastIndexOf('.'); + if (i == - 1) + { + return ""; + } + return name.Substring(i + 1, (name.Length) - (i + 1)); + } + + private Directory GetDirectory(System.String name) + { + System.String ext = GetExtension(name); + if (primaryExtensions.Contains(ext)) + { + return primaryDir; + } + else + { + return secondaryDir; + } + } + + public override bool FileExists(System.String name) + { + return GetDirectory(name).FileExists(name); + } + + public override long FileModified(System.String name) + { + return GetDirectory(name).FileModified(name); + } + + public override void TouchFile(System.String name) + { + GetDirectory(name).TouchFile(name); + } + + public override void DeleteFile(System.String name) + { + GetDirectory(name).DeleteFile(name); + } + + public override long FileLength(System.String name) + { + return GetDirectory(name).FileLength(name); + } + + public override IndexOutput CreateOutput(System.String name) + { + return GetDirectory(name).CreateOutput(name); + } + + public override void Sync(System.String name) + { + GetDirectory(name).Sync(name); + } + + public override IndexInput OpenInput(System.String name) + { + return GetDirectory(name).OpenInput(name); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/IndexInput.cs b/external/Lucene.Net.Light/src/core/Store/IndexInput.cs new file mode 100644 index 0000000000..b7012ce610 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/IndexInput.cs @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Store +{ + + /// Abstract base class for input from a file in a . A + /// random-access input stream. Used for all Lucene index input operations. + /// + /// + /// + public abstract class IndexInput : System.ICloneable, IDisposable + { + private bool preUTF8Strings; // true if we are reading old (modified UTF8) string format + + /// Reads and returns a single byte. + /// + /// + public abstract byte ReadByte(); + + /// Reads a specified number of bytes into an array at the specified offset. + /// the array to read bytes into + /// + /// the offset in the array to start storing bytes + /// + /// the number of bytes to read + /// + /// + /// + public abstract void ReadBytes(byte[] b, int offset, int len); + + /// Reads a specified number of bytes into an array at the + /// specified offset with control over whether the read + /// should be buffered (callers who have their own buffer + /// should pass in "false" for useBuffer). Currently only + /// respects this parameter. + /// + /// the array to read bytes into + /// + /// the offset in the array to start storing bytes + /// + /// the number of bytes to read + /// + /// set to false if the caller will handle + /// buffering. + /// + /// + /// + public virtual void ReadBytes(byte[] b, int offset, int len, bool useBuffer) + { + // Default to ignoring useBuffer entirely + ReadBytes(b, offset, len); + } + + /// Reads four bytes and returns an int. + /// + /// + public virtual int ReadInt() + { + return ((ReadByte() & 0xFF) << 24) | ((ReadByte() & 0xFF) << 16) | ((ReadByte() & 0xFF) << 8) | (ReadByte() & 0xFF); + } + + /// Reads an int stored in variable-length format. Reads between one and + /// five bytes. Smaller values take fewer bytes. Negative numbers are not + /// supported. + /// + /// + /// + public virtual int ReadVInt() + { + byte b = ReadByte(); + int i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) + { + b = ReadByte(); + i |= (b & 0x7F) << shift; + } + return i; + } + + /// Reads eight bytes and returns a long. + /// + /// + public virtual long ReadLong() + { + return (((long) ReadInt()) << 32) | (ReadInt() & 0xFFFFFFFFL); + } + + /// Reads a long stored in variable-length format. Reads between one and + /// nine bytes. Smaller values take fewer bytes. Negative numbers are not + /// supported. + /// + public virtual long ReadVLong() + { + byte b = ReadByte(); + long i = b & 0x7F; + for (int shift = 7; (b & 0x80) != 0; shift += 7) + { + b = ReadByte(); + i |= (b & 0x7FL) << shift; + } + return i; + } + + /// Call this if readString should read characters stored + /// in the old modified UTF8 format (length in java chars + /// and java's modified UTF8 encoding). This is used for + /// indices written pre-2.4 See LUCENE-510 for details. + /// + public virtual void SetModifiedUTF8StringsMode() + { + preUTF8Strings = true; + } + + /// Reads a string. 
+ /// + /// + public virtual System.String ReadString() + { + if (preUTF8Strings) + return ReadModifiedUTF8String(); + int length = ReadVInt(); + byte[] bytes = new byte[length]; + ReadBytes(bytes, 0, length); + return System.Text.Encoding.UTF8.GetString(bytes, 0, length); + } + + private System.String ReadModifiedUTF8String() + { + int length = ReadVInt(); + char[] chars = new char[length]; + ReadChars(chars, 0, length); + return new System.String(chars, 0, length); + } + + /// Reads Lucene's old "modified UTF-8" encoded + /// characters into an array. + /// + /// the array to read characters into + /// + /// the offset in the array to start storing characters + /// + /// the number of characters to read + /// + /// + /// + /// -- please use readString or readBytes + /// instead, and construct the string + /// from those utf8 bytes + /// + [Obsolete("-- please use ReadString or ReadBytes instead, and construct the string from those utf8 bytes")] + public virtual void ReadChars(char[] buffer, int start, int length) + { + int end = start + length; + for (int i = start; i < end; i++) + { + byte b = ReadByte(); + if ((b & 0x80) == 0) + buffer[i] = (char) (b & 0x7F); + else if ((b & 0xE0) != 0xE0) + { + buffer[i] = (char) (((b & 0x1F) << 6) | (ReadByte() & 0x3F)); + } + else + buffer[i] = (char) (((b & 0x0F) << 12) | ((ReadByte() & 0x3F) << 6) | (ReadByte() & 0x3F)); + } + } + + /// Expert + /// + /// Similar to but does not do any conversion operations on the bytes it is reading in. It still + /// has to invoke just as does, but it does not need a buffer to store anything + /// and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine + /// how many more bytes to read + /// + /// The number of chars to read + /// + /// this method operates on old "modified utf8" encoded + /// strings + /// + [Obsolete("this method operates on old \"modified utf8\" encoded strings")] + public virtual void SkipChars(int length) + { + for (int i = 0; i < length; i++) + { + byte b = ReadByte(); + if ((b & 0x80) == 0) + { + //do nothing, we only need one byte + } + else if ((b & 0xE0) != 0xE0) + { + ReadByte(); //read an additional byte + } + else + { + //read two additional bytes. + ReadByte(); + ReadByte(); + } + } + } + + [Obsolete("Use Dispose() instead.")] + public void Close() + { + Dispose(); + } + + /// Closes the stream to futher operations. + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + + /// Returns the current position in this file, where the next read will + /// occur. + /// + /// + /// + public abstract long FilePointer { get; } + + /// Sets current position in this file, where the next read will occur. + /// + /// + public abstract void Seek(long pos); + + /// The number of bytes in the file. + public abstract long Length(); + + /// Returns a clone of this stream. + /// + ///

Clones of a stream access the same data, and are positioned at the same + /// point as the stream they were cloned from. + /// + ///

Expert: Subclasses must ensure that clones may be positioned at + /// different points in the input from each other and from the stream they + /// were cloned from. + ///
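// Illustrative sketch, not part of this patch: the variable-length integer
// encoding used by ReadVInt/ReadVLong above and by IndexOutput.WriteVInt
// later in this patch. Each byte carries seven data bits; a set high bit
// means another byte follows. The helper names are made up for the example.
class VIntFormatExample
{
    static System.Collections.Generic.List<byte> EncodeVInt(int i)
    {
        var bytes = new System.Collections.Generic.List<byte>();
        while ((i & ~0x7F) != 0)
        {
            bytes.Add((byte)((i & 0x7F) | 0x80));  // low 7 bits plus continuation flag
            i = (int)((uint)i >> 7);               // unsigned shift, like Number.URShift
        }
        bytes.Add((byte)i);                        // final byte, high bit clear
        return bytes;
    }

    static int DecodeVInt(System.Collections.Generic.IList<byte> bytes)
    {
        int pos = 0;
        byte b = bytes[pos++];
        int value = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = bytes[pos++];
            value |= (b & 0x7F) << shift;
        }
        return value;                              // e.g. 300 round-trips via [0xAC, 0x02]
    }
}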

+ public virtual System.Object Clone() + { + IndexInput clone = null; + try + { + clone = (IndexInput) base.MemberwiseClone(); + } + catch (System.Exception) + { + } + + return clone; + } + + // returns Map + public virtual System.Collections.Generic.IDictionary ReadStringStringMap() + { + var map = new HashMap(); + int count = ReadInt(); + for (int i = 0; i < count; i++) + { + System.String key = ReadString(); + System.String val = ReadString(); + map[key] = val; + } + + return map; + } + + /*public abstract void Dispose();*/ + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/IndexOutput.cs b/external/Lucene.Net.Light/src/core/Store/IndexOutput.cs new file mode 100644 index 0000000000..687c99c207 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/IndexOutput.cs @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using UnicodeUtil = Lucene.Net.Util.UnicodeUtil; + +namespace Lucene.Net.Store +{ + + /// Abstract base class for output to a file in a Directory. A random-access + /// output stream. Used for all Lucene index output operations. + /// + /// + /// + /// + /// + public abstract class IndexOutput : IDisposable + { + /// Writes a single byte. + /// + /// + public abstract void WriteByte(byte b); + + /// Writes an array of bytes. + /// the bytes to write + /// + /// the number of bytes to write + /// + /// + /// + public virtual void WriteBytes(byte[] b, int length) + { + WriteBytes(b, 0, length); + } + + /// Writes an array of bytes. + /// the bytes to write + /// + /// the offset in the byte array + /// + /// the number of bytes to write + /// + /// + /// + public abstract void WriteBytes(byte[] b, int offset, int length); + + /// Writes an int as four bytes. + /// + /// + public virtual void WriteInt(int i) + { + WriteByte((byte) (i >> 24)); + WriteByte((byte) (i >> 16)); + WriteByte((byte) (i >> 8)); + WriteByte((byte) i); + } + + /// Writes an int in a variable-length format. Writes between one and + /// five bytes. Smaller values take fewer bytes. Negative numbers are not + /// supported. + /// + /// + /// + public virtual void WriteVInt(int i) + { + while ((i & ~ 0x7F) != 0) + { + WriteByte((byte) ((i & 0x7f) | 0x80)); + i = Number.URShift(i, 7); + } + WriteByte((byte) i); + } + + /// Writes a long as eight bytes. + /// + /// + public virtual void WriteLong(long i) + { + WriteInt((int) (i >> 32)); + WriteInt((int) i); + } + + /// Writes an long in a variable-length format. Writes between one and five + /// bytes. Smaller values take fewer bytes. Negative numbers are not + /// supported. 
+ /// + /// + /// + public virtual void WriteVLong(long i) + { + while ((i & ~ 0x7F) != 0) + { + WriteByte((byte) ((i & 0x7f) | 0x80)); + i = Number.URShift(i, 7); + } + WriteByte((byte) i); + } + + /// Writes a string. + /// + /// + public virtual void WriteString(System.String s) + { + UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result(); + UnicodeUtil.UTF16toUTF8(s, 0, s.Length, utf8Result); + WriteVInt(utf8Result.length); + WriteBytes(utf8Result.result, 0, utf8Result.length); + } + + /// Writes a sub sequence of characters from s as the old + /// format (modified UTF-8 encoded bytes). + /// + /// the source of the characters + /// + /// the first character in the sequence + /// + /// the number of characters in the sequence + /// + /// -- please pre-convert to utf8 bytes + /// instead or use + /// + [Obsolete("-- please pre-convert to utf8 bytes instead or use WriteString")] + public virtual void WriteChars(System.String s, int start, int length) + { + int end = start + length; + for (int i = start; i < end; i++) + { + int code = (int) s[i]; + if (code >= 0x01 && code <= 0x7F) + WriteByte((byte) code); + else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) + { + WriteByte((byte) (0xC0 | (code >> 6))); + WriteByte((byte) (0x80 | (code & 0x3F))); + } + else + { + WriteByte((byte) (0xE0 | (Number.URShift(code, 12)))); + WriteByte((byte) (0x80 | ((code >> 6) & 0x3F))); + WriteByte((byte) (0x80 | (code & 0x3F))); + } + } + } + + /// Writes a sub sequence of characters from char[] as + /// the old format (modified UTF-8 encoded bytes). + /// + /// the source of the characters + /// + /// the first character in the sequence + /// + /// the number of characters in the sequence + /// + /// -- please pre-convert to utf8 bytes instead or use + /// + [Obsolete("-- please pre-convert to utf8 bytes instead or use WriteString")] + public virtual void WriteChars(char[] s, int start, int length) + { + int end = start + length; + for (int i = start; i < end; i++) + { + int code = (int) s[i]; + if (code >= 0x01 && code <= 0x7F) + WriteByte((byte) code); + else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) + { + WriteByte((byte) (0xC0 | (code >> 6))); + WriteByte((byte) (0x80 | (code & 0x3F))); + } + else + { + WriteByte((byte) (0xE0 | (Number.URShift(code, 12)))); + WriteByte((byte) (0x80 | ((code >> 6) & 0x3F))); + WriteByte((byte) (0x80 | (code & 0x3F))); + } + } + } + + private static int COPY_BUFFER_SIZE = 16384; + private byte[] copyBuffer; + + /// Copy numBytes bytes from input to ourself. + public virtual void CopyBytes(IndexInput input, long numBytes) + { + System.Diagnostics.Debug.Assert(numBytes >= 0, "numBytes=" + numBytes); + long left = numBytes; + if (copyBuffer == null) + copyBuffer = new byte[COPY_BUFFER_SIZE]; + while (left > 0) + { + int toCopy; + if (left > COPY_BUFFER_SIZE) + toCopy = COPY_BUFFER_SIZE; + else + toCopy = (int) left; + input.ReadBytes(copyBuffer, 0, toCopy); + WriteBytes(copyBuffer, 0, toCopy); + left -= toCopy; + } + } + + /// Forces any buffered output to be written. + public abstract void Flush(); + + /// Closes this stream to further operations. + [Obsolete("Use Dispose() instead.")] + public void Close() + { + Dispose(); + } + + /// Closes this stream to further operations. + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + + /// Returns the current position in this file, where the next write will + /// occur. 
+ /// + /// + /// + public abstract long FilePointer { get; } + + /// Sets current position in this file, where the next write will occur. + /// + /// + public abstract void Seek(long pos); + + /// The number of bytes in the file. + public abstract long Length { get; } + + /// Set the file length. By default, this method does + /// nothing (it's optional for a Directory to implement + /// it). But, certain Directory implementations (for + /// + /// can use this to inform the + /// underlying IO system to pre-allocate the file to the + /// specified size. If the length is longer than the + /// current file length, the bytes added to the file are + /// undefined. Otherwise the file is truncated. + /// + /// file length + /// + public virtual void SetLength(long length) + { + } + + + // map must be Map + public virtual void WriteStringStringMap(System.Collections.Generic.IDictionary map) + { + if (map == null) + { + WriteInt(0); + } + else + { + WriteInt(map.Count); + foreach (var entry in map) + { + WriteString(entry.Key); + WriteString(entry.Value); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/Lock.cs b/external/Lucene.Net.Light/src/core/Store/Lock.cs new file mode 100644 index 0000000000..9c3001271b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/Lock.cs @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Store +{ + + /// An interprocess mutex lock. + ///

Typical use might look like: + /// new Lock.With(directory.makeLock("my.lock")) { + /// public Object doBody() { + /// ... code to execute while locked ... + /// } + /// }.run(); + /// + ///
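// Illustrative sketch, not part of this patch: the same pattern in C# using
// the Lock.With helper defined below. Assumes compilation alongside these
// sources (DoBody is protected internal) and a Directory that exposes
// MakeLock, as the example above implies; the lock name is made up.
class LockWithExample
{
    private class GuardedWork : Lucene.Net.Store.Lock.With
    {
        public GuardedWork(Lucene.Net.Store.Lock l)
            : base(l, Lucene.Net.Store.Lock.LOCK_OBTAIN_WAIT_FOREVER)
        {
        }

        protected internal override object DoBody()
        {
            // ... code to execute while locked ...
            return null;
        }
    }

    static void Run(Lucene.Net.Store.Directory directory)
    {
        // run() obtains the lock, invokes DoBody(), and releases the lock in a finally block.
        new GuardedWork(directory.MakeLock("my.lock")).run();
    }
}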

+ /// + public abstract class Lock + { + + /// How long waits, in milliseconds, + /// in between attempts to acquire the lock. + /// + public static long LOCK_POLL_INTERVAL = 1000; + + /// Pass this value to to try + /// forever to obtain the lock. + /// + public const long LOCK_OBTAIN_WAIT_FOREVER = - 1; + + /// Attempts to obtain exclusive access and immediately return + /// upon success or failure. + /// + /// true iff exclusive access is obtained + /// + public abstract bool Obtain(); + + /// If a lock obtain called, this failureReason may be set + /// with the "root cause" Exception as to why the lock was + /// not obtained. + /// + protected internal System.Exception failureReason; + + /// Attempts to obtain an exclusive lock within amount of + /// time given. Polls once per + /// (currently 1000) milliseconds until lockWaitTimeout is + /// passed. + /// + /// length of time to wait in + /// milliseconds or + /// to retry forever + /// + /// true if lock was obtained + /// + /// LockObtainFailedException if lock wait times out + /// IllegalArgumentException if lockWaitTimeout is + /// out of bounds + /// + /// IOException if obtain() throws IOException + public virtual bool Obtain(long lockWaitTimeout) + { + failureReason = null; + bool locked = Obtain(); + if (lockWaitTimeout < 0 && lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER) + throw new System.ArgumentException("lockWaitTimeout should be LOCK_OBTAIN_WAIT_FOREVER or a non-negative number (got " + lockWaitTimeout + ")"); + + long maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL; + long sleepCount = 0; + while (!locked) + { + if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount) + { + System.String reason = "Lock obtain timed out: " + this.ToString(); + if (failureReason != null) + { + reason += (": " + failureReason); + } + var e = failureReason != null + ? new LockObtainFailedException(reason, failureReason) + : new LockObtainFailedException(reason); + throw e; + } + try + { + System.Threading.Thread.Sleep(TimeSpan.FromMilliseconds(LOCK_POLL_INTERVAL)); + } + catch (System.Threading.ThreadInterruptedException) + { + throw; + } + locked = Obtain(); + } + return locked; + } + + /// Releases exclusive access. + public abstract void Release(); + + /// Returns true if the resource is currently locked. Note that one must + /// still call before using the resource. + /// + public abstract bool IsLocked(); + + + /// Utility class for executing code with exclusive access. + public abstract class With + { + private Lock lock_Renamed; + private long lockWaitTimeout; + + + /// Constructs an executor that will grab the named lock. + protected With(Lock lock_Renamed, long lockWaitTimeout) + { + this.lock_Renamed = lock_Renamed; + this.lockWaitTimeout = lockWaitTimeout; + } + + /// Code to execute with exclusive access. + protected internal abstract System.Object DoBody(); + + /// Calls while lock is obtained. Blocks if lock + /// cannot be obtained immediately. Retries to obtain lock once per second + /// until it is obtained, or until it has tried ten times. Lock is released when + /// exits. 
+ /// + /// LockObtainFailedException if lock could not + /// be obtained + /// + /// IOException if throws IOException + public virtual System.Object run() + { + bool locked = false; + try + { + locked = lock_Renamed.Obtain(lockWaitTimeout); + return DoBody(); + } + finally + { + if (locked) + lock_Renamed.Release(); + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/LockFactory.cs b/external/Lucene.Net.Light/src/core/Store/LockFactory.cs new file mode 100644 index 0000000000..b3f34fc1ab --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/LockFactory.cs @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + ///

Base class for Locking implementation. Directory uses + /// instances of this class to implement locking.

+ /// + ///

Note that there are some useful tools to verify that + /// your LockFactory is working correctly: VerifyingLockFactory, + /// LockStressTest, LockVerifyServer.
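// Illustrative sketch, not part of this patch: handing a file-system lock
// factory to FSDirectory.Open (which, per FSLockFactory above, points the
// factory at the index directory) and then obtaining a named lock. The
// path, lock name and 10 second timeout are made-up values, and the
// parameterless SimpleFSLockFactory constructor is assumed.
class LockFactoryUsageExample
{
    static void Demo(string indexPath)
    {
        var lockFactory = new Lucene.Net.Store.SimpleFSLockFactory();
        var dir = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(indexPath), lockFactory);

        // dir is used for index I/O; the lock guards writers against each other.
        Lucene.Net.Store.Lock l = lockFactory.MakeLock("example.lock");
        l.Obtain(10000); // polls every LOCK_POLL_INTERVAL ms, throws LockObtainFailedException on timeout
        try
        {
            // ... work that needs exclusive access to the index ...
        }
        finally
        {
            l.Release();
        }
    }
}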

+ /// + ///

+ /// + /// + /// + /// + /// + /// + + public abstract class LockFactory + { + protected internal System.String internalLockPrefix = null; + + /// Gets or sets the prefix in use for all locks created in this + /// LockFactory. This is normally called once, when a + /// Directory gets this LockFactory instance. However, you + /// can also call this (after this instance is assigned to + /// a Directory) to override the prefix in use. This + /// is helpful if you're running Lucene on machines that + /// have different mount points for the same shared + /// directory. + /// + public virtual string LockPrefix + { + get { return this.internalLockPrefix; } + set { this.internalLockPrefix = value; } + } + + /// Return a new Lock instance identified by lockName. + /// name of the lock to be created. + /// + public abstract Lock MakeLock(System.String lockName); + + /// Attempt to clear (forcefully unlock and remove) the + /// specified lock. Only call this at a time when you are + /// certain this lock is no longer in use. + /// + /// name of the lock to be cleared. + /// + abstract public void ClearLock(System.String lockName); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/LockObtainFailedException.cs b/external/Lucene.Net.Light/src/core/Store/LockObtainFailedException.cs new file mode 100644 index 0000000000..065b3624a6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/LockObtainFailedException.cs @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// This exception is thrown when the write.lock + /// could not be acquired. This + /// happens when a writer tries to open an index + /// that another writer already has open. + /// + /// + /// + [Serializable] + public class LockObtainFailedException:System.IO.IOException + { + public LockObtainFailedException(System.String message):base(message) + { + } + + public LockObtainFailedException(System.String message, System.Exception ex) : base(message, ex) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/LockReleaseFailedException.cs b/external/Lucene.Net.Light/src/core/Store/LockReleaseFailedException.cs new file mode 100644 index 0000000000..121283e084 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/LockReleaseFailedException.cs @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// This exception is thrown when the write.lock + /// could not be released. + /// + /// + /// + [Serializable] + public class LockReleaseFailedException:System.IO.IOException + { + public LockReleaseFailedException(System.String message):base(message) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/LockStressTest.cs b/external/Lucene.Net.Light/src/core/Store/LockStressTest.cs new file mode 100644 index 0000000000..5ac81fef2e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/LockStressTest.cs @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Simple standalone tool that forever acquires & releases a + /// lock using a specific LockFactory. Run without any args + /// to see usage. + /// + /// + /// + /// + /// + /// + + public class LockStressTest + { + + [STAThread] + public static void Main(System.String[] args) + { + + if (args.Length != 6) + { + System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Store.LockStressTest myID verifierHostOrIP verifierPort lockFactoryClassName lockDirName sleepTime\n" + "\n" + " myID = int from 0 .. 
255 (should be unique for test process)\n" + " verifierHostOrIP = host name or IP address where LockVerifyServer is running\n" + " verifierPort = port that LockVerifyServer is listening on\n" + " lockFactoryClassName = primary LockFactory class that we will use\n" + " lockDirName = path to the lock directory (only set for Simple/NativeFSLockFactory\n" + " sleepTimeMS = milliseconds to pause betweeen each lock obtain/release\n" + "\n" + "You should run multiple instances of this process, each with its own\n" + "unique ID, and each pointing to the same lock directory, to verify\n" + "that locking is working correctly.\n" + "\n" + "Make sure you are first running LockVerifyServer.\n" + "\n"); + System.Environment.Exit(1); + } + + int myID = System.Int32.Parse(args[0]); + + if (myID < 0 || myID > 255) + { + System.Console.Out.WriteLine("myID must be a unique int 0..255"); + System.Environment.Exit(1); + } + + System.String verifierHost = args[1]; + int verifierPort = System.Int32.Parse(args[2]); + System.String lockFactoryClassName = args[3]; + System.String lockDirName = args[4]; + int sleepTimeMS = System.Int32.Parse(args[5]); + + System.Type c; + try + { + c = System.Type.GetType(lockFactoryClassName); + } + catch (System.Exception) + { + throw new System.IO.IOException("unable to find LockClass " + lockFactoryClassName); + } + + LockFactory lockFactory; + try + { + lockFactory = (LockFactory) System.Activator.CreateInstance(c); + } + catch (System.UnauthorizedAccessException) + { + throw new System.IO.IOException("IllegalAccessException when instantiating LockClass " + lockFactoryClassName); + } + catch (System.InvalidCastException) + { + throw new System.IO.IOException("unable to cast LockClass " + lockFactoryClassName + " instance to a LockFactory"); + } + catch (System.Exception) + { + throw new System.IO.IOException("InstantiationException when instantiating LockClass " + lockFactoryClassName); + } + + System.IO.DirectoryInfo lockDir = new System.IO.DirectoryInfo(lockDirName); + + if (lockFactory is NativeFSLockFactory) + { + ((NativeFSLockFactory) lockFactory).LockDir = lockDir; + } + else if (lockFactory is SimpleFSLockFactory) + { + ((SimpleFSLockFactory) lockFactory).LockDir = lockDir; + } + + lockFactory.LockPrefix = "test"; + + LockFactory verifyLF = new VerifyingLockFactory((sbyte) myID, lockFactory, verifierHost, verifierPort); + + Lock l = verifyLF.MakeLock("test.lock"); + + while (true) + { + + bool obtained = false; + + try + { + obtained = l.Obtain(10); + } + catch (LockObtainFailedException) + { + System.Console.Out.Write("x"); + } + + if (obtained) + { + System.Console.Out.Write("l"); + l.Release(); + } + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * sleepTimeMS)); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/LockVerifyServer.cs b/external/Lucene.Net.Light/src/core/Store/LockVerifyServer.cs new file mode 100644 index 0000000000..bf0a3c511c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/LockVerifyServer.cs @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Simple standalone server that must be running when you + /// use . This server simply + /// verifies at most one process holds the lock at a time. + /// Run without any args to see usage. + /// + /// + /// + /// + /// + /// + + public class LockVerifyServer + { + + private static System.String GetTime(long startTime) + { + return "[" + (((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - startTime) / 1000) + "s] "; + } + + [STAThread] + public static void Main(System.String[] args) + { + + if (args.Length != 1) + { + System.Console.Out.WriteLine("\nUsage: java Lucene.Net.Store.LockVerifyServer port\n"); + System.Environment.Exit(1); + } + + int port = System.Int32.Parse(args[0]); + + System.Net.Sockets.TcpListener temp_tcpListener; + temp_tcpListener = new System.Net.Sockets.TcpListener(System.Net.Dns.GetHostEntry(System.Net.Dns.GetHostName()).AddressList[0], port); + temp_tcpListener.Server.SetSocketOption(System.Net.Sockets.SocketOptionLevel.Socket, System.Net.Sockets.SocketOptionName.ReuseAddress, 1); + temp_tcpListener.Start(); + System.Net.Sockets.TcpListener s = temp_tcpListener; + System.Console.Out.WriteLine("\nReady on port " + port + "..."); + + int lockedID = 0; + long startTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); + + while (true) + { + System.Net.Sockets.TcpClient cs = s.AcceptTcpClient(); + System.IO.Stream out_Renamed = cs.GetStream(); + System.IO.Stream in_Renamed = cs.GetStream(); + + int id = in_Renamed.ReadByte(); + int command = in_Renamed.ReadByte(); + + bool err = false; + + if (command == 1) + { + // Locked + if (lockedID != 0) + { + err = true; + System.Console.Out.WriteLine(GetTime(startTime) + " ERROR: id " + id + " got lock, but " + lockedID + " already holds the lock"); + } + lockedID = id; + } + else if (command == 0) + { + if (lockedID != id) + { + err = true; + System.Console.Out.WriteLine(GetTime(startTime) + " ERROR: id " + id + " released the lock, but " + lockedID + " is the one holding the lock"); + } + lockedID = 0; + } + else + throw new System.SystemException("unrecognized command " + command); + + System.Console.Out.Write("."); + + if (err) + out_Renamed.WriteByte((System.Byte) 1); + else + out_Renamed.WriteByte((System.Byte) 0); + + out_Renamed.Close(); + in_Renamed.Close(); + cs.Close(); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/MMapDirectory.cs b/external/Lucene.Net.Light/src/core/Store/MMapDirectory.cs new file mode 100644 index 0000000000..65e68d57ec --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/MMapDirectory.cs @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using Constants = Lucene.Net.Util.Constants; + +namespace Lucene.Net.Store +{ + + /// File-based implementation that uses + /// mmap for reading, and + /// for writing. + /// + ///

NOTE: memory mapping uses up a portion of the + /// virtual memory address space in your process equal to the + /// size of the file being mapped. Before using this class, + /// be sure you have plenty of virtual address space, e.g. by + /// using a 64 bit JRE, or a 32 bit JRE with indexes that are + /// guaranteed to fit within the address space. + /// On 32 bit platforms also consult MaxChunkSize + /// if you have problems with mmap failing because of fragmented + /// address space. If you get an OutOfMemoryException, it is recommended + /// to reduce the chunk size, until it works. + /// + ///

Due to + /// this bug in Sun's JRE, MMapDirectory's IndexInput.Close + /// is unable to close the underlying OS file handle. Only when GC + /// finally collects the underlying objects, which could be quite + /// some time later, will the file handle be closed. + /// + ///

This will consume additional transient disk usage: on Windows, + /// attempts to delete or overwrite the files will result in an + /// exception; on other platforms, which typically have a "delete on + /// last close" semantics, while such operations will succeed, the bytes + /// are still consuming space on disk. For many applications this + /// limitation is not a problem (e.g. if you have plenty of disk space, + /// and you don't rely on overwriting files on Windows) but it's still + /// an important limitation to be aware of. + /// + ///

This class supplies the workaround mentioned in the bug report + /// (disabled by default, see UseUnmap), which may fail on + /// non-Sun JVMs. It forcefully unmaps the buffer on close by using + /// an undocumented internal cleanup functionality. + /// UNMAP_SUPPORTED is true if the workaround + /// can be enabled (with no guarantees). + ///
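// Illustrative sketch, not part of this patch: creating an MMapDirectory and
// lowering the chunk size on a 32 bit process, per the fragmentation note
// above. The path and the 64 MiB value are made-up choices.
class MMapDirectoryExample
{
    static Lucene.Net.Store.MMapDirectory Create(string indexPath)
    {
        var dir = new Lucene.Net.Store.MMapDirectory(new System.IO.DirectoryInfo(indexPath));

        if (!Lucene.Net.Util.Constants.JRE_IS_64BIT)
        {
            // Smaller chunks make mapping less likely to fail on a fragmented
            // 32 bit address space, at the cost of slightly slower seeks.
            dir.MaxChunkSize = 64 * 1024 * 1024;
        }
        return dir;
    }
}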

+ public class MMapDirectory:FSDirectory + { + private class AnonymousClassPrivilegedExceptionAction // : SupportClass.IPriviligedAction // {{Aroush-2.9}} + { + public AnonymousClassPrivilegedExceptionAction(byte[] buffer, MMapDirectory enclosingInstance) + { + InitBlock(buffer, enclosingInstance); + } + private void InitBlock(byte[] buffer, MMapDirectory enclosingInstance) + { + this.buffer = buffer; + this.enclosingInstance = enclosingInstance; + } + private byte[] buffer; + private MMapDirectory enclosingInstance; + public MMapDirectory Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + public virtual System.Object Run() + { + // {{Aroush-2.9 + /* + System.Reflection.MethodInfo getCleanerMethod = buffer.GetType().GetMethod("cleaner", (Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES == null)?new System.Type[0]:(System.Type[]) Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES); + getCleanerMethod.SetAccessible(true); + System.Object cleaner = getCleanerMethod.Invoke(buffer, (System.Object[]) Lucene.Net.Store.MMapDirectory.NO_PARAMS); + if (cleaner != null) + { + cleaner.GetType().GetMethod("clean", (Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES == null)?new System.Type[0]:(System.Type[]) Lucene.Net.Store.MMapDirectory.NO_PARAM_TYPES).Invoke(cleaner, (System.Object[]) Lucene.Net.Store.MMapDirectory.NO_PARAMS); + } + */ + //System.Diagnostics.Debug.Fail("Port issue:", "sun.misc.Cleaner()"); // {{Aroush-2.9}} + throw new NotImplementedException("Port issue: sun.misc.Cleaner()"); + // Aroush-2.9}} + //return null; + } + } + private void InitBlock() + { + maxBBuf = Constants.JRE_IS_64BIT?System.Int32.MaxValue:(256 * 1024 * 1024); + } + + /// Create a new MMapDirectory for the named location. + /// + /// + /// the path of the directory + /// + /// the lock factory to use, or null for the default. + /// + /// IOException + public MMapDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory) + : base(path, lockFactory) + { + InitBlock(); + } + + /// Create a new MMapDirectory for the named location and the default lock factory. + /// + /// + /// the path of the directory + /// + /// IOException + public MMapDirectory(System.IO.DirectoryInfo path) + : base(path, null) + { + InitBlock(); + } + + private bool useUnmapHack = false; + private int maxBBuf; + + /// true, if this platform supports unmapping mmaped files. + public static bool UNMAP_SUPPORTED; + + /// Enables or disables the workaround for unmapping the buffers + /// from address space after closing , that is + /// mentioned in the bug report. This hack may fail on non-Sun JVMs. + /// It forcefully unmaps the buffer on close by using + /// an undocumented internal cleanup functionality. + ///

NOTE: Enabling this is completely unsupported + /// by Java and may lead to JVM crashes if IndexInput + /// is closed while another thread is still accessing it (SIGSEGV). + ///
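// Illustrative sketch, not part of this patch: only opt in to the unmap hack
// when the platform reports support, since the UseUnmap setter below throws
// ArgumentException otherwise.
class UnmapWorkaroundExample
{
    static void EnableUnmapIfSupported(Lucene.Net.Store.MMapDirectory dir)
    {
        if (Lucene.Net.Store.MMapDirectory.UNMAP_SUPPORTED)
        {
            dir.UseUnmap = true;
        }
    }
}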

+ /// IllegalArgumentException if + /// is false and the workaround cannot be enabled. + /// + public virtual bool UseUnmap + { + get { return useUnmapHack; } + set + { + if (value && !UNMAP_SUPPORTED) + throw new System.ArgumentException("Unmap hack not supported on this platform!"); + this.useUnmapHack = value; + } + } + + /// Try to unmap the buffer, this method silently fails if no support + /// for that in the JVM. On Windows, this leads to the fact, + /// that mmapped files cannot be modified or deleted. + /// + internal void CleanMapping(System.IO.MemoryStream buffer) + { + if (useUnmapHack) + { + try + { + // {{Aroush-2.9}} Not converted: java.security.AccessController.doPrivileged() + //System.Diagnostics.Debug.Fail("Port issue:", "java.security.AccessController.doPrivileged()"); // {{Aroush-2.9}} + throw new NotImplementedException("Port issue: java.security.AccessController.doPrivileged()"); + // AccessController.DoPrivileged(new AnonymousClassPrivilegedExceptionAction(buffer, this)); + } + catch (System.Exception e) + { + System.IO.IOException ioe = new System.IO.IOException("unable to unmap the mapped buffer", e.InnerException); + throw ioe; + } + } + } + + /// Gets or sets the maximum chunk size (default is for + /// 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping. + /// Especially on 32 bit platform, the address space can be very fragmented, + /// so large index files cannot be mapped. + /// Using a lower chunk size makes the directory implementation a little + /// bit slower (as the correct chunk must be resolved on each seek) + /// but the chance is higher that mmap does not fail. On 64 bit + /// Java platforms, this parameter should always be , + /// as the adress space is big enough. + /// + public virtual int MaxChunkSize + { + get { return maxBBuf; } + set + { + if (value <= 0) + throw new System.ArgumentException("Maximum chunk size for mmap must be >0"); + this.maxBBuf = value; + } + } + + private class MMapIndexInput : IndexInput + { + private void InitBlock(MMapDirectory enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private MMapDirectory enclosingInstance; + public MMapDirectory Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + private System.IO.MemoryStream buffer; + private long length; + private bool isClone; + private bool isDisposed; + + internal MMapIndexInput(MMapDirectory enclosingInstance, System.IO.FileStream raf) + { + byte[] data = new byte[raf.Length]; + raf.Read(data, 0, (int) raf.Length); + + InitBlock(enclosingInstance); + this.length = raf.Length; + this.buffer = new System.IO.MemoryStream(data); + } + + public override byte ReadByte() + { + try + { + return (byte) buffer.ReadByte(); + } + catch (ObjectDisposedException) + { + throw new System.IO.IOException("read past EOF"); + } + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + try + { + buffer.Read(b, offset, len); + } + catch (ObjectDisposedException) + { + throw new System.IO.IOException("read past EOF"); + } + } + + public override long FilePointer + { + get + { + return buffer.Position; + } + } + + public override void Seek(long pos) + { + buffer.Seek(pos, System.IO.SeekOrigin.Begin); + } + + public override long Length() + { + return length; + } + + public override System.Object Clone() + { + if (buffer == null) + throw new AlreadyClosedException("MMapIndexInput already closed"); + MMapIndexInput clone = (MMapIndexInput) base.Clone(); + clone.isClone = true; + // clone.buffer = 
buffer.duplicate(); // {{Aroush-1.9}} + return clone; + } + + protected override void Dispose(bool isDisposing) + { + if (isDisposed) return; + + if (isDisposing) + { + if (isClone || buffer == null) + return; + // unmap the buffer (if enabled) and at least unset it for GC + try + { + Enclosing_Instance.CleanMapping(buffer); + } + finally + { + buffer = null; + } + } + + isDisposed = true; + } + } + + // Because Java's ByteBuffer uses an int to address the + // values, it's necessary to access a file > + // Integer.MAX_VALUE in size using multiple byte buffers. + protected internal class MultiMMapIndexInput:IndexInput, System.ICloneable + { + private void InitBlock(MMapDirectory enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private MMapDirectory enclosingInstance; + public MMapDirectory Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + + private System.IO.MemoryStream[] buffers; + private int[] bufSizes; // keep here, ByteBuffer.size() method is optional + + private long length; + + private bool isDisposed; + + private int curBufIndex; + private int maxBufSize; + + private System.IO.MemoryStream curBuf; // redundant for speed: buffers[curBufIndex] + private int curAvail; // redundant for speed: (bufSizes[curBufIndex] - curBuf.position()) + + private bool isClone = false; + + public MultiMMapIndexInput(MMapDirectory enclosingInstance, System.IO.FileStream raf, int maxBufSize) + { + InitBlock(enclosingInstance); + this.length = raf.Length; + this.maxBufSize = maxBufSize; + + if (maxBufSize <= 0) + throw new System.ArgumentException("Non positive maxBufSize: " + maxBufSize); + + if ((length / maxBufSize) > System.Int32.MaxValue) + { + throw new System.ArgumentException("RandomAccessFile too big for maximum buffer size: " + raf.ToString()); + } + + int nrBuffers = (int) (length / maxBufSize); + if (((long) nrBuffers * maxBufSize) < length) + nrBuffers++; + + this.buffers = new System.IO.MemoryStream[nrBuffers]; + this.bufSizes = new int[nrBuffers]; + + long bufferStart = 0; + System.IO.FileStream rafc = raf; + for (int bufNr = 0; bufNr < nrBuffers; bufNr++) + { + byte[] data = new byte[rafc.Length]; + raf.Read(data, 0, (int) rafc.Length); + + int bufSize = (length > (bufferStart + maxBufSize))?maxBufSize:(int) (length - bufferStart); + this.buffers[bufNr] = new System.IO.MemoryStream(data); + this.bufSizes[bufNr] = bufSize; + bufferStart += bufSize; + } + Seek(0L); + } + + public override byte ReadByte() + { + // Performance might be improved by reading ahead into an array of + // e.g. 128 bytes and readByte() from there. 
+ if (curAvail == 0) + { + curBufIndex++; + if (curBufIndex >= buffers.Length) + throw new System.IO.IOException("read past EOF"); + curBuf = buffers[curBufIndex]; + curBuf.Seek(0, System.IO.SeekOrigin.Begin); + curAvail = bufSizes[curBufIndex]; + } + curAvail--; + return (byte) curBuf.ReadByte(); + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + while (len > curAvail) + { + curBuf.Read(b, offset, curAvail); + len -= curAvail; + offset += curAvail; + curBufIndex++; + if (curBufIndex >= buffers.Length) + throw new System.IO.IOException("read past EOF"); + curBuf = buffers[curBufIndex]; + curBuf.Seek(0, System.IO.SeekOrigin.Begin); + curAvail = bufSizes[curBufIndex]; + } + curBuf.Read(b, offset, len); + curAvail -= len; + } + + public override long FilePointer + { + get { return ((long) curBufIndex*maxBufSize) + curBuf.Position; } + } + + public override void Seek(long pos) + { + curBufIndex = (int) (pos / maxBufSize); + curBuf = buffers[curBufIndex]; + int bufOffset = (int) (pos - ((long) curBufIndex * maxBufSize)); + curBuf.Seek(bufOffset, System.IO.SeekOrigin.Begin); + curAvail = bufSizes[curBufIndex] - bufOffset; + } + + public override long Length() + { + return length; + } + + public override System.Object Clone() + { + MultiMMapIndexInput clone = (MultiMMapIndexInput) base.Clone(); + clone.isClone = true; + clone.buffers = new System.IO.MemoryStream[buffers.Length]; + // No need to clone bufSizes. + // Since most clones will use only one buffer, duplicate() could also be + // done lazy in clones, e.g. when adapting curBuf. + for (int bufNr = 0; bufNr < buffers.Length; bufNr++) + { + clone.buffers[bufNr] = buffers[bufNr]; // clone.buffers[bufNr] = buffers[bufNr].duplicate(); // {{Aroush-1.9}} how do we clone?! + } + try + { + clone.Seek(FilePointer); + } + catch (System.IO.IOException ioe) + { + System.SystemException newException = new System.SystemException(ioe.Message, ioe); + throw newException; + } + return clone; + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + if (isClone || buffers == null) + return; + try + { + for (int bufNr = 0; bufNr < buffers.Length; bufNr++) + { + // unmap the buffer (if enabled) and at least unset it for GC + try + { + Enclosing_Instance.CleanMapping(buffers[bufNr]); + } + finally + { + buffers[bufNr] = null; + } + } + } + finally + { + buffers = null; + } + isDisposed = true; + } + } + + /// Creates an IndexInput for the file with the given name. + public override IndexInput OpenInput(System.String name, int bufferSize) + { + EnsureOpen(); + System.String path = System.IO.Path.Combine(Directory.FullName, name); + System.IO.FileStream raf = new System.IO.FileStream(path, System.IO.FileMode.Open, System.IO.FileAccess.Read); + try + { + return (raf.Length <= (long) maxBBuf)?(IndexInput) new MMapIndexInput(this, raf):(IndexInput) new MultiMMapIndexInput(this, raf, maxBBuf); + } + finally + { + raf.Close(); + } + } + + /// Creates an IndexOutput for the file with the given name. + public override IndexOutput CreateOutput(System.String name) + { + InitOutput(name); + return new SimpleFSDirectory.SimpleFSIndexOutput(new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name))); + } + static MMapDirectory() + { + { + bool v; + try + { + // {{Aroush-2.9 + /* + System.Type.GetType("sun.misc.Cleaner"); // {{Aroush-2.9}} port issue? 
+ System.Type.GetType("java.nio.DirectByteBuffer").GetMethod("cleaner", (NO_PARAM_TYPES == null)?new System.Type[0]:(System.Type[]) NO_PARAM_TYPES); + */ + //System.Diagnostics.Debug.Fail("Port issue:", "sun.misc.Cleaner.clean()"); // {{Aroush-2.9}} + throw new NotImplementedException("Port issue: sun.misc.Cleaner.clean()"); + // Aroush-2.9}} + //v = true; + } + catch (System.Exception) + { + v = false; + } + UNMAP_SUPPORTED = v; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/NIOFSDirectory.cs b/external/Lucene.Net.Light/src/core/Store/NIOFSDirectory.cs new file mode 100644 index 0000000000..190a53315f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/NIOFSDirectory.cs @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +namespace Lucene.Net.Store +{ + /// + /// Not implemented. Waiting for volunteers. + /// + public class NIOFSDirectory : Lucene.Net.Store.FSDirectory + { + public NIOFSDirectory(System.IO.DirectoryInfo dir, LockFactory lockFactory) + : base(dir, lockFactory) + { + throw new System.NotImplementedException("Waiting for volunteers to implement this class"); + } + + /// + /// Not implemented. Waiting for volunteers. + /// + public class NIOFSIndexInput + { + public NIOFSIndexInput() + { + throw new System.NotImplementedException("Waiting for volunteers to implement this class"); + } + } + + public override IndexOutput CreateOutput(string name) + { + throw new System.NotImplementedException("Waiting for volunteers to implement this class"); + } + } +} + + +//namespace Lucene.Net.Store +//{ + +// /// An implementation that uses +// /// java.nio's FileChannel's positional read, which allows +// /// multiple threads to read from the same file without +// /// synchronizing. +// /// +// ///

This class only uses FileChannel when reading; writing +// /// is achieved with . +// /// +// ///

NOTE: NIOFSDirectory is not recommended on Windows because of a bug +// /// in how FileChannel.read is implemented in Sun's JRE. +// /// Inside of the implementation the position is apparently +// /// synchronized. See here +// /// for details. +// ///

+// public class NIOFSDirectory:FSDirectory +// { + +// /// Create a new NIOFSDirectory for the named location. +// /// +// /// +// /// the path of the directory +// /// +// /// the lock factory to use, or null for the default. +// /// +// /// IOException +// [System.Obsolete("Use the constructor that takes a DirectoryInfo, this will be removed in the 3.0 release")] +// public NIOFSDirectory(System.IO.FileInfo path, LockFactory lockFactory):base(new System.IO.DirectoryInfo(path.FullName), lockFactory) +// { +// } + +// /// Create a new NIOFSDirectory for the named location. +// /// +// /// +// /// the path of the directory +// /// +// /// the lock factory to use, or null for the default. +// /// +// /// IOException +// public NIOFSDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory) : base(path, lockFactory) +// { +// } + +// /// Create a new NIOFSDirectory for the named location and the default lock factory. +// /// +// /// +// /// the path of the directory +// /// +// /// IOException +// [System.Obsolete("Use the constructor that takes a DirectoryInfo, this will be removed in the 3.0 release")] +// public NIOFSDirectory(System.IO.FileInfo path):base(new System.IO.DirectoryInfo(path.FullName), null) +// { +// } + +// /// Create a new NIOFSDirectory for the named location and the default lock factory. +// /// +// /// +// /// the path of the directory +// /// +// /// IOException +// public NIOFSDirectory(System.IO.DirectoryInfo path) : base(path, null) +// { +// } + +// // back compatibility so FSDirectory can instantiate via reflection +// /// +// /// +// [Obsolete] +// internal NIOFSDirectory() +// { +// } + +// /// Creates an IndexInput for the file with the given name. +// public override IndexInput OpenInput(System.String name, int bufferSize) +// { +// EnsureOpen(); +// return new NIOFSIndexInput(new System.IO.FileInfo(System.IO.Path.Combine(GetFile().FullName, name)), bufferSize, GetReadChunkSize()); +// } + +// /// Creates an IndexOutput for the file with the given name. 
+// public override IndexOutput CreateOutput(System.String name) +// { +// InitOutput(name); +// return new SimpleFSDirectory.SimpleFSIndexOutput(new System.IO.FileInfo(System.IO.Path.Combine(directory.FullName, name))); +// } + +// public /*protected internal*/ class NIOFSIndexInput:SimpleFSDirectory.SimpleFSIndexInput +// { + +// private System.IO.MemoryStream byteBuf; // wraps the buffer for NIO + +// private byte[] otherBuffer; +// private System.IO.MemoryStream otherByteBuf; + +// internal System.IO.BinaryReader channel; + +// /// Please use ctor taking chunkSize +// /// +// [Obsolete("Please use ctor taking chunkSize")] +// public NIOFSIndexInput(System.IO.FileInfo path, int bufferSize):this(path, bufferSize, FSDirectory.DEFAULT_READ_CHUNK_SIZE) +// { +// } + +// public NIOFSIndexInput(System.IO.FileInfo path, int bufferSize, int chunkSize):base(path, bufferSize, chunkSize) +// { +// channel = (System.IO.BinaryReader) file; +// } + +// protected internal override void NewBuffer(byte[] newBuffer) +// { +// base.NewBuffer(newBuffer); +// // {{Aroush-2.9}} byteBuf = ByteBuffer.wrap(newBuffer); +// System.Diagnostics.Debug.Fail("Port issue:", "byteBuf = ByteBuffer.wrap(newBuffer)"); // {{Aroush-2.9}} +// } + +// public override void Close() +// { +// if (!isClone && file.isOpen) +// { +// // Close the channel & file +// try +// { +// channel.Close(); +// } +// finally +// { +// file.Close(); +// } +// } +// } + +// public override void ReadInternal(byte[] b, int offset, int len) +// { + +// System.IO.MemoryStream bb; + +// // Determine the ByteBuffer we should use +// if (b == buffer && 0 == offset) +// { +// // Use our own pre-wrapped byteBuf: +// System.Diagnostics.Debug.Assert(byteBuf != null); +// byteBuf.Position = 0; +// byteBuf.Capacity = len; +// bb = byteBuf; +// } +// else +// { +// if (offset == 0) +// { +// if (otherBuffer != b) +// { +// // Now wrap this other buffer; with compound +// // file, we are repeatedly called with its +// // buffer, so we wrap it once and then re-use it +// // on subsequent calls +// otherBuffer = b; +// // otherByteBuf = ByteBuffer.wrap(b); {{Aroush-2.9}} +// System.Diagnostics.Debug.Fail("Port issue:", "otherByteBuf = ByteBuffer.wrap(b)"); // {{Aroush-2.9}} +// } +// else +// otherByteBuf.Position = 0; +// otherByteBuf.Capacity = len; +// bb = otherByteBuf; +// } +// else +// { +// // Always wrap when offset != 0 +// bb = null; // bb = ByteBuffer.wrap(b, offset, len); {{Aroush-2.9}} +// System.Diagnostics.Debug.Fail("Port issue:", "bb = ByteBuffer.wrap(b, offset, len)"); // {{Aroush-2.9}} +// } +// } + +// int readOffset = (int) bb.Position; +// int readLength = bb.Capacity - readOffset; +// System.Diagnostics.Debug.Assert(readLength == len); + +// long pos = GetFilePointer(); + +// try +// { +// while (readLength > 0) +// { +// int limit; +// if (readLength > chunkSize) +// { +// // LUCENE-1566 - work around JVM Bug by breaking +// // very large reads into chunks +// limit = readOffset + chunkSize; +// } +// else +// { +// limit = readOffset + readLength; +// } +// bb.Capacity = limit; +// int i = -1; // int i = channel.Read(bb, pos, limit); // {{Aroush-2.9}} must read from 'channel' into 'bb' +// System.Diagnostics.Debug.Fail("Port issue:", "channel.Read(bb, pos, limit)"); // {{Aroush-2.9}} +// if (i == - 1) +// { +// throw new System.IO.IOException("read past EOF"); +// } +// pos += i; +// readOffset += i; +// readLength -= i; +// } +// } +// catch (System.OutOfMemoryException e) +// { +// // propagate OOM up and add a hint for 32bit VM Users 
hitting the bug +// // with a large chunk size in the fast path. +// System.OutOfMemoryException outOfMemoryError = new System.OutOfMemoryException("OutOfMemoryError likely caused by the Sun VM Bug described in " + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + "with a a value smaller than the current chunk size (" + chunkSize + ")", e); +// throw outOfMemoryError; +// } +// } +// } +// } +//} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/NativeFSLockFactory.cs b/external/Lucene.Net.Light/src/core/Store/NativeFSLockFactory.cs new file mode 100644 index 0000000000..ffab47b51a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/NativeFSLockFactory.cs @@ -0,0 +1,440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Store +{ + + ///

Implements using native OS file + /// locks. Note that because this LockFactory relies on + /// java.nio.* APIs for locking, any problems with those APIs + /// will cause locking to fail. Specifically, on certain NFS + /// environments the java.nio.* locks will fail (the lock can + /// incorrectly be double acquired) whereas + /// worked perfectly in those same + /// environments. For NFS based access to an index, it's + /// recommended that you try + /// first and work around the one limitation that a lock file + /// could be left when the JVM exits abnormally.

+ /// + ///

The primary benefit of is + /// that lock files will be properly removed (by the OS) if + /// the JVM has an abnormal exit.

+ /// + ///

Note that, unlike , the existence of + /// leftover lock files in the filesystem on exiting the JVM + /// is fine because the OS will free the locks held against + /// these files even though the files still remain.

+ /// + ///

If you suspect that this or any other LockFactory is + /// not working properly in your environment, you can easily + /// test it by using , + /// and .

+ /// + ///
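// A minimal usage sketch of this factory (illustrative only; the index path, the lock
// name, and a `using Lucene.Net.Store;` directive are assumptions, not part of the ported file):
//
//     var lockDir = new System.IO.DirectoryInfo("/tmp/my-index");      // hypothetical path
//     var factory = new NativeFSLockFactory(lockDir);
//     Lock writeLock = factory.MakeLock("write.lock");                 // illustrative lock name
//     if (writeLock.Obtain())                                          // acquires a native OS file lock
//     {
//         try { /* ... write to the index ... */ }
//         finally { writeLock.Release(); }                             // the OS also frees it on abnormal exit
//     }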

+ /// + /// + + public class NativeFSLockFactory : FSLockFactory + { + /// Create a NativeFSLockFactory instance, with null (unset) + /// lock directory. When you pass this factory to a + /// subclass, the lock directory is automatically set to the + /// directory itsself. Be sure to create one instance for each directory + /// your create! + /// + public NativeFSLockFactory():this((System.IO.DirectoryInfo) null) + { + } + + /// Create a NativeFSLockFactory instance, storing lock + /// files into the specified lockDirName: + /// + /// + /// where lock files are created. + /// + public NativeFSLockFactory(System.String lockDirName):this(new System.IO.DirectoryInfo(lockDirName)) + { + } + + /// Create a NativeFSLockFactory instance, storing lock + /// files into the specified lockDir: + /// + /// + /// where lock files are created. + /// + public NativeFSLockFactory(System.IO.DirectoryInfo lockDir) + { + LockDir = lockDir; + } + + public override Lock MakeLock(System.String lockName) + { + lock (this) + { + if (internalLockPrefix != null) + lockName = internalLockPrefix + "-" + lockName; + return new NativeFSLock(internalLockDir, lockName); + } + } + + public override void ClearLock(System.String lockName) + { + // Note that this isn't strictly required anymore + // because the existence of these files does not mean + // they are locked, but, still do this in case people + // really want to see the files go away: + bool tmpBool; + if (System.IO.File.Exists(internalLockDir.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(internalLockDir.FullName); + if (tmpBool) + { + if (internalLockPrefix != null) + { + lockName = internalLockPrefix + "-" + lockName; + } + System.IO.FileInfo lockFile = new System.IO.FileInfo(System.IO.Path.Combine(internalLockDir.FullName, lockName)); + bool tmpBool2; + if (System.IO.File.Exists(lockFile.FullName)) + tmpBool2 = true; + else + tmpBool2 = System.IO.Directory.Exists(lockFile.FullName); + bool tmpBool3; + if (System.IO.File.Exists(lockFile.FullName)) + { + System.IO.File.Delete(lockFile.FullName); + tmpBool3 = true; + } + else if (System.IO.Directory.Exists(lockFile.FullName)) + { + System.IO.Directory.Delete(lockFile.FullName); + tmpBool3 = true; + } + else + tmpBool3 = false; + if (tmpBool2 && !tmpBool3) + { + throw new System.IO.IOException("Cannot delete " + lockFile); + } + } + } + } + + + class NativeFSLock:Lock + { + + private System.IO.FileStream f; + private System.IO.FileStream channel; + private bool lock_Renamed; + private System.IO.FileInfo path; + private System.IO.DirectoryInfo lockDir; + + /* + * The javadocs for FileChannel state that you should have + * a single instance of a FileChannel (per JVM) for all + * locking against a given file. To ensure this, we have + * a single (static) HashSet that contains the file paths + * of all currently locked locks. This protects against + * possible cases where different Directory instances in + * one JVM (each with their own NativeFSLockFactory + * instance) have set the same lock dir and lock prefix. 
+ */ + private static HashSet LOCK_HELD = new HashSet(); + + public NativeFSLock(System.IO.DirectoryInfo lockDir, System.String lockFileName) + { + this.lockDir = lockDir; + path = new System.IO.FileInfo(System.IO.Path.Combine(lockDir.FullName, lockFileName)); + } + + private bool LockExists() + { + lock (this) + { + return lock_Renamed != false; + } + } + + public override bool Obtain() + { + lock (this) + { + + if (LockExists()) + { + // Our instance is already locked: + return false; + } + + // Ensure that lockDir exists and is a directory. + bool tmpBool; + if (System.IO.File.Exists(lockDir.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(lockDir.FullName); + if (!tmpBool) + { + try + { + System.IO.Directory.CreateDirectory(lockDir.FullName); + } + catch + { + throw new System.IO.IOException("Cannot create directory: " + lockDir.FullName); + } + } + else if (!System.IO.Directory.Exists(lockDir.FullName)) + { + throw new System.IO.IOException("Found regular file where directory expected: " + lockDir.FullName); + } + + System.String canonicalPath = path.FullName; + + bool markedHeld = false; + + try + { + + // Make sure nobody else in-process has this lock held + // already, and, mark it held if not: + + lock (LOCK_HELD) + { + if (LOCK_HELD.Contains(canonicalPath)) + { + // Someone else in this JVM already has the lock: + return false; + } + else + { + // This "reserves" the fact that we are the one + // thread trying to obtain this lock, so we own + // the only instance of a channel against this + // file: + LOCK_HELD.Add(canonicalPath); + markedHeld = true; + } + } + + try + { + f = new System.IO.FileStream(path.FullName, System.IO.FileMode.OpenOrCreate, System.IO.FileAccess.ReadWrite); + } + catch (System.IO.IOException e) + { + // On Windows, we can get intermittent "Access + // Denied" here. So, we treat this as failure to + // acquire the lock, but, store the reason in case + // there is in fact a real error case. + failureReason = e; + f = null; + } + // lucene.net: UnauthorizedAccessException does not derive from IOException like in java + catch (System.UnauthorizedAccessException e) + { + // On Windows, we can get intermittent "Access + // Denied" here. So, we treat this as failure to + // acquire the lock, but, store the reason in case + // there is in fact a real error case. + failureReason = e; + f = null; + } + + if (f != null) + { + try + { + channel = f; + lock_Renamed = false; + try + { + channel.Lock(0, channel.Length); + lock_Renamed = true; + } + catch (System.IO.IOException e) + { + // At least on OS X, we will sometimes get an + // intermittent "Permission Denied" IOException, + // which seems to simply mean "you failed to get + // the lock". But other IOExceptions could be + // "permanent" (eg, locking is not supported via + // the filesystem). So, we record the failure + // reason here; the timeout obtain (usually the + // one calling us) will use this as "root cause" + // if it fails to get the lock. + failureReason = e; + } + // lucene.net: UnauthorizedAccessException does not derive from IOException like in java + catch (System.UnauthorizedAccessException e) + { + // At least on OS X, we will sometimes get an + // intermittent "Permission Denied" IOException, + // which seems to simply mean "you failed to get + // the lock". But other IOExceptions could be + // "permanent" (eg, locking is not supported via + // the filesystem). 
So, we record the failure + // reason here; the timeout obtain (usually the + // one calling us) will use this as "root cause" + // if it fails to get the lock. + failureReason = e; + } + finally + { + if (lock_Renamed == false) + { + try + { + channel.Close(); + } + finally + { + channel = null; + } + } + } + } + finally + { + if (channel == null) + { + try + { + f.Close(); + } + finally + { + f = null; + } + } + } + } + } + finally + { + if (markedHeld && !LockExists()) + { + lock (LOCK_HELD) + { + if (LOCK_HELD.Contains(canonicalPath)) + { + LOCK_HELD.Remove(canonicalPath); + } + } + } + } + return LockExists(); + } + } + + public override void Release() + { + lock (this) + { + if (LockExists()) + { + try + { + channel.Unlock(0, channel.Length); + } + finally + { + lock_Renamed = false; + try + { + channel.Close(); + } + finally + { + channel = null; + try + { + f.Close(); + } + finally + { + f = null; + lock (LOCK_HELD) + { + LOCK_HELD.Remove(path.FullName); + } + } + } + } + bool tmpBool; + if (System.IO.File.Exists(path.FullName)) + { + System.IO.File.Delete(path.FullName); + tmpBool = true; + } + else if (System.IO.Directory.Exists(path.FullName)) + { + System.IO.Directory.Delete(path.FullName); + tmpBool = true; + } + else + tmpBool = false; + if (!tmpBool) + throw new LockReleaseFailedException("failed to delete " + path); + } + } + } + + public override bool IsLocked() + { + lock (this) + { + // The test for is isLocked is not directly possible with native file locks: + + // First a shortcut, if a lock reference in this instance is available + if (LockExists()) + return true; + + // Look if lock file is present; if not, there can definitely be no lock! + bool tmpBool; + if (System.IO.File.Exists(path.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(path.FullName); + if (!tmpBool) + return false; + + // Try to obtain and release (if was locked) the lock + try + { + bool obtained = Obtain(); + if (obtained) + Release(); + return !obtained; + } + catch (System.IO.IOException) + { + return false; + } + } + } + + public override System.String ToString() + { + return "NativeFSLock@" + path; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/NoLockFactory.cs b/external/Lucene.Net.Light/src/core/Store/NoLockFactory.cs new file mode 100644 index 0000000000..4b5c5c5a42 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/NoLockFactory.cs @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Use this to disable locking entirely. + /// Only one instance of this lock is created. You should call + /// to get the instance. 
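// A brief sketch of the singleton usage described above (illustrative only; the lock
// name is an assumption): every lock handed out is a no-op that always "succeeds".
//
//     Lock noop = NoLockFactory.Instance.MakeLock("write.lock");
//     bool obtained = noop.Obtain();      // always true: NoLock.Obtain() does nothing
//     noop.Release();                     // likewise a no-op
//
// The instance can also be passed wherever a LockFactory is expected, for example to the
// SimpleFSDirectory(DirectoryInfo, LockFactory) constructor, to disable locking entirely.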
+ /// + /// + /// + /// + + public class NoLockFactory : LockFactory + { + + // Single instance returned whenever makeLock is called. + private static NoLock singletonLock = new NoLock(); + private static NoLockFactory singleton = new NoLockFactory(); + + public static NoLockFactory Instance + { + get { return singleton; } + } + + public override Lock MakeLock(System.String lockName) + { + return singletonLock; + } + + public override void ClearLock(System.String lockName) + { + } + + } + + + class NoLock:Lock + { + public override bool Obtain() + { + return true; + } + + public override void Release() + { + } + + public override bool IsLocked() + { + return false; + } + + public override System.String ToString() + { + return "NoLock"; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/NoSuchDirectoryException.cs b/external/Lucene.Net.Light/src/core/Store/NoSuchDirectoryException.cs new file mode 100644 index 0000000000..c3b01aea53 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/NoSuchDirectoryException.cs @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// This exception is thrown when you try to list a + /// non-existent directory. + /// + + [Serializable] + public class NoSuchDirectoryException:System.IO.FileNotFoundException + { + public NoSuchDirectoryException(System.String message):base(message) + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/RAMDirectory.cs b/external/Lucene.Net.Light/src/core/Store/RAMDirectory.cs new file mode 100644 index 0000000000..c5c06e3b57 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/RAMDirectory.cs @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Store +{ + + /// A memory-resident implementation. Locking + /// implementation is by default the + /// but can be changed with . 
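// A short sketch of the copy constructor described above (illustrative only; the on-disk
// path is an assumption): the resulting RAMDirectory is a complete, independent copy.
//
//     Directory onDisk = new SimpleFSDirectory(new System.IO.DirectoryInfo("/tmp/my-index"));
//     Directory inMemory = new RAMDirectory(onDisk);   // later changes to onDisk are not visible here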
+ /// + [Serializable] + public class RAMDirectory:Directory + { + + private const long serialVersionUID = 1L; + + internal protected HashMap fileMap = new HashMap(); + internal protected long internalSizeInBytes = 0; + + // ***** + // Lock acquisition sequence: RAMDirectory, then RAMFile + // ***** + + /// Constructs an empty . + public RAMDirectory() + { + SetLockFactory(new SingleInstanceLockFactory()); + } + + /// Creates a new RAMDirectory instance from a different + /// Directory implementation. This can be used to load + /// a disk-based index into memory. + ///

+ /// This should be used only with indices that can fit into memory. + ///

+ /// Note that the resulting RAMDirectory instance is fully + /// independent from the original Directory (it is a + /// complete copy). Any subsequent changes to the + /// original Directory will not be visible in the + /// RAMDirectory instance. + /// + ///

+ /// a Directory value + /// + /// if an error occurs + /// + public RAMDirectory(Directory dir):this(dir, false) + { + } + + private RAMDirectory(Directory dir, bool closeDir):this() + { + Directory.Copy(dir, this, closeDir); + } + + //https://issues.apache.org/jira/browse/LUCENENET-174 + [System.Runtime.Serialization.OnDeserialized] + void OnDeserialized(System.Runtime.Serialization.StreamingContext context) + { + if (interalLockFactory == null) + { + SetLockFactory(new SingleInstanceLockFactory()); + } + } + + public override System.String[] ListAll() + { + lock (this) + { + EnsureOpen(); + // TODO: may have better performance if our HashMap implmented KeySet() instead of generating one via HashSet + System.Collections.Generic.ISet fileNames = Support.Compatibility.SetFactory.CreateHashSet(fileMap.Keys); + System.String[] result = new System.String[fileNames.Count]; + int i = 0; + foreach(string filename in fileNames) + { + result[i++] = filename; + } + return result; + } + } + + /// Returns true iff the named file exists in this directory. + public override bool FileExists(System.String name) + { + EnsureOpen(); + RAMFile file; + lock (this) + { + file = fileMap[name]; + } + return file != null; + } + + /// Returns the time the named file was last modified. + /// IOException if the file does not exist + public override long FileModified(System.String name) + { + EnsureOpen(); + RAMFile file; + lock (this) + { + file = fileMap[name]; + } + if (file == null) + throw new System.IO.FileNotFoundException(name); + + // RAMOutputStream.Flush() was changed to use DateTime.UtcNow. + // Convert it back to local time before returning (previous behavior) + return new DateTime(file.LastModified*TimeSpan.TicksPerMillisecond, DateTimeKind.Utc).ToLocalTime().Ticks/ + TimeSpan.TicksPerMillisecond; + } + + /// Set the modified time of an existing file to now. + /// IOException if the file does not exist + public override void TouchFile(System.String name) + { + EnsureOpen(); + RAMFile file; + lock (this) + { + file = fileMap[name]; + } + if (file == null) + throw new System.IO.FileNotFoundException(name); + + long ts2, ts1 = System.DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; + do + { + try + { + System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 0 + 100 * 1)); + } + catch (System.Threading.ThreadInterruptedException ie) + { + // In 3.0 we will change this to throw + // InterruptedException instead + ThreadClass.Current().Interrupt(); + throw new System.SystemException(ie.Message, ie); + } + ts2 = System.DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond; + } + while (ts1 == ts2); + + file.LastModified = ts2; + } + + /// Returns the length in bytes of a file in the directory. + /// IOException if the file does not exist + public override long FileLength(System.String name) + { + EnsureOpen(); + RAMFile file; + lock (this) + { + file = fileMap[name]; + } + if (file == null) + throw new System.IO.FileNotFoundException(name); + return file.Length; + } + + /// Return total size in bytes of all files in this + /// directory. This is currently quantized to + /// RAMOutputStream.BUFFER_SIZE. + /// + public long SizeInBytes() + { + lock (this) + { + EnsureOpen(); + return internalSizeInBytes; + } + } + + /// Removes an existing file in the directory. 
+ /// IOException if the file does not exist + public override void DeleteFile(System.String name) + { + lock (this) + { + EnsureOpen(); + RAMFile file = fileMap[name]; + if (file != null) + { + fileMap.Remove(name); + file.directory = null; + internalSizeInBytes -= file.sizeInBytes; + } + else + throw new System.IO.FileNotFoundException(name); + } + } + + /// Creates a new, empty file in the directory with the given name. Returns a stream writing this file. + public override IndexOutput CreateOutput(System.String name) + { + EnsureOpen(); + RAMFile file = new RAMFile(this); + lock (this) + { + RAMFile existing = fileMap[name]; + if (existing != null) + { + internalSizeInBytes -= existing.sizeInBytes; + existing.directory = null; + } + fileMap[name] = file; + } + return new RAMOutputStream(file); + } + + /// Returns a stream reading an existing file. + public override IndexInput OpenInput(System.String name) + { + EnsureOpen(); + RAMFile file; + lock (this) + { + file = fileMap[name]; + } + if (file == null) + throw new System.IO.FileNotFoundException(name); + return new RAMInputStream(file); + } + + /// Closes the store to future operations, releasing associated memory. + protected override void Dispose(bool disposing) + { + isOpen = false; + fileMap = null; + } + + //public HashMap fileMap_ForNUnit + //{ + // get { return fileMap; } + //} + + //public long sizeInBytes_ForNUnitTest + //{ + // get { return sizeInBytes; } + // set { sizeInBytes = value; } + //} + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/RAMFile.cs b/external/Lucene.Net.Light/src/core/Store/RAMFile.cs new file mode 100644 index 0000000000..fd9daed8a0 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/RAMFile.cs @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System; + +namespace Lucene.Net.Store +{ + + [Serializable] + public class RAMFile + { + + private const long serialVersionUID = 1L; + + protected System.Collections.Generic.List buffers = new System.Collections.Generic.List(); + internal long length; + internal RAMDirectory directory; + internal long sizeInBytes; + + // This is publicly modifiable via Directory.touchFile(), so direct access not supported + private long lastModified = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond); + + // File used as buffer, in no RAMDirectory + public /*internal*/ RAMFile() + { + } + + public /*internal*/ RAMFile(RAMDirectory directory) + { + this.directory = directory; + } + + // For non-stream access from thread that might be concurrent with writing + + internal virtual long Length + { + get + { + lock (this) + { + return length; + } + } + set + { + lock (this) + { + this.length = value; + } + } + } + + // For non-stream access from thread that might be concurrent with writing + + internal virtual long LastModified + { + get + { + lock (this) + { + return lastModified; + } + } + set + { + lock (this) + { + this.lastModified = value; + } + } + } + + internal byte[] AddBuffer(int size) + { + byte[] buffer = NewBuffer(size); + lock (this) + { + buffers.Add(buffer); + sizeInBytes += size; + } + + if (directory != null) + { + lock (directory) //{{DIGY}} what if directory gets null in the mean time? + { + directory.internalSizeInBytes += size; + } + } + + return buffer; + } + + public /*internal*/ byte[] GetBuffer(int index) + { + lock (this) + { + return buffers[index]; + } + } + + public /*internal*/ int NumBuffers() + { + lock (this) + { + return buffers.Count; + } + } + + /// Expert: allocate a new buffer. + /// Subclasses can allocate differently. + /// + /// size of allocated buffer. + /// + /// allocated buffer. + /// + public /*internal*/ virtual byte[] NewBuffer(int size) + { + return new byte[size]; + } + + + public virtual long SizeInBytes + { + get + { + lock (this) + { + return sizeInBytes; + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/RAMInputStream.cs b/external/Lucene.Net.Light/src/core/Store/RAMInputStream.cs new file mode 100644 index 0000000000..7e97f5fe62 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/RAMInputStream.cs @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// A memory-resident implementation. 
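// A round-trip sketch using these memory-resident streams (illustrative only; the file
// name and payload are assumptions, and the usual IDisposable clean-up on the
// IndexInput/IndexOutput base classes is assumed):
//
//     var dir = new RAMDirectory();
//     IndexOutput output = dir.CreateOutput("demo.bin");
//     byte[] payload = { 1, 2, 3, 4 };
//     output.WriteBytes(payload, 0, payload.Length);
//     output.Dispose();                                   // flushes and records the file length
//     IndexInput input = dir.OpenInput("demo.bin");
//     var readBack = new byte[(int) input.Length()];
//     input.ReadBytes(readBack, 0, readBack.Length);
//     input.Dispose();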
+ /// + /// + public class RAMInputStream : IndexInput + { + internal static readonly int BUFFER_SIZE; + + private RAMFile file; + private long length; + + private byte[] currentBuffer; + private int currentBufferIndex; + + private int bufferPosition; + private long bufferStart; + private int bufferLength; + + public /*internal*/ RAMInputStream(RAMFile f) + { + file = f; + length = file.length; + if (length / BUFFER_SIZE >= System.Int32.MaxValue) + { + throw new System.IO.IOException("Too large RAMFile! " + length); + } + + // make sure that we switch to the + // first needed buffer lazily + currentBufferIndex = - 1; + currentBuffer = null; + } + + protected override void Dispose(bool disposing) + { + // do nothing + } + + public override long Length() + { + return length; + } + + public override byte ReadByte() + { + if (bufferPosition >= bufferLength) + { + currentBufferIndex++; + SwitchCurrentBuffer(true); + } + return currentBuffer[bufferPosition++]; + } + + public override void ReadBytes(byte[] b, int offset, int len) + { + while (len > 0) + { + if (bufferPosition >= bufferLength) + { + currentBufferIndex++; + SwitchCurrentBuffer(true); + } + + int remainInBuffer = bufferLength - bufferPosition; + int bytesToCopy = len < remainInBuffer?len:remainInBuffer; + Array.Copy(currentBuffer, bufferPosition, b, offset, bytesToCopy); + offset += bytesToCopy; + len -= bytesToCopy; + bufferPosition += bytesToCopy; + } + } + + private void SwitchCurrentBuffer(bool enforceEOF) + { + if (currentBufferIndex >= file.NumBuffers()) + { + // end of file reached, no more buffers left + if (enforceEOF) + throw new System.IO.IOException("Read past EOF"); + else + { + // Force EOF if a read takes place at this position + currentBufferIndex--; + bufferPosition = BUFFER_SIZE; + } + } + else + { + currentBuffer = file.GetBuffer(currentBufferIndex); + bufferPosition = 0; + bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; + long buflen = length - bufferStart; + bufferLength = buflen > BUFFER_SIZE?BUFFER_SIZE:(int) buflen; + } + } + + public override long FilePointer + { + get { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } + } + + public override void Seek(long pos) + { + if (currentBuffer == null || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) + { + currentBufferIndex = (int) (pos / BUFFER_SIZE); + SwitchCurrentBuffer(false); + } + bufferPosition = (int) (pos % BUFFER_SIZE); + } + + static RAMInputStream() + { + BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/RAMOutputStream.cs b/external/Lucene.Net.Light/src/core/Store/RAMOutputStream.cs new file mode 100644 index 0000000000..64e91651a4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/RAMOutputStream.cs @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// A memory-resident implementation. + /// For lucene internal use. + /// + public class RAMOutputStream:IndexOutput + { + internal const int BUFFER_SIZE = 1024; + + private RAMFile file; + + private byte[] currentBuffer; + private int currentBufferIndex; + + private bool isDisposed; + + private int bufferPosition; + private long bufferStart; + private int bufferLength; + + /// Construct an empty output buffer. + public RAMOutputStream():this(new RAMFile()) + { + } + + internal RAMOutputStream(RAMFile f) + { + file = f; + + // make sure that we switch to the + // first needed buffer lazily + currentBufferIndex = - 1; + currentBuffer = null; + } + + /// Copy the current contents of this buffer to the named output. + public virtual void WriteTo(IndexOutput out_Renamed) + { + Flush(); + long end = file.length; + long pos = 0; + int buffer = 0; + while (pos < end) + { + int length = BUFFER_SIZE; + long nextPos = pos + length; + if (nextPos > end) + { + // at the last buffer + length = (int) (end - pos); + } + out_Renamed.WriteBytes(file.GetBuffer(buffer++), length); + pos = nextPos; + } + } + + /// Resets this to an empty buffer. + public virtual void Reset() + { + currentBuffer = null; + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; + + file.Length = 0; + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + Flush(); + } + + isDisposed = true; + } + + public override void Seek(long pos) + { + // set the file length in case we seek back + // and flush() has not been called yet + SetFileLength(); + if (pos < bufferStart || pos >= bufferStart + bufferLength) + { + currentBufferIndex = (int) (pos / BUFFER_SIZE); + SwitchCurrentBuffer(); + } + + bufferPosition = (int) (pos % BUFFER_SIZE); + } + + public override long Length + { + get { return file.length; } + } + + public override void WriteByte(byte b) + { + if (bufferPosition == bufferLength) + { + currentBufferIndex++; + SwitchCurrentBuffer(); + } + currentBuffer[bufferPosition++] = b; + } + + public override void WriteBytes(byte[] b, int offset, int len) + { + System.Diagnostics.Debug.Assert(b != null); + while (len > 0) + { + if (bufferPosition == bufferLength) + { + currentBufferIndex++; + SwitchCurrentBuffer(); + } + + int remainInBuffer = currentBuffer.Length - bufferPosition; + int bytesToCopy = len < remainInBuffer?len:remainInBuffer; + Array.Copy(b, offset, currentBuffer, bufferPosition, bytesToCopy); + offset += bytesToCopy; + len -= bytesToCopy; + bufferPosition += bytesToCopy; + } + } + + private void SwitchCurrentBuffer() + { + if (currentBufferIndex == file.NumBuffers()) + { + currentBuffer = file.AddBuffer(BUFFER_SIZE); + } + else + { + currentBuffer = file.GetBuffer(currentBufferIndex); + } + bufferPosition = 0; + bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; + bufferLength = currentBuffer.Length; + } + + private void SetFileLength() + { + long pointer = bufferStart + bufferPosition; + if (pointer > file.length) + { + file.Length = pointer; + } + } + + public override void Flush() + { + file.LastModified = (DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond); + SetFileLength(); + } + + public override long FilePointer + { + get { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } + } + + /// Returns byte usage of all buffers. 
+ public virtual long SizeInBytes() + { + return file.NumBuffers() * BUFFER_SIZE; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/SimpleFSDirectory.cs b/external/Lucene.Net.Light/src/core/Store/SimpleFSDirectory.cs new file mode 100644 index 0000000000..3eab3592c9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/SimpleFSDirectory.cs @@ -0,0 +1,319 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// A straightforward implementation of + /// using java.io.RandomAccessFile. However, this class has + /// poor concurrent performance (multiple threads will + /// bottleneck) as it synchronizes when multiple threads + /// read from the same file. It's usually better to use + /// or instead. + /// + public class SimpleFSDirectory : FSDirectory + { + /// Create a new SimpleFSDirectory for the named location. + /// + /// + /// the path of the directory + /// + /// the lock factory to use, or null for the default. + /// + /// IOException + public SimpleFSDirectory(System.IO.DirectoryInfo path, LockFactory lockFactory) + : base(path, lockFactory) + { + } + + /// Create a new SimpleFSDirectory for the named location and the default lock factory. + /// + /// + /// the path of the directory + /// + /// IOException + public SimpleFSDirectory(System.IO.DirectoryInfo path) : base(path, null) + { + } + + /// Creates an IndexOutput for the file with the given name. + public override IndexOutput CreateOutput(System.String name) + { + InitOutput(name); + return new SimpleFSIndexOutput(new System.IO.FileInfo(System.IO.Path.Combine(internalDirectory.FullName, name))); + } + + /// Creates an IndexInput for the file with the given name. 
+ public override IndexInput OpenInput(System.String name, int bufferSize) + { + EnsureOpen(); + + Exception e = null; + for (var i = 0; i < 10; i++) + { + try + { + return new SimpleFSIndexInput(new System.IO.FileInfo( + System.IO.Path.Combine(internalDirectory.FullName, name)), bufferSize, ReadChunkSize); + } + catch (System.UnauthorizedAccessException ex) + { + e = ex; + System.Threading.Thread.Sleep(1); + } + } + + throw e; + } + + protected internal class SimpleFSIndexInput : BufferedIndexInput + { + // TODO: This is a bad way to handle memory and disposing + protected internal class Descriptor : System.IO.BinaryReader + { + // remember if the file is open, so that we don't try to close it + // more than once + protected internal volatile bool isOpen; + internal long position; + internal long length; + + private bool isDisposed; + + public Descriptor(/*FSIndexInput enclosingInstance,*/ System.IO.FileInfo file, System.IO.FileAccess mode) + : base(new System.IO.FileStream(file.FullName, System.IO.FileMode.Open, mode, System.IO.FileShare.ReadWrite)) + { + isOpen = true; + length = file.Length; + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + if (isOpen) + { + isOpen = false; + } + } + + isDisposed = true; + base.Dispose(disposing); + } + + ~Descriptor() + { + try + { + Dispose(false); + } + finally + { + } + } + } + + protected internal Descriptor file; + internal bool isClone; + private bool isDisposed; + // LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM + protected internal int chunkSize; + + public SimpleFSIndexInput(System.IO.FileInfo path, int bufferSize, int chunkSize) + : base(bufferSize) + { + file = new Descriptor(path, System.IO.FileAccess.Read); + this.chunkSize = chunkSize; + } + + /// IndexInput methods + public override void ReadInternal(byte[] b, int offset, int len) + { + lock (file) + { + long position = FilePointer; + if (position != file.position) + { + file.BaseStream.Seek(position, System.IO.SeekOrigin.Begin); + file.position = position; + } + int total = 0; + + try + { + do + { + int readLength; + if (total + chunkSize > len) + { + readLength = len - total; + } + else + { + // LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks + readLength = chunkSize; + } + int i = file.Read(b, offset + total, readLength); + if (i == - 1) + { + throw new System.IO.IOException("read past EOF"); + } + file.position += i; + total += i; + } + while (total < len); + } + catch (System.OutOfMemoryException e) + { + // propagate OOM up and add a hint for 32bit VM Users hitting the bug + // with a large chunk size in the fast path. 
+ System.OutOfMemoryException outOfMemoryError = new System.OutOfMemoryException("OutOfMemoryError likely caused by the Sun VM Bug described in " + "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize " + "with a a value smaller than the current chunks size (" + chunkSize + ")", e); + throw outOfMemoryError; + } + } + } + + protected override void Dispose(bool disposing) + { + if (isDisposed) return; + if (disposing) + { + // only close the file if this is not a clone + if (!isClone && file != null) + { + file.Close(); + file = null; + } + } + + isDisposed = true; + } + + public override void SeekInternal(long position) + { + } + + public override long Length() + { + return file.length; + } + + public override System.Object Clone() + { + SimpleFSIndexInput clone = (SimpleFSIndexInput) base.Clone(); + clone.isClone = true; + return clone; + } + + /// Method used for testing. Returns true if the underlying + /// file descriptor is valid. + /// + public /*internal*/ virtual bool IsFDValid() + { + return file.BaseStream != null; + } + + public bool isClone_ForNUnit + { + get { return isClone; } + } + } + + /*protected internal*/ public class SimpleFSIndexOutput:BufferedIndexOutput + { + internal System.IO.FileStream file = null; + + // remember if the file is open, so that we don't try to close it + // more than once + private volatile bool isOpen; + + public SimpleFSIndexOutput(System.IO.FileInfo path) + { + file = new System.IO.FileStream(path.FullName, System.IO.FileMode.OpenOrCreate, System.IO.FileAccess.ReadWrite); + isOpen = true; + } + + /// output methods: + public override void FlushBuffer(byte[] b, int offset, int size) + { + file.Write(b, offset, size); + // {{dougsale-2.4.0}} + // FSIndexOutput.Flush + // When writing frequently with small amounts of data, the data isn't flushed to disk. + // Thus, attempting to read the data soon after this method is invoked leads to + // BufferedIndexInput.Refill() throwing an IOException for reading past EOF. + // Test\Index\TestDoc.cs demonstrates such a situation. + // Forcing a flush here prevents said issue. + // {{DIGY 2.9.0}} + // This code is not available in Lucene.Java 2.9.X. + // Can there be a indexing-performance problem? + file.Flush(); + } + + protected override void Dispose(bool disposing) + { + // only close the file if it has not been closed yet + if (isOpen) + { + bool success = false; + try + { + base.Dispose(disposing); + success = true; + } + finally + { + isOpen = false; + if (!success) + { + try + { + file.Dispose(); + } + catch (System.Exception) + { + // Suppress so we don't mask original exception + } + } + else + file.Dispose(); + } + } + } + + /// Random-access methods + public override void Seek(long pos) + { + base.Seek(pos); + file.Seek(pos, System.IO.SeekOrigin.Begin); + } + + public override long Length + { + get { return file.Length; } + } + + public override void SetLength(long length) + { + file.SetLength(length); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/SimpleFSLockFactory.cs b/external/Lucene.Net.Light/src/core/Store/SimpleFSLockFactory.cs new file mode 100644 index 0000000000..e7e284a789 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/SimpleFSLockFactory.cs @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + ///

Implements using + ///.

+ /// + ///

NOTE: the javadocs + /// for File.createNewFile contain a vague + /// yet spooky warning about not using the API for file + /// locking. This warning was added due to this + /// bug, and in fact the only known problem with using + /// this API for locking is that the Lucene write lock may + /// not be released when the JVM exits abnormally.

+ ///

When this happens, a + /// is hit when trying to create a writer, in which case you + /// need to explicitly clear the lock file first. You can + /// either manually remove the file, or use the + /// + /// API. But, first be certain that no writer is in fact + /// writing to the index otherwise you can easily corrupt + /// your index.

+ /// + ///

If you suspect that this or any other LockFactory is + /// not working properly in your environment, you can easily + /// test it by using , + /// and .

+ /// + ///
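// A usage sketch of the simple file-based locking described above (illustrative only; the
// lock directory and lock name are assumptions): Obtain() creates the lock file, Release()
// deletes it, and ClearLock() removes a leftover file by hand.
//
//     var factory = new SimpleFSLockFactory("/tmp/my-index");
//     Lock l = factory.MakeLock("write.lock");
//     if (l.Obtain())
//     {
//         try { /* ... write to the index ... */ }
//         finally { l.Release(); }
//     }
//     else
//     {
//         // Only do this after making sure no live writer still holds the lock.
//         factory.ClearLock("write.lock");
//     }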

+ /// + /// + + public class SimpleFSLockFactory:FSLockFactory + { + + /// Create a SimpleFSLockFactory instance, with null (unset) + /// lock directory. When you pass this factory to a + /// subclass, the lock directory is automatically set to the + /// directory itsself. Be sure to create one instance for each directory + /// your create! + /// + public SimpleFSLockFactory():this((System.IO.DirectoryInfo) null) + { + } + + /// Instantiate using the provided directory (as a File instance). + /// where lock files should be created. + /// + public SimpleFSLockFactory(System.IO.DirectoryInfo lockDir) + { + LockDir = lockDir; + } + + /// Instantiate using the provided directory name (String). + /// where lock files should be created. + /// + public SimpleFSLockFactory(System.String lockDirName) + : this(new System.IO.DirectoryInfo(lockDirName)) + { + } + + public override Lock MakeLock(System.String lockName) + { + if (internalLockPrefix != null) + { + lockName = internalLockPrefix + "-" + lockName; + } + return new SimpleFSLock(internalLockDir, lockName); + } + + public override void ClearLock(System.String lockName) + { + bool tmpBool; + if (System.IO.File.Exists(internalLockDir.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(internalLockDir.FullName); + if (tmpBool) + { + if (internalLockPrefix != null) + { + lockName = internalLockPrefix + "-" + lockName; + } + System.IO.FileInfo lockFile = new System.IO.FileInfo(System.IO.Path.Combine(internalLockDir.FullName, lockName)); + bool tmpBool2; + if (System.IO.File.Exists(lockFile.FullName)) + tmpBool2 = true; + else + tmpBool2 = System.IO.Directory.Exists(lockFile.FullName); + bool tmpBool3; + if (System.IO.File.Exists(lockFile.FullName)) + { + System.IO.File.Delete(lockFile.FullName); + tmpBool3 = true; + } + else if (System.IO.Directory.Exists(lockFile.FullName)) + { + System.IO.Directory.Delete(lockFile.FullName); + tmpBool3 = true; + } + else + tmpBool3 = false; + if (tmpBool2 && !tmpBool3) + { + throw new System.IO.IOException("Cannot delete " + lockFile); + } + } + } + } + + + class SimpleFSLock:Lock + { + + internal System.IO.FileInfo lockFile; + internal System.IO.DirectoryInfo lockDir; + + [System.Obsolete("Use the constructor that takes a DirectoryInfo, this will be removed in the 3.0 release")] + public SimpleFSLock(System.IO.FileInfo lockDir, System.String lockFileName) : this(new System.IO.DirectoryInfo(lockDir.FullName), lockFileName) + { + } + + public SimpleFSLock(System.IO.DirectoryInfo lockDir, System.String lockFileName) + { + this.lockDir = new System.IO.DirectoryInfo(lockDir.FullName); + lockFile = new System.IO.FileInfo(System.IO.Path.Combine(lockDir.FullName, lockFileName)); + } + + public override bool Obtain() + { + + // Ensure that lockDir exists and is a directory: + bool tmpBool; + if (System.IO.File.Exists(lockDir.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(lockDir.FullName); + if (!tmpBool) + { + try + { + System.IO.Directory.CreateDirectory(lockDir.FullName); + } + catch + { + throw new System.IO.IOException("Cannot create directory: " + lockDir.FullName); + } + } + else + { + try + { + System.IO.Directory.Exists(lockDir.FullName); + } + catch + { + throw new System.IO.IOException("Found regular file where directory expected: " + lockDir.FullName); + } + } + + if (lockFile.Exists) + { + return false; + } + else + { + System.IO.FileStream createdFile = lockFile.Create(); + createdFile.Close(); + return true; + } + } + + public override void Release() + { + 
bool tmpBool; + if (System.IO.File.Exists(lockFile.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(lockFile.FullName); + bool tmpBool2; + if (System.IO.File.Exists(lockFile.FullName)) + { + System.IO.File.Delete(lockFile.FullName); + tmpBool2 = true; + } + else if (System.IO.Directory.Exists(lockFile.FullName)) + { + System.IO.Directory.Delete(lockFile.FullName); + tmpBool2 = true; + } + else + tmpBool2 = false; + if (tmpBool && !tmpBool2) + throw new LockReleaseFailedException("failed to delete " + lockFile); + } + + public override bool IsLocked() + { + bool tmpBool; + if (System.IO.File.Exists(lockFile.FullName)) + tmpBool = true; + else + tmpBool = System.IO.Directory.Exists(lockFile.FullName); + return tmpBool; + } + + public override System.String ToString() + { + return "SimpleFSLock@" + lockFile; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/SingleInstanceLockFactory.cs b/external/Lucene.Net.Light/src/core/Store/SingleInstanceLockFactory.cs new file mode 100644 index 0000000000..9b6828ba9e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/SingleInstanceLockFactory.cs @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// Implements for a single in-process instance, + /// meaning all locking will take place through this one instance. + /// Only use this when you are certain all + /// IndexReaders and IndexWriters for a given index are running + /// against a single shared in-process Directory instance. This is + /// currently the default locking for RAMDirectory. + /// + /// + /// + /// + + public class SingleInstanceLockFactory:LockFactory + { + + private System.Collections.Generic.HashSet locks = new System.Collections.Generic.HashSet(); + + public override Lock MakeLock(System.String lockName) + { + // We do not use the LockPrefix at all, because the private + // HashSet instance effectively scopes the locking to this + // single Directory instance. 
+ return new SingleInstanceLock(locks, lockName); + } + + public override void ClearLock(System.String lockName) + { + lock (locks) + { + if (locks.Contains(lockName)) + { + locks.Remove(lockName); + } + } + } + } + + + class SingleInstanceLock:Lock + { + + internal System.String lockName; + private System.Collections.Generic.HashSet locks; + + public SingleInstanceLock(System.Collections.Generic.HashSet locks, System.String lockName) + { + this.locks = locks; + this.lockName = lockName; + } + + public override bool Obtain() + { + lock (locks) + { + if (locks.Contains(lockName) == false) + { + locks.Add(lockName); + return true; + } + + return false; + } + } + + public override void Release() + { + lock (locks) + { + locks.Remove(lockName); + } + } + + public override bool IsLocked() + { + lock (locks) + { + return locks.Contains(lockName); + } + } + + public override System.String ToString() + { + return base.ToString() + ": " + lockName; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Store/VerifyingLockFactory.cs b/external/Lucene.Net.Light/src/core/Store/VerifyingLockFactory.cs new file mode 100644 index 0000000000..24d52a1136 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Store/VerifyingLockFactory.cs @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Store +{ + + /// A that wraps another + /// and verifies that each lock obtain/release + /// is "correct" (never results in two processes holding the + /// lock at the same time). It does this by contacting an + /// external server () to assert that + /// at most one process holds the lock at a time. To use + /// this, you should also run on the + /// host & port matching what you pass to the constructor. 
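The SingleInstanceLock above keeps all of its state in the HashSet shared with its factory, so locking is visible only within one Directory instance. A small sketch of the resulting behaviour (the lock name is chosen here for illustration):

    var factory = new Lucene.Net.Store.SingleInstanceLockFactory();
    Lucene.Net.Store.Lock first = factory.MakeLock("write.lock");
    Lucene.Net.Store.Lock second = factory.MakeLock("write.lock");
    bool a = first.Obtain();    // true: "write.lock" is added to the shared set
    bool b = second.Obtain();   // false: the name is already present
    first.Release();            // removes the name, so second.Obtain() would now succeed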
+ /// + /// + /// + /// + /// + /// + + public class VerifyingLockFactory:LockFactory + { + + internal LockFactory lf; + internal sbyte id; + internal System.String host; + internal int port; + + private class CheckedLock:Lock + { + private void InitBlock(VerifyingLockFactory enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private VerifyingLockFactory enclosingInstance; + public VerifyingLockFactory Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private Lock lock_Renamed; + + public CheckedLock(VerifyingLockFactory enclosingInstance, Lock lock_Renamed) + { + InitBlock(enclosingInstance); + this.lock_Renamed = lock_Renamed; + } + + private void Verify(sbyte message) + { + try + { + System.Net.Sockets.TcpClient s = new System.Net.Sockets.TcpClient(Enclosing_Instance.host, Enclosing_Instance.port); + System.IO.Stream out_Renamed = s.GetStream(); + out_Renamed.WriteByte((byte) Enclosing_Instance.id); + out_Renamed.WriteByte((byte) message); + System.IO.Stream in_Renamed = s.GetStream(); + int result = in_Renamed.ReadByte(); + in_Renamed.Close(); + out_Renamed.Close(); + s.Close(); + if (result != 0) + throw new System.SystemException("lock was double acquired"); + } + catch (System.Exception e) + { + throw new System.SystemException(e.Message, e); + } + } + + public override bool Obtain(long lockWaitTimeout) + { + lock (this) + { + bool obtained = lock_Renamed.Obtain(lockWaitTimeout); + if (obtained) + Verify((sbyte) 1); + return obtained; + } + } + + public override bool Obtain() + { + lock (this) + { + return lock_Renamed.Obtain(); + } + } + + public override bool IsLocked() + { + lock (this) + { + return lock_Renamed.IsLocked(); + } + } + + public override void Release() + { + lock (this) + { + if (IsLocked()) + { + Verify((sbyte) 0); + lock_Renamed.Release(); + } + } + } + } + + /// should be a unique id across all clients + /// + /// the LockFactory that we are testing + /// + /// host or IP where + /// is running + /// + /// the port is + /// listening on + /// + public VerifyingLockFactory(sbyte id, LockFactory lf, System.String host, int port) + { + this.id = id; + this.lf = lf; + this.host = host; + this.port = port; + } + + public override Lock MakeLock(System.String lockName) + { + lock (this) + { + return new CheckedLock(this, lf.MakeLock(lockName)); + } + } + + public override void ClearLock(System.String lockName) + { + lock (this) + { + lf.ClearLock(lockName); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/AppSettings.cs b/external/Lucene.Net.Light/src/core/Support/AppSettings.cs new file mode 100644 index 0000000000..a5e95cd986 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/AppSettings.cs @@ -0,0 +1,159 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
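The VerifyingLockFactory above wraps another LockFactory and reports every obtain and release to an external verification server over TCP. A wiring sketch, assuming a LockVerifyServer is already listening; the id, host, port, and path are placeholders:

    Lucene.Net.Store.LockFactory inner = new Lucene.Net.Store.SimpleFSLockFactory("/tmp/my-index");
    var verifying = new Lucene.Net.Store.VerifyingLockFactory((sbyte)1, inner, "localhost", 4000);
    Lucene.Net.Store.Lock writeLock = verifying.MakeLock("write.lock");
    if (writeLock.Obtain())   // obtains via the wrapped lock, then reports '1' to the server
    {
        writeLock.Release();  // reports '0' to the server, then releases the wrapped lock
    }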
See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Configuration; + +namespace Lucene.Net.Support +{ + /// + /// + /// + public class AppSettings + { + static System.Collections.Specialized.ListDictionary settings = new System.Collections.Specialized.ListDictionary(); + + /// + /// + /// + /// + /// + public static void Set(System.String key, int defValue) + { + settings[key] = defValue; + } + + /// + /// + /// + /// + /// + public static void Set(System.String key, long defValue) + { + settings[key] = defValue; + } + + /// + /// + /// + /// + /// + public static void Set(System.String key, System.String defValue) + { + settings[key] = defValue; + } + + /// + /// + /// + /// + /// + public static void Set(System.String key, bool defValue) + { + settings[key] = defValue; + } + + /// + /// + /// + /// + /// + /// + public static int Get(System.String key, int defValue) + { + if (settings[key] != null) + { + return (int)settings[key]; + } + + System.String theValue = ConfigurationManager.AppSettings.Get(key); + if (theValue == null) + { + return defValue; + } + int retValue = Convert.ToInt32(theValue.Trim()); + settings[key] = retValue; + return retValue; + } + + /// + /// + /// + /// + /// + /// + public static long Get(System.String key, long defValue) + { + if (settings[key] != null) + { + return (long)settings[key]; + } + + System.String theValue = ConfigurationManager.AppSettings.Get(key); + if (theValue == null) + { + return defValue; + } + long retValue = Convert.ToInt64(theValue.Trim()); + settings[key] = retValue; + return retValue; + } + + /// + /// + /// + /// + /// + /// + public static System.String Get(System.String key, System.String defValue) + { + if (settings[key] != null) + { + return (System.String)settings[key]; + } + + System.String theValue = ConfigurationManager.AppSettings.Get(key); + if (theValue == null) + { + return defValue; + } + settings[key] = theValue; + return theValue; + } + + public static bool Get(System.String key, bool defValue) + { + if (settings[key] != null) + { + return (bool)settings[key]; + } + + System.String theValue = ConfigurationManager.AppSettings.Get(key); + if (theValue == null) + { + return defValue; + } + bool retValue = Convert.ToBoolean(theValue.Trim()); + settings[key] = retValue; + return retValue; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/AttributeImplItem.cs b/external/Lucene.Net.Light/src/core/Support/AttributeImplItem.cs new file mode 100644 index 0000000000..f0c4c5a417 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/AttributeImplItem.cs @@ -0,0 +1,41 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
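The AppSettings helper above caches values in a static ListDictionary and falls back first to ConfigurationManager.AppSettings and then to the supplied default. A short sketch; the key names and values are illustrative:

    // In-process override (no app.config entry needed):
    Lucene.Net.Support.AppSettings.Set("Lucene.Net.someThreshold", 2048);
    int threshold = Lucene.Net.Support.AppSettings.Get("Lucene.Net.someThreshold", 1024);  // 2048
    // With no Set() and no matching appSettings entry, the default is returned:
    bool verbose = Lucene.Net.Support.AppSettings.Get("Lucene.Net.someFlag", false);       // false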
+ * +*/ + +using System; + +namespace Lucene.Net.Support +{ + /// + /// A simple wrapper to allow for the use of the GeneralKeyedCollection. The + /// wrapper is required as there can be several keys for an object depending + /// on how many interfaces it implements. + /// + internal sealed class AttributeImplItem + { + internal AttributeImplItem(Type key, Util.Attribute value) + { + this.Key = key; + this.Value = value; + } + internal Type Key; + internal Util.Attribute Value; + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/BitSetSupport.cs b/external/Lucene.Net.Light/src/core/Support/BitSetSupport.cs new file mode 100644 index 0000000000..751a15e667 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/BitSetSupport.cs @@ -0,0 +1,88 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +namespace Lucene.Net.Support +{ + /// + /// This class provides supporting methods of java.util.BitSet + /// that are not present in System.Collections.BitArray. + /// + public class BitSetSupport + { + /// + /// Returns the next set bit at or after index, or -1 if no such bit exists. + /// + /// + /// the index of bit array at which to start checking + /// the next set bit or -1 + public static int NextSetBit(System.Collections.BitArray bitArray, int index) + { + while (index < bitArray.Length) + { + // if index bit is set, return it + // otherwise check next index bit + if (bitArray.Get(index)) + return index; + else + index++; + } + // if no bits are set at or after index, return -1 + return -1; + } + + /// + /// Returns the next un-set bit at or after index, or -1 if no such bit exists. + /// + /// + /// the index of bit array at which to start checking + /// the next set bit or -1 + public static int NextClearBit(System.Collections.BitArray bitArray, int index) + { + while (index < bitArray.Length) + { + // if index bit is not set, return it + // otherwise check next index bit + if (!bitArray.Get(index)) + return index; + else + index++; + } + // if no bits are set at or after index, return -1 + return -1; + } + + /// + /// Returns the number of bits set to true in this BitSet. + /// + /// The BitArray object. + /// The number of bits set to true in this BitSet. 
+ public static int Cardinality(System.Collections.BitArray bits) + { + int count = 0; + for (int i = 0; i < bits.Count; i++) + { + if (bits[i]) + count++; + } + return count; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/BuildType.cs b/external/Lucene.Net.Light/src/core/Support/BuildType.cs new file mode 100644 index 0000000000..1a84245498 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/BuildType.cs @@ -0,0 +1,32 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +namespace Lucene.Net.Support +{ + public class BuildType + { +#if DEBUG + public static bool Debug = true; +#else + public static bool Debug = false; +#endif + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/CRC32.cs b/external/Lucene.Net.Light/src/core/Support/CRC32.cs new file mode 100644 index 0000000000..d1efde286f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/CRC32.cs @@ -0,0 +1,83 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
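The BitSetSupport helpers above mirror java.util.BitSet on top of System.Collections.BitArray; the usual iteration idiom they enable looks like this (bit positions chosen for illustration):

    var bits = new System.Collections.BitArray(64);
    bits.Set(3, true);
    bits.Set(17, true);
    for (int i = Lucene.Net.Support.BitSetSupport.NextSetBit(bits, 0);
         i >= 0;
         i = Lucene.Net.Support.BitSetSupport.NextSetBit(bits, i + 1))
    {
        // i visits 3, then 17
    }
    int setCount = Lucene.Net.Support.BitSetSupport.Cardinality(bits);   // 2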
+ * +*/ + +using System; + +namespace Lucene.Net.Support +{ + public class CRC32 : IChecksum + { + private static readonly UInt32[] crcTable = InitializeCRCTable(); + + private static UInt32[] InitializeCRCTable() + { + UInt32[] crcTable = new UInt32[256]; + for (UInt32 n = 0; n < 256; n++) + { + UInt32 c = n; + for (int k = 8; --k >= 0; ) + { + if ((c & 1) != 0) + c = 0xedb88320 ^ (c >> 1); + else + c = c >> 1; + } + crcTable[n] = c; + } + return crcTable; + } + + private UInt32 crc = 0; + + public long Value + { + get + { + return crc & 0xffffffffL; + } + } + + public void Reset() + { + crc = 0; + } + + public void Update(int bval) + { + UInt32 c = ~crc; + c = crcTable[(c ^ bval) & 0xff] ^ (c >> 8); + crc = ~c; + } + + public void Update(byte[] buf, int off, int len) + { + UInt32 c = ~crc; + while (--len >= 0) + c = crcTable[(c ^ buf[off++]) & 0xff] ^ (c >> 8); + crc = ~c; + } + + public void Update(byte[] buf) + { + Update(buf, 0, buf.Length); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Character.cs b/external/Lucene.Net.Light/src/core/Support/Character.cs new file mode 100644 index 0000000000..a306405511 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Character.cs @@ -0,0 +1,81 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +namespace Lucene.Net.Support +{ + /// + /// Mimics Java's Character class. + /// + public class Character + { + private const char charNull = '\0'; + private const char charZero = '0'; + private const char charA = 'a'; + + /// + /// + public static int MAX_RADIX + { + get + { + return 36; + } + } + + /// + /// + public static int MIN_RADIX + { + get + { + return 2; + } + } + + /// + /// + /// + /// + /// + /// + public static char ForDigit(int digit, int radix) + { + // if radix or digit is out of range, + // return the null character. + if (radix < Character.MIN_RADIX) + return charNull; + if (radix > Character.MAX_RADIX) + return charNull; + if (digit < 0) + return charNull; + if (digit >= radix) + return charNull; + + // if digit is less than 10, + // return '0' plus digit + if (digit < 10) + return (char)((int)charZero + digit); + + // otherwise, return 'a' plus digit. + return (char)((int)charA + digit - 10); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/CloseableThreadLocalProfiler.cs b/external/Lucene.Net.Light/src/core/Support/CloseableThreadLocalProfiler.cs new file mode 100644 index 0000000000..b67a4b857d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/CloseableThreadLocalProfiler.cs @@ -0,0 +1,45 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
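The CRC32 class above is a managed CRC-32 (the same 0xEDB88320 polynomial as java.util.zip.CRC32) exposed through the IChecksum interface. A minimal sketch; the payload is illustrative:

    var crc = new Lucene.Net.Support.CRC32();
    byte[] data = System.Text.Encoding.UTF8.GetBytes("segments_2");
    crc.Update(data);                  // same as Update(data, 0, data.Length)
    long checksum = crc.Value;         // CRC-32 of the bytes, in the low 32 bits
    crc.Reset();                       // ready for the next stream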
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; + +namespace Lucene.Net.Support +{ + /// + /// For Debuging purposes. + /// + public class CloseableThreadLocalProfiler + { + private static bool _enableCloseableThreadLocalProfiler = false; + public static System.Collections.Generic.List Instances = new System.Collections.Generic.List(); + + public static bool EnableCloseableThreadLocalProfiler + { + get { return _enableCloseableThreadLocalProfiler; } + set + { + _enableCloseableThreadLocalProfiler = value; + lock (Instances) + Instances.Clear(); + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/CollectionsHelper.cs b/external/Lucene.Net.Light/src/core/Support/CollectionsHelper.cs new file mode 100644 index 0000000000..74a07ab48d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/CollectionsHelper.cs @@ -0,0 +1,339 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Collections; + +namespace Lucene.Net.Support +{ + /// + /// Support class used to handle Hashtable addition, which does a check + /// first to make sure the added item is unique in the hash. + /// + public class CollectionsHelper + { + public static void Add(System.Collections.Hashtable hashtable, System.Object item) + { + hashtable.Add(item, item); + } + + public static void AddIfNotContains(System.Collections.Hashtable hashtable, System.Object item) + { + // Added lock around check. Even though the collection should already have + // a synchronized wrapper around it, it doesn't prevent this test from having + // race conditions. Two threads can (and have in TestIndexReaderReopen) call + // hashtable.Contains(item) == false at the same time, then both try to add to + // the hashtable, causing an ArgumentException. locking on the collection + // prevents this. 
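The profiler above is only a debugging switch; presumably CloseableThreadLocal registers itself in Instances while the flag is on (that code is not part of this excerpt). A usage sketch under that assumption:

    Lucene.Net.Support.CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler = true;
    // ... run the code under investigation ...
    int tracked = Lucene.Net.Support.CloseableThreadLocalProfiler.Instances.Count;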
-- cc + lock (hashtable) + { + if (hashtable.Contains(item) == false) + { + hashtable.Add(item, item); + } + } + } + + public static void AddIfNotContains(System.Collections.ArrayList hashtable, System.Object item) + { + // see AddIfNotContains(Hashtable, object) for information about the lock + lock (hashtable) + { + if (hashtable.Contains(item) == false) + { + hashtable.Add(item); + } + } + } + + public static void AddAll(System.Collections.Hashtable hashtable, System.Collections.ICollection items) + { + System.Collections.IEnumerator iter = items.GetEnumerator(); + System.Object item; + while (iter.MoveNext()) + { + item = iter.Current; + hashtable.Add(item, item); + } + } + + public static void AddAllIfNotContains(System.Collections.Hashtable hashtable, System.Collections.IList items) + { + System.Object item; + for (int i = 0; i < items.Count; i++) + { + item = items[i]; + if (hashtable.Contains(item) == false) + { + hashtable.Add(item, item); + } + } + } + + public static void AddAllIfNotContains(System.Collections.Hashtable hashtable, System.Collections.ICollection items) + { + System.Collections.IEnumerator iter = items.GetEnumerator(); + System.Object item; + while (iter.MoveNext()) + { + item = iter.Current; + if (hashtable.Contains(item) == false) + { + hashtable.Add(item, item); + } + } + } + + public static void AddAllIfNotContains(System.Collections.Generic.IDictionary hashtable, System.Collections.Generic.ICollection items) + { + foreach (string s in items) + { + if (hashtable.ContainsKey(s) == false) + { + hashtable.Add(s, s); + } + } + } + + public static void AddAll(System.Collections.Generic.IDictionary hashtable, System.Collections.Generic.ICollection items) + { + foreach (string s in items) + { + hashtable.Add(s, s); + } + } + + public static bool Contains(System.Collections.Generic.ICollection col, string item) + { + foreach (string s in col) if (s == item) return true; + return false; + } + + public static bool Contains(System.Collections.ICollection col, System.Object item) + { + System.Collections.IEnumerator iter = col.GetEnumerator(); + while (iter.MoveNext()) + { + if (iter.Current.Equals(item)) + return true; + } + return false; + } + + + public static System.String CollectionToString(System.Collections.Generic.IDictionary c) + { + Hashtable t = new Hashtable(); + foreach (string key in c.Keys) + { + t.Add(key, c[key]); + } + return CollectionToString(t); + } + + /// + /// Converts the specified collection to its string representation. + /// + /// The collection to convert to string. + /// A string representation of the specified collection. 
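The AddIfNotContains helpers above lock on the collection itself so the contains-then-add check is atomic even when several threads share it. A small sketch using a synchronized Hashtable as a set; the field name is illustrative:

    var fieldNames = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
    Lucene.Net.Support.CollectionsHelper.AddIfNotContains(fieldNames, "contents");
    Lucene.Net.Support.CollectionsHelper.AddIfNotContains(fieldNames, "contents");  // second call is a no-op
    bool present = Lucene.Net.Support.CollectionsHelper.Contains(fieldNames.Keys, (object)"contents"); // true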
+ public static System.String CollectionToString(System.Collections.ICollection c) + { + System.Text.StringBuilder s = new System.Text.StringBuilder(); + + if (c != null) + { + + System.Collections.ArrayList l = new System.Collections.ArrayList(c); + + bool isDictionary = (c is System.Collections.BitArray || c is System.Collections.Hashtable || c is System.Collections.IDictionary || c is System.Collections.Specialized.NameValueCollection || (l.Count > 0 && l[0] is System.Collections.DictionaryEntry)); + for (int index = 0; index < l.Count; index++) + { + if (l[index] == null) + s.Append("null"); + else if (!isDictionary) + s.Append(l[index]); + else + { + isDictionary = true; + if (c is System.Collections.Specialized.NameValueCollection) + s.Append(((System.Collections.Specialized.NameValueCollection)c).GetKey(index)); + else + s.Append(((System.Collections.DictionaryEntry)l[index]).Key); + s.Append("="); + if (c is System.Collections.Specialized.NameValueCollection) + s.Append(((System.Collections.Specialized.NameValueCollection)c).GetValues(index)[0]); + else + s.Append(((System.Collections.DictionaryEntry)l[index]).Value); + + } + if (index < l.Count - 1) + s.Append(", "); + } + + if (isDictionary) + { + if (c is System.Collections.ArrayList) + isDictionary = false; + } + if (isDictionary) + { + s.Insert(0, "{"); + s.Append("}"); + } + else + { + s.Insert(0, "["); + s.Append("]"); + } + } + else + s.Insert(0, "null"); + return s.ToString(); + } + + /// + /// Compares two string arrays for equality. + /// + /// First string array list to compare + /// Second string array list to compare + /// true if the strings are equal in both arrays, false otherwise + public static bool CompareStringArrays(System.String[] l1, System.String[] l2) + { + if (l1.Length != l2.Length) + return false; + for (int i = 0; i < l1.Length; i++) + { + if (l1[i] != l2[i]) + return false; + } + return true; + } + + /// + /// Sorts an IList collections + /// + /// The System.Collections.IList instance that will be sorted + /// The Comparator criteria, null to use natural comparator. + public static void Sort(System.Collections.IList list, System.Collections.IComparer Comparator) + { + if (((System.Collections.ArrayList)list).IsReadOnly) + throw new System.NotSupportedException(); + + if ((Comparator == null) || (Comparator is System.Collections.Comparer)) + { + try + { + ((System.Collections.ArrayList)list).Sort(); + } + catch (System.InvalidOperationException e) + { + throw new System.InvalidCastException(e.Message); + } + } + else + { + try + { + ((System.Collections.ArrayList)list).Sort(Comparator); + } + catch (System.InvalidOperationException e) + { + throw new System.InvalidCastException(e.Message); + } + } + } + + /// + /// Fills the array with an specific value from an specific index to an specific index. + /// + /// The array to be filled. + /// The first index to be filled. + /// The last index to be filled. + /// The value to fill the array with. 
+ public static void Fill(System.Array array, System.Int32 fromindex, System.Int32 toindex, System.Object val) + { + System.Object Temp_Object = val; + System.Type elementtype = array.GetType().GetElementType(); + if (elementtype != val.GetType()) + Temp_Object = Convert.ChangeType(val, elementtype); + if (array.Length == 0) + throw (new System.NullReferenceException()); + if (fromindex > toindex) + throw (new System.ArgumentException()); + if ((fromindex < 0) || ((System.Array)array).Length < toindex) + throw (new System.IndexOutOfRangeException()); + for (int index = (fromindex > 0) ? fromindex-- : fromindex; index < toindex; index++) + array.SetValue(Temp_Object, index); + } + + + /// + /// Fills the array with an specific value. + /// + /// The array to be filled. + /// The value to fill the array with. + public static void Fill(System.Array array, System.Object val) + { + Fill(array, 0, array.Length, val); + } + + /// + /// Compares the entire members of one array whith the other one. + /// + /// The array to be compared. + /// The array to be compared with. + /// Returns true if the two specified arrays of Objects are equal + /// to one another. The two arrays are considered equal if both arrays + /// contain the same number of elements, and all corresponding pairs of + /// elements in the two arrays are equal. Two objects e1 and e2 are + /// considered equal if (e1==null ? e2==null : e1.equals(e2)). In other + /// words, the two arrays are equal if they contain the same elements in + /// the same order. Also, two array references are considered equal if + /// both are null. + public static bool Equals(System.Array array1, System.Array array2) + { + bool result = false; + if ((array1 == null) && (array2 == null)) + result = true; + else if ((array1 != null) && (array2 != null)) + { + if (array1.Length == array2.Length) + { + int length = array1.Length; + result = true; + for (int index = 0; index < length; index++) + { + System.Object o1 = array1.GetValue(index); + System.Object o2 = array2.GetValue(index); + if (o1 == null && o2 == null) + continue; // they match + else if (o1 == null || !o1.Equals(o2)) + { + result = false; + break; + } + } + } + } + return result; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Compare.cs b/external/Lucene.Net.Light/src/core/Support/Compare.cs new file mode 100644 index 0000000000..3e158462b8 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compare.cs @@ -0,0 +1,49 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +namespace Lucene.Net.Support +{ + /// + /// Summary description for TestSupportClass. + /// + public class Compare + { + /// + /// Compares two Term arrays for equality. 
+ /// + /// First Term array to compare + /// Second Term array to compare + /// true if the Terms are equal in both arrays, false otherwise + public static bool CompareTermArrays(Index.Term[] t1, Index.Term[] t2) + { + if (t1.Length != t2.Length) + return false; + for (int i = 0; i < t1.Length; i++) + { + if (t1[i].CompareTo(t2[i]) == 0) + { + return true; + } + } + return false; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/ConcurrentDictionary.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/ConcurrentDictionary.cs new file mode 100644 index 0000000000..47914da8a7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/ConcurrentDictionary.cs @@ -0,0 +1,312 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using System.Threading; + +#if NET35 + +namespace Lucene.Net.Support.Compatibility +{ + /// + /// Support class that emulates the behavior of the ConcurrentDictionary + /// from .NET 4.0. This class will, in most cases, perform slightly slower + /// than the 4.0 equivalent. Note that all behavior is emulated, which means + /// that , , and + /// all return a snapshot of the data at the time it was called. 
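Note on the CompareTermArrays method above: as committed, it returns true as soon as any one pair of terms compares equal, and false only when no pair matches, which is looser than the "equal in both arrays" behaviour its doc comment describes. For reference only, a strict element-wise check (a sketch, not a change to the imported source) would look like:

    public static bool TermArraysEqual(Lucene.Net.Index.Term[] t1, Lucene.Net.Index.Term[] t2)
    {
        if (t1.Length != t2.Length)
            return false;
        for (int i = 0; i < t1.Length; i++)
        {
            if (t1[i].CompareTo(t2[i]) != 0)
                return false;    // first mismatch decides
        }
        return true;             // every pair matched
    }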
+ /// + [Serializable] + public class ConcurrentDictionary : IDictionary + { + private readonly object _lockObj = new object(); + private readonly Dictionary _dictInst; + + public ConcurrentDictionary() + : this(16) + { } + + public ConcurrentDictionary(int capacity) + : this(capacity, EqualityComparer.Default) + { } + + public ConcurrentDictionary(int capacity, IEqualityComparer comparer) + { + _dictInst = new Dictionary(capacity, comparer); + } + + public ConcurrentDictionary(IEnumerable> keyValuePairs) + : this(16) + { + foreach(var value in keyValuePairs) + { + _dictInst.Add(value.Key, value.Value); + } + } + + #region Concurrent Dictionary Special Methods + + public TValue AddOrUpdate(TKey key, Func addValueFactory, Func updateValueFactory) + { + lock(_lockObj) + { + if(_dictInst.ContainsKey(key)) + { + _dictInst[key] = updateValueFactory(key, _dictInst[key]); + } + else + { + _dictInst[key] = addValueFactory(key); + } + + return _dictInst[key]; + } + } + + public TValue AddOrUpdate(TKey key, TValue addValue, Func updateValueFactory) + { + lock (_lockObj) + { + if (_dictInst.ContainsKey(key)) + { + _dictInst[key] = updateValueFactory(key, _dictInst[key]); + } + else + { + _dictInst[key] = addValue; + } + + return _dictInst[key]; + } + } + + public TValue GetOrAdd(TKey key, Func valueFactory) + { + lock (_lockObj) + { + if (!_dictInst.ContainsKey(key)) + { + _dictInst[key] = valueFactory(key); + } + + return _dictInst[key]; + } + } + + public TValue GetOrAdd(TKey key, TValue value) + { + lock (_lockObj) + { + if (!_dictInst.ContainsKey(key)) + { + _dictInst[key] = value; + } + + return _dictInst[key]; + } + } + + public bool TryAdd(TKey key, TValue value) + { + lock (_lockObj) + { + if (_dictInst.ContainsKey(key)) + { + return false; + } + + _dictInst[key] = value; + return true; + } + } + + public bool TryRemove(TKey key, out TValue value) + { + lock (_lockObj) + { + if (_dictInst.ContainsKey(key)) + { + value = _dictInst[key]; + _dictInst.Remove(key); + return true; + } + + value = default(TValue); + return false; + } + } + + public bool TryUpdate(TKey key, TValue newValue, TValue comparisonValue) + { + lock (_lockObj) + { + if (_dictInst.ContainsKey(key) && _dictInst[key].Equals(comparisonValue)) + { + _dictInst[key] = newValue; + return true; + } + + return false; + } + } + + #endregion + + #region IDictionary Methods + + // .NET4 ConcurrentDictionary returns an enumerator that can enumerate even + // if the collection is modified. 
We can't do that, so create a copy (expensive) + public IEnumerator> GetEnumerator() + { + lock (_lockObj) + { + return _dictInst.ToList().GetEnumerator(); + } + } + + public bool TryGetValue(TKey key, out TValue value) + { + lock (_lockObj) + { + return _dictInst.TryGetValue(key, out value); + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + public void Clear() + { + lock (_lockObj) + { + _dictInst.Clear(); + } + } + + public int Count + { + get + { + lock (_lockObj) + { + return _dictInst.Count; + } + } + } + + public bool ContainsKey(TKey key) + { + lock (_lockObj) + { + return _dictInst.ContainsKey(key); + } + } + + public TValue this[TKey key] + { + get + { + lock (_lockObj) + { + return _dictInst[key]; + } + } + set + { + lock (_lockObj) + { + _dictInst[key] = value; + } + } + } + + public ICollection Keys + { + get { return _dictInst.Keys.ToArray(); } + } + + public ICollection Values + { + get { return _dictInst.Values.ToArray(); } + } + + #endregion + + #region Explicit Interface Definitions + + bool ICollection>.IsReadOnly + { + get { return ((ICollection>) _dictInst).IsReadOnly; } + } + + void IDictionary.Add(TKey key, TValue value) + { + lock (_lockObj) + { + _dictInst.Add(key, value); + } + } + + bool ICollection>.Contains(KeyValuePair item) + { + lock (_lockObj) + { + return _dictInst.Contains(item); + } + } + + bool IDictionary.Remove(TKey key) + { + lock (_lockObj) + { + return _dictInst.Remove(key); + } + } + + void ICollection>.Add(KeyValuePair item) + { + lock (_lockObj) + { + ((ICollection>)_dictInst).Add(item); + } + } + + void ICollection>.CopyTo(KeyValuePair[] array, int arrayIndex) + { + lock (_lockObj) + { + ((ICollection>)_dictInst).CopyTo(array, arrayIndex); + } + } + + bool ICollection>.Remove(KeyValuePair item) + { + lock (_lockObj) + { + return ((ICollection>)_dictInst).Remove(item); + } + } + + #endregion + } +} + +#endif \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/Func.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/Func.cs new file mode 100644 index 0000000000..508c6a7b27 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/Func.cs @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
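The NET35 ConcurrentDictionary shim above guards its operations with a single lock but keeps the .NET 4 call shapes, so callers compile unchanged on either target. A short sketch; key, values, and names are illustrative:

    var cache = new Lucene.Net.Support.Compatibility.ConcurrentDictionary<string, int>();
    int v1 = cache.GetOrAdd("segments", k => 1);                        // 1 (added)
    int v2 = cache.AddOrUpdate("segments", 1, (k, old) => old + 1);     // 2 (updated)
    int removed;
    bool gone = cache.TryRemove("segments", out removed);               // true, removed == 2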
+ */ + +namespace System +{ +#if NET35 + public delegate TResult Func(T1 arg1, T2 arg2, T3 arg3, T4 arg4, + T5 arg5, T6 arg6, T7 arg7, T8 arg8, + T9 arg9); + + public delegate TResult Func(T1 arg1, T2 arg2, T3 arg3, T4 arg4, + T5 arg5, T6 arg6, T7 arg7, T8 arg8, + T9 arg9, T10 arg10); +#endif +} diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/ISet.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/ISet.cs new file mode 100644 index 0000000000..fc9fa79327 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/ISet.cs @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if NET35 + +namespace System.Collections.Generic +{ + public interface ISet : ICollection + { +#region METHODS + + new bool Add(T item); + + void ExceptWith(IEnumerable other); + + void IntersectWith(IEnumerable other); + + bool IsProperSubsetOf(IEnumerable other); + + bool IsProperSupersetOf(IEnumerable other); + + bool IsSubsetOf(IEnumerable other); + + bool IsSupersetOf(IEnumerable other); + + bool Overlaps(IEnumerable other); + + bool SetEquals(IEnumerable other); + + void SymmetricExceptWith(IEnumerable other); + + void UnionWith(IEnumerable other); + + #endregion + +#region EXTENSION METHODS + + + + #endregion + } + +} + +#endif \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/SetFactory.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/SetFactory.cs new file mode 100644 index 0000000000..18b25206a6 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/SetFactory.cs @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +using System.Collections.Generic; + +namespace Lucene.Net.Support.Compatibility +{ + public static class SetFactory + { + public static ISet CreateHashSet() + { +#if NET35 + return new WrappedHashSet(); +#else + return new HashSet(); +#endif + } + + public static ISet CreateHashSet(IEnumerable other) + { +#if NET35 + return new WrappedHashSet(other); +#else + return new HashSet(other); +#endif + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/SortedSet.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/SortedSet.cs new file mode 100644 index 0000000000..0dad5e95fb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/SortedSet.cs @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if NET35 + +namespace System.Collections.Generic +{ + [Serializable] + public class SortedSet : ISet, ICollection + { + private readonly SortedList _list; + + public SortedSet() + : this(Comparer.Default) + { } + + public SortedSet(IComparer comparer) + { + _list = new SortedList(comparer); + } + + public T Min { get { return (_list.Count) >= 1 ? _list.Keys[0] : default(T); } } + + public T Max { get { return (_list.Count) >= 1 ? _list.Keys[_list.Count - 1] : default(T); } } + + + /// + /// Removes all items from the . + /// + /// The is read-only. 
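SetFactory above hides the difference between the NET35 WrappedHashSet and the framework HashSet, so callers only ever see ISet. A sketch with illustrative contents:

    System.Collections.Generic.ISet<string> stopWords =
        Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
    stopWords.Add("the");
    stopWords.Add("and");
    var copy = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet(stopWords);  // seeded from the first set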
+ /// + public void Clear() + { + _list.Clear(); + } + + public void CopyTo(T[] array, int arrayIndex) + { + _list.Keys.CopyTo(array, arrayIndex); + } + + public bool Remove(T item) + { + return _list.Remove(item); + } + + public bool Contains(T value) + { + return _list.ContainsKey(value); + } + + public bool Add(T item) + { + if (!_list.ContainsKey(item)) + { + _list.Add(item, 0); + return true; + } + return false; + } + + public void UnionWith(IEnumerable other) + { + foreach (var obj in other) + Add(obj); + } + + public IEnumerator GetEnumerator() + { + return _list.Keys.GetEnumerator(); + } + + public IComparer Comparer { get { return _list.Comparer; } } + + public int Count + { + get { return _list.Count; } + } + + #region Explicit Interface Implementations + + void ICollection.Add(T item) + { + Add(item); + } + + void ICollection.CopyTo(Array array, int index) + { + CopyTo((T[]) array, index); + } + + bool ICollection.IsReadOnly + { + get { return false; } + } + + bool ICollection.IsSynchronized + { + get { return false; } + } + + object ICollection.SyncRoot + { + get { throw new NotSupportedException(); } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + int ICollection.Count + { + get { return Count; } + } + + #endregion + + #region ISet Implementation + + void ISet.ExceptWith(IEnumerable other) + { + foreach(var obj in other) + { + _list.Remove(obj); + } + } + + void ISet.IntersectWith(IEnumerable other) + { + throw new NotImplementedException(); + } + + bool ISet.IsProperSubsetOf(IEnumerable other) + { + throw new NotImplementedException(); + } + + bool ISet.IsProperSupersetOf(IEnumerable other) + { + throw new NotImplementedException(); + } + + bool ISet.IsSubsetOf(IEnumerable other) + { + throw new NotImplementedException(); + } + + bool ISet.IsSupersetOf(IEnumerable other) + { + throw new NotImplementedException(); + } + + bool ISet.Overlaps(IEnumerable other) + { + throw new NotImplementedException(); + } + + bool ISet.SetEquals(IEnumerable other) + { + throw new NotImplementedException(); + } + + void ISet.SymmetricExceptWith(IEnumerable other) + { + throw new NotImplementedException(); + } + + #endregion + } +} + +#endif \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/ThreadLocal.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/ThreadLocal.cs new file mode 100644 index 0000000000..167228b854 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/ThreadLocal.cs @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
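The NET35 SortedSet shim above stores its items as the keys of a SortedList, so Min and Max are simply the first and last keys. A sketch; the comparer and values are illustrative:

    var names = new System.Collections.Generic.SortedSet<string>(System.StringComparer.Ordinal);
    names.Add("title");
    names.Add("body");
    bool added = names.Add("body");   // false: already present
    string min = names.Min;           // "body"
    string max = names.Max;           // "title"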
+ */ + +#if NET35 + +using System; + +namespace Lucene.Net.Support.Compatibility +{ + public class ThreadLocal : IDisposable + { + [ThreadStatic] + static WeakDictionary, T> slots; + + static void Init() + { + if (slots == null) slots = new WeakDictionary, T>(); + } + + public T Value + { + set + { + Init(); + slots.Add(this, value); + } + get + { + Init(); + return (T)slots[this]; + } + } + + public void Dispose() + { + if (slots != null) slots.Remove(this); + } + } +} + +#endif \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Compatibility/WrappedHashSet.cs b/external/Lucene.Net.Light/src/core/Support/Compatibility/WrappedHashSet.cs new file mode 100644 index 0000000000..c2ffd21b5f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Compatibility/WrappedHashSet.cs @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if NET35 + +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; + +namespace Lucene.Net.Support.Compatibility +{ + [Serializable] + class WrappedHashSet : HashSet, ISet + { + public WrappedHashSet() + { } + + public WrappedHashSet(IEnumerable items) + : base(items) + { } + + protected WrappedHashSet(SerializationInfo info, StreamingContext context) + : base (info, context) + { + + } + } +} + +#endif \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Cryptography.cs b/external/Lucene.Net.Light/src/core/Support/Cryptography.cs new file mode 100644 index 0000000000..3b6e593feb --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Cryptography.cs @@ -0,0 +1,45 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System.Security.Cryptography; + +namespace Lucene.Net.Support +{ + public static class Cryptography + { + public static bool FIPSCompliant = false; + + public static HashAlgorithm HashAlgorithm + { + get + { + if (FIPSCompliant) + { + //LUCENENET-175 + //No Assumptions should be made on the HashAlgorithm. 
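The NET35 ThreadLocal shim above keeps one [ThreadStatic] WeakDictionary per thread, keyed by the ThreadLocal instance, so each thread reads back only the value it stored. A sketch under that reading:

    var perThreadBuffer = new Lucene.Net.Support.Compatibility.ThreadLocal<byte[]>();
    perThreadBuffer.Value = new byte[1024];      // visible only to the current thread
    byte[] mine = perThreadBuffer.Value;
    perThreadBuffer.Dispose();                   // drops this instance's slot on this thread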
It may change in time. + //SHA256 SHA384 SHA512 etc. + return SHA1.Create(); + } + return MD5.Create(); + } + } +} +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/Deflater.cs b/external/Lucene.Net.Light/src/core/Support/Deflater.cs new file mode 100644 index 0000000000..03473de313 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Deflater.cs @@ -0,0 +1,97 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; + +namespace Lucene.Net.Support +{ + public class Deflater + { + delegate void SetLevelDelegate(int level); + delegate void SetInputDelegate(byte[] input, int offset, int count); + delegate void FinishDelegate(); + delegate bool GetIsFinishedDelegate(); + delegate int DeflateDelegate(byte[] output); + + SetLevelDelegate setLevelMethod; + SetInputDelegate setInputMethod; + FinishDelegate finishMethod; + GetIsFinishedDelegate getIsFinishedMethod; + DeflateDelegate deflateMethod; + + public const int BEST_COMPRESSION = 9; + + internal Deflater(object deflaterInstance) + { + Type type = deflaterInstance.GetType(); + + setLevelMethod = (SetLevelDelegate)Delegate.CreateDelegate( + typeof(SetLevelDelegate), + deflaterInstance, + type.GetMethod("SetLevel", new Type[] { typeof(int) })); + + setInputMethod = (SetInputDelegate)Delegate.CreateDelegate( + typeof(SetInputDelegate), + deflaterInstance, + type.GetMethod("SetInput", new Type[] { typeof(byte[]), typeof(int), typeof(int) })); + + finishMethod = (FinishDelegate)Delegate.CreateDelegate( + typeof(FinishDelegate), + deflaterInstance, + type.GetMethod("Finish", Type.EmptyTypes)); + + getIsFinishedMethod = (GetIsFinishedDelegate)Delegate.CreateDelegate( + typeof(GetIsFinishedDelegate), + deflaterInstance, + type.GetMethod("get_IsFinished", Type.EmptyTypes)); + + deflateMethod = (DeflateDelegate)Delegate.CreateDelegate( + typeof(DeflateDelegate), + deflaterInstance, + type.GetMethod("Deflate", new Type[] { typeof(byte[]) })); + } + + public void SetLevel(int level) + { + setLevelMethod(level); + } + + public void SetInput(byte[] input, int offset, int count) + { + setInputMethod(input, offset, count); + } + + public void Finish() + { + finishMethod(); + } + + public bool IsFinished + { + get { return getIsFinishedMethod(); } + } + + public int Deflate(byte[] output) + { + return deflateMethod(output); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Double.cs b/external/Lucene.Net.Light/src/core/Support/Double.cs new file mode 100644 index 0000000000..a8cff94b31 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Double.cs @@ -0,0 +1,44 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
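The Deflater wrapper above binds, via reflection-built delegates, to a deflater object supplied elsewhere (its constructor is internal), so only the call pattern can be shown here. A sketch of the usual compression loop, taking an already-constructed instance as a parameter:

    static byte[] CompressBytes(Lucene.Net.Support.Deflater deflater, byte[] input)
    {
        deflater.SetLevel(Lucene.Net.Support.Deflater.BEST_COMPRESSION);
        deflater.SetInput(input, 0, input.Length);
        deflater.Finish();
        var output = new System.IO.MemoryStream(input.Length);
        byte[] buffer = new byte[1024];
        while (!deflater.IsFinished)
        {
            int count = deflater.Deflate(buffer);
            output.Write(buffer, 0, count);
        }
        return output.ToArray();
    }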
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Globalization; + +namespace Lucene.Net.Support +{ + /// + /// + /// + public class Double + { + public static System.Double Parse(System.String s) + { + try + { + return System.Double.Parse(s.Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator)); + } + catch (OverflowException) + { + return System.Double.MaxValue; + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/EquatableList.cs b/external/Lucene.Net.Light/src/core/Support/EquatableList.cs new file mode 100644 index 0000000000..ab6fb47f87 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/EquatableList.cs @@ -0,0 +1,339 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace Lucene.Net.Support +{ + /// Represents a strongly typed list of objects that can be accessed by index. + /// Provides methods to search, sort, and manipulate lists. Also provides functionality + /// to compare lists against each other through an implementations of + /// . + /// The type of elements in the list. + [Serializable] + public class EquatableList : System.Collections.Generic.List, + IEquatable>, + ICloneable + { + /// Initializes a new instance of the + /// class that is empty and has the + /// default initial capacity. + public EquatableList() : base() { } + + /// Initializes a new instance of the + /// class that contains elements copied from the specified collection and has + /// sufficient capacity to accommodate the number of elements copied. + /// The collection whose elements are copied to the new list. + public EquatableList(System.Collections.Generic.IEnumerable collection) : base(collection) { } + + /// Initializes a new instance of the + /// class that is empty and has the specified initial capacity. + /// The number of elements that the new list can initially store. + public EquatableList(int capacity) : base(capacity) { } + + /// Adds a range of objects represented by the + /// implementation. 
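The Double helper above swaps '.' for the current culture's decimal separator before parsing, so dot-formatted literals parse the same under comma-decimal cultures. A one-line sketch:

    double boost = Lucene.Net.Support.Double.Parse("2.5");   // 2.5 under en-US and de-DE alike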
+ /// The + /// implementation to add to this list. + public void AddRange(ICollection c) + { + // If the collection is null, throw an exception. + if (c == null) throw new ArgumentNullException("c"); + + // Pre-compute capacity. + Capacity = Math.Max(c.Count + Count, Capacity); + + // Cycle through the items and add. + foreach (T item in c) + { + // Add the item. + Add(item); + } + } + + /// Compares the counts of two + /// implementations. + /// This uses a trick in LINQ, sniffing types for implementations + /// of interfaces that might supply shortcuts when trying to make comparisons. + /// In this case, that is the and + /// interfaces, either of which can provide a count + /// which can be used in determining the equality of sequences (if they don't have + /// the same count, then they can't be equal). + /// The from the left hand side of the + /// comparison to check the count of. + /// The from the right hand side of the + /// comparison to check the count of. + /// Null if the result is indeterminate. This occurs when either + /// or doesn't implement or . + /// Otherwise, it will get the count from each and return true if they are equal, false otherwise. + private static bool? EnumerableCountsEqual(System.Collections.Generic.IEnumerable x, System.Collections.Generic.IEnumerable y) + { + // Get the ICollection and ICollection interfaces. + System.Collections.Generic.ICollection xOfTCollection = x as System.Collections.Generic.ICollection; + System.Collections.Generic.ICollection yOfTCollection = y as System.Collections.Generic.ICollection; + ICollection xCollection = x as ICollection; + ICollection yCollection = y as ICollection; + + // The count in x and y. + int? xCount = xOfTCollection != null ? xOfTCollection.Count : xCollection != null ? xCollection.Count : (int?)null; + int? yCount = yOfTCollection != null ? yOfTCollection.Count : yCollection != null ? yCollection.Count : (int?)null; + + // If either are null, return null, the result is indeterminate. + if (xCount == null || yCount == null) + { + // Return null, indeterminate. + return null; + } + + // Both counts are non-null, compare. + return xCount == yCount; + } + + /// Compares the contents of a + /// implementation to another one to determine equality. + /// Thinking of the implementation as + /// a string with any number of characters, the algorithm checks + /// each item in each list. If any item of the list is not equal (or + /// one list contains all the elements of another list), then that list + /// element is compared to the other list element to see which + /// list is greater. + /// The implementation + /// that is considered the left hand side. + /// The implementation + /// that is considered the right hand side. + /// True if the items are equal, false otherwise. + private static bool Equals(System.Collections.Generic.IEnumerable x, + System.Collections.Generic.IEnumerable y) + { + // If x and y are null, then return true, they are the same. + if (x == null && y == null) + { + // They are the same, return 0. + return true; + } + + // If one is null, then return a value based on whether or not + // one is null or not. + if (x == null || y == null) + { + // Return false, one is null, the other is not. + return false; + } + + // Check to see if the counts on the IEnumerable implementations are equal. + // This is a shortcut, if they are not equal, then the lists are not equal. + // If the result is indeterminate, then get out. + bool? 
enumerableCountsEqual = EnumerableCountsEqual(x, y); + + // If the enumerable counts have been able to be calculated (indicated by + // a non-null value) and it is false, then no need to iterate through the items. + if (enumerableCountsEqual != null && !enumerableCountsEqual.Value) + { + // The sequences are not equal. + return false; + } + + // The counts of the items in the enumerations are equal, or indeterminate + // so a full iteration needs to be made to compare each item. + // Get the default comparer for T first. + System.Collections.Generic.EqualityComparer defaultComparer = + EqualityComparer.Default; + + // Get the enumerator for y. + System.Collections.Generic.IEnumerator otherEnumerator = y.GetEnumerator(); + + // Call Dispose on IDisposable if there is an implementation on the + // IEnumerator returned by a call to y.GetEnumerator(). + using (otherEnumerator as IDisposable) + { + // Cycle through the items in this list. + foreach (T item in x) + { + // If there isn't an item to get, then this has more + // items than that, they are not equal. + if (!otherEnumerator.MoveNext()) + { + // Return false. + return false; + } + + // Perform a comparison. Must check this on the left hand side + // and that on the right hand side. + bool comparison = defaultComparer.Equals(item, otherEnumerator.Current); + + // If the value is false, return false. + if (!comparison) + { + // Return the value. + return comparison; + } + } + + // If there are no more items, then return true, the sequences + // are equal. + if (!otherEnumerator.MoveNext()) + { + // The sequences are equal. + return true; + } + + // The other sequence has more items than this one, return + // false, these are not equal. + return false; + } + } + + #region IEquatable> Members + /// Compares this sequence to another + /// implementation, returning true if they are equal, false otherwise. + /// The other implementation + /// to compare against. + /// True if the sequence in + /// is the same as this one. + public bool Equals(System.Collections.Generic.IEnumerable other) + { + // Compare to the other sequence. If 0, then equal. + return Equals(this, other); + } + #endregion + + /// Compares this object for equality against other. + /// The other object to compare this object against. + /// True if this object and are equal, false + /// otherwise. + public override bool Equals(object obj) + { + // Call the strongly typed version. + return Equals(obj as System.Collections.Generic.IEnumerable); + } + + /// Gets the hash code for the list. + /// The hash code value. + public override int GetHashCode() + { + // Call the static method, passing this. + return GetHashCode(this); + } + +#if __MonoCS__ + public static int GetHashCode(System.Collections.Generic.IEnumerable source) +#else + /// Gets the hash code for the list. + /// The + /// implementation which will have all the contents hashed. + /// The hash code value. + public static int GetHashCode(System.Collections.Generic.IEnumerable source) +#endif + { + // If source is null, then return 0. + if (source == null) return 0; + + // Seed the hash code with the hash code of the type. + // This is done so that you don't have a lot of collisions of empty + // ComparableList instances when placed in dictionaries + // and things that rely on hashcodes. + int hashCode = typeof(T).GetHashCode(); + + // Iterate through the items in this implementation. + foreach (T item in source) + { + // Adjust the hash code. + hashCode = 31 * hashCode + (item == null ? 
0 : item.GetHashCode()); + } + + // Return the hash code. + return hashCode; + } + + // TODO: When diverging from Java version of Lucene, can uncomment these to adhere to best practices when overriding the Equals method and implementing IEquatable. + ///// Overload of the == operator, it compares a + ///// to an + ///// implementation. + ///// The to compare + ///// against . + ///// The to compare + ///// against . + ///// True if the instances are equal, false otherwise. + //public static bool operator ==(EquatableList x, System.Collections.Generic.IEnumerable y) + //{ + // // Call Equals. + // return Equals(x, y); + //} + + ///// Overload of the == operator, it compares a + ///// to an + ///// implementation. + ///// The to compare + ///// against . + ///// The to compare + ///// against . + ///// True if the instances are equal, false otherwise. + //public static bool operator ==(System.Collections.Generic.IEnumerable x, EquatableList y) + //{ + // // Call equals. + // return Equals(x, y); + //} + + ///// Overload of the != operator, it compares a + ///// to an + ///// implementation. + ///// The to compare + ///// against . + ///// The to compare + ///// against . + ///// True if the instances are not equal, false otherwise. + //public static bool operator !=(EquatableList x, System.Collections.Generic.IEnumerable y) + //{ + // // Return the negative of the equals operation. + // return !(x == y); + //} + + ///// Overload of the != operator, it compares a + ///// to an + ///// implementation. + ///// The to compare + ///// against . + ///// The to compare + ///// against . + ///// True if the instances are not equal, false otherwise. + //public static bool operator !=(System.Collections.Generic.IEnumerable x, EquatableList y) + //{ + // // Return the negative of the equals operation. + // return !(x == y); + //} + + #region ICloneable Members + + /// Clones the . + /// This is a shallow clone. + /// A new shallow clone of this + /// . + public object Clone() + { + // Just create a new one, passing this to the constructor. + return new EquatableList(this); + } + + #endregion + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/FileSupport.cs b/external/Lucene.Net.Light/src/core/Support/FileSupport.cs new file mode 100644 index 0000000000..b6236c80ab --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/FileSupport.cs @@ -0,0 +1,121 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.IO; + +namespace Lucene.Net.Support +{ + /// + /// Represents the methods to support some operations over files. + /// + public class FileSupport + { + /// + /// Returns an array of abstract pathnames representing the files and directories of the specified path. 
+ /// + /// The abstract pathname to list it childs. + /// An array of abstract pathnames childs of the path specified or null if the path is not a directory + public static System.IO.FileInfo[] GetFiles(System.IO.FileInfo path) + { + if ((path.Attributes & FileAttributes.Directory) > 0) + { + String[] fullpathnames = Directory.GetFileSystemEntries(path.FullName); + System.IO.FileInfo[] result = new System.IO.FileInfo[fullpathnames.Length]; + for (int i = 0; i < result.Length; i++) + result[i] = new System.IO.FileInfo(fullpathnames[i]); + return result; + } + else + return null; + } + + // TODO: This filesupport thing is silly. Same goes with _TestUtil's RMDir. + // If we're removing a directory + public static System.IO.FileInfo[] GetFiles(System.IO.DirectoryInfo path) + { + return GetFiles(new FileInfo(path.FullName)); + } + + /// + /// Returns a list of files in a give directory. + /// + /// The full path name to the directory. + /// + /// An array containing the files. + public static System.String[] GetLuceneIndexFiles(System.String fullName, + Index.IndexFileNameFilter indexFileNameFilter) + { + System.IO.DirectoryInfo dInfo = new System.IO.DirectoryInfo(fullName); + System.Collections.ArrayList list = new System.Collections.ArrayList(); + foreach (System.IO.FileInfo fInfo in dInfo.GetFiles()) + { + if (indexFileNameFilter.Accept(fInfo, fInfo.Name) == true) + { + list.Add(fInfo.Name); + } + } + System.String[] retFiles = new System.String[list.Count]; + list.CopyTo(retFiles); + return retFiles; + } + + // Disable the obsolete warning since we must use FileStream.Handle + // because Mono does not support FileSystem.SafeFileHandle at present. +#pragma warning disable 618 + + /// + /// Flushes the specified file stream. Ensures that all buffered + /// data is actually written to the file system. + /// + /// The file stream. + public static void Sync(System.IO.FileStream fileStream) + { + if (fileStream == null) + throw new ArgumentNullException("fileStream"); + + fileStream.Flush(); + + //if (OS.IsWindows) + //{ + // if (!FlushFileBuffers(fileStream.Handle)) + // throw new System.IO.IOException(); + //} + //else if (OS.IsUnix) + //{ + // if (fsync(fileStream.Handle) != IntPtr.Zero) + // throw new System.IO.IOException(); + //} + //else + //{ + // throw new NotImplementedException(); + //} + } + +#pragma warning restore 618 + + //[System.Runtime.InteropServices.DllImport("libc")] + //extern static IntPtr fsync(IntPtr fd); + + //[System.Runtime.InteropServices.DllImport("kernel32.dll")] + //extern static bool FlushFileBuffers(IntPtr hFile); + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/GeneralKeyedCollection.cs b/external/Lucene.Net.Light/src/core/Support/GeneralKeyedCollection.cs new file mode 100644 index 0000000000..1b2e29b37d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/GeneralKeyedCollection.cs @@ -0,0 +1,96 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Diagnostics; + +namespace Lucene.Net.Support +{ + /// A collection of which can be + /// looked up by instances of . + /// The type of the items contains in this + /// collection. + /// The type of the keys that can be used to look + /// up the items. + internal class GeneralKeyedCollection : System.Collections.ObjectModel.KeyedCollection + { + /// Creates a new instance of the + /// class. + /// The which will convert + /// instances of to + /// when the override of is called. + internal GeneralKeyedCollection(Converter converter) + : base() + { + // If the converter is null, throw an exception. + if (converter == null) throw new ArgumentNullException("converter"); + + // Store the converter. + this.converter = converter; + + // That's all folks. + return; + } + + /// The which will convert + /// instances of to + /// when the override of is called. + private readonly Converter converter; + + /// Converts an item that is added to the collection to + /// a key. + /// The instance of + /// to convert into an instance of . + /// The instance of which is the + /// key for this item. + protected override TKey GetKeyForItem(TItem item) + { + // The converter is not null. + Debug.Assert(converter != null); + + // Call the converter. + return converter(item); + } + + /// Determines if a key for an item exists in this + /// collection. + /// The instance of + /// to see if it exists in this collection. + /// True if the key exists in the collection, false otherwise. + public bool ContainsKey(TKey key) + { + // Call the dictionary - it is lazily created when the first item is added + if (Dictionary != null) + { + return Dictionary.ContainsKey(key); + } + else + { + return false; + } + } + + public System.Collections.Generic.IList Values() + { + return base.Items; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/HashMap.cs b/external/Lucene.Net.Light/src/core/Support/HashMap.cs new file mode 100644 index 0000000000..04e09c2d66 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/HashMap.cs @@ -0,0 +1,449 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
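A minimal usage sketch for the GeneralKeyedCollection<TKey, TItem> wrapper added above. The type and its constructor are internal, so code like this would have to live inside the same assembly; the Entry type, field names, and values are illustrative, not part of the patch.

using System;

namespace Lucene.Net.Support
{
    internal static class GeneralKeyedCollectionSketch
    {
        private sealed class Entry
        {
            public string Name;   // used as the lookup key
            public int Value;
        }

        internal static void Demo()
        {
            // The converter pulls the key out of each item as it is added.
            var byName = new GeneralKeyedCollection<string, Entry>(e => e.Name);
            byName.Add(new Entry { Name = "alpha", Value = 1 });
            byName.Add(new Entry { Name = "beta", Value = 2 });

            Console.WriteLine(byName.ContainsKey("alpha"));  // True
            Console.WriteLine(byName["beta"].Value);         // 2 (KeyedCollection indexer by key)
            Console.WriteLine(byName.Values().Count);        // 2
        }
    }
}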
+ * +*/ + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace Lucene.Net.Support +{ + /// + /// A C# emulation of the Java Hashmap + /// + /// A is a close equivalent to the Java + /// Hashmap. One difference java implementation of the class is that + /// the Hashmap supports both null keys and values, where the C# Dictionary + /// only supports null values not keys. Also, V Get(TKey) + /// method in Java returns null if the key doesn't exist, instead of throwing + /// an exception. This implementation doesn't throw an exception when a key + /// doesn't exist, it will return null. This class is slower than using a + /// , because of extra checks that have to be + /// done on each access, to check for null. + /// + /// + /// NOTE: This class works best with nullable types. default(T) is returned + /// when a key doesn't exist in the collection (this being similar to how Java returns + /// null). Therefore, if the expected behavior of the java code is to execute code + /// based on if the key exists, when the key is an integer type, it will return 0 instead of null. + /// + /// + /// Consider also implementing IDictionary, IEnumerable, and ICollection + /// like does, so HashMap can be + /// used in substituted in place for the same interfaces it implements. + /// + /// + /// The type of keys in the dictionary + /// The type of values in the dictionary + [Serializable] + public class HashMap : IDictionary + { + internal IEqualityComparer _comparer; + internal Dictionary _dict; + + // Indicates if a null key has been assigned, used for iteration + private bool _hasNullValue; + // stores the value for the null key + private TValue _nullValue; + // Indicates the type of key is a non-nullable valuetype + private bool _isValueType; + + public HashMap() + : this(0) + { } + + public HashMap(IEqualityComparer comparer) + : this(0, comparer) + { + + } + + public HashMap(int initialCapacity) + : this(initialCapacity, EqualityComparer.Default) + { + + } + + public HashMap(int initialCapacity, IEqualityComparer comparer) + { + _comparer = comparer; + _dict = new Dictionary(initialCapacity, _comparer); + _hasNullValue = false; + + if (typeof(TKey).IsValueType) + { + _isValueType = Nullable.GetUnderlyingType(typeof(TKey)) == null; + } + } + + public HashMap(IEnumerable> other) + : this(0) + { + foreach (var kvp in other) + { + Add(kvp.Key, kvp.Value); + } + } + + public bool ContainsValue(TValue value) + { + if (!_isValueType && _hasNullValue && _nullValue.Equals(value)) + return true; + + return _dict.ContainsValue(value); + } + + #region Implementation of IEnumerable + + public IEnumerator> GetEnumerator() + { + if (!_isValueType && _hasNullValue) + { + yield return new KeyValuePair(default(TKey), _nullValue); + } + foreach (var kvp in _dict) + { + yield return kvp; + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + #endregion + + #region Implementation of ICollection> + + void ICollection>.Add(KeyValuePair item) + { + Add(item.Key, item.Value); + } + + public void Clear() + { + _hasNullValue = false; + _nullValue = default(TValue); + _dict.Clear(); + } + + bool ICollection>.Contains(KeyValuePair item) + { + if (!_isValueType && _comparer.Equals(item.Key, default(TKey))) + { + return _hasNullValue && EqualityComparer.Default.Equals(item.Value, _nullValue); + } + + return ((ICollection>)_dict).Contains(item); + } + + void ICollection>.CopyTo(KeyValuePair[] array, int arrayIndex) + { + ((ICollection>) _dict).CopyTo(array, 
arrayIndex); + if(!_isValueType && _hasNullValue) + { + array[array.Length - 1] = new KeyValuePair(default(TKey), _nullValue); + } + } + + public bool Remove(KeyValuePair item) + { + if (!_isValueType && _comparer.Equals(item.Key, default(TKey))) + { + if (!_hasNullValue) + return false; + + _hasNullValue = false; + _nullValue = default(TValue); + return true; + } + + return ((ICollection>)_dict).Remove(item); + } + + public int Count + { + get { return _dict.Count + (_hasNullValue ? 1 : 0); } + } + + public bool IsReadOnly + { + get { return false; } + } + + #endregion + + #region Implementation of IDictionary + + public bool ContainsKey(TKey key) + { + if (!_isValueType && _comparer.Equals(key, default(TKey))) + { + if (_hasNullValue) + { + return true; + } + return false; + } + + return _dict.ContainsKey(key); + } + + public virtual void Add(TKey key, TValue value) + { + if (!_isValueType && _comparer.Equals(key, default(TKey))) + { + _hasNullValue = true; + _nullValue = value; + } + else + { + _dict[key] = value; + } + } + + public bool Remove(TKey key) + { + if (!_isValueType && _comparer.Equals(key, default(TKey))) + { + _hasNullValue = false; + _nullValue = default(TValue); + return true; + } + else + { + return _dict.Remove(key); + } + } + + public bool TryGetValue(TKey key, out TValue value) + { + if (!_isValueType && _comparer.Equals(key, default(TKey))) + { + if (_hasNullValue) + { + value = _nullValue; + return true; + } + + value = default(TValue); + return false; + } + else + { + return _dict.TryGetValue(key, out value); + } + } + + public TValue this[TKey key] + { + get + { + if (!_isValueType && _comparer.Equals(key, default(TKey))) + { + if (!_hasNullValue) + { + return default(TValue); + } + return _nullValue; + } + return _dict.ContainsKey(key) ? 
_dict[key] : default(TValue); + } + set { Add(key, value); } + } + + public ICollection Keys + { + get + { + if (!_hasNullValue) return _dict.Keys; + + // Using a List to generate an ICollection + // would incur a costly copy of the dict's KeyCollection + // use out own wrapper instead + return new NullKeyCollection(_dict); + } + } + + public ICollection Values + { + get + { + if (!_hasNullValue) return _dict.Values; + + // Using a List to generate an ICollection + // would incur a costly copy of the dict's ValueCollection + // use out own wrapper instead + return new NullValueCollection(_dict, _nullValue); + } + } + + #endregion + + #region NullValueCollection + + /// + /// Wraps a dictionary and adds the value + /// represented by the null key + /// + class NullValueCollection : ICollection + { + private readonly TValue _nullValue; + private readonly Dictionary _internalDict; + + public NullValueCollection(Dictionary dict, TValue nullValue) + { + _internalDict = dict; + _nullValue = nullValue; + } + + #region Implementation of IEnumerable + + public IEnumerator GetEnumerator() + { + yield return _nullValue; + + foreach (var val in _internalDict.Values) + { + yield return val; + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + #endregion + + #region Implementation of ICollection + + public void CopyTo(TValue[] array, int arrayIndex) + { + throw new NotImplementedException("Implement as needed"); + } + + public int Count + { + get { return _internalDict.Count + 1; } + } + + public bool IsReadOnly + { + get { return true; } + } + + #region Explicit Interface Methods + + void ICollection.Add(TValue item) + { + throw new NotSupportedException(); + } + + void ICollection.Clear() + { + throw new NotSupportedException(); + } + + bool ICollection.Contains(TValue item) + { + throw new NotSupportedException(); + } + + bool ICollection.Remove(TValue item) + { + throw new NotSupportedException("Collection is read only!"); + } + #endregion + + #endregion + } + + #endregion + + #region NullKeyCollection + /// + /// Wraps a dictionary's collection, adding in a + /// null key. 
+ /// + class NullKeyCollection : ICollection + { + private readonly Dictionary _internalDict; + + public NullKeyCollection(Dictionary dict) + { + _internalDict = dict; + } + + public IEnumerator GetEnumerator() + { + yield return default(TKey); + foreach (var key in _internalDict.Keys) + { + yield return key; + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + public void CopyTo(TKey[] array, int arrayIndex) + { + throw new NotImplementedException("Implement this as needed"); + } + + public int Count + { + get { return _internalDict.Count + 1; } + } + + public bool IsReadOnly + { + get { return true; } + } + + #region Explicit Interface Definitions + bool ICollection.Contains(TKey item) + { + throw new NotSupportedException(); + } + + void ICollection.Add(TKey item) + { + throw new NotSupportedException(); + } + + void ICollection.Clear() + { + throw new NotSupportedException(); + } + + bool ICollection.Remove(TKey item) + { + throw new NotSupportedException(); + } + #endregion + } + #endregion + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/IChecksum.cs b/external/Lucene.Net.Light/src/core/Support/IChecksum.cs new file mode 100644 index 0000000000..1b2d393f7d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/IChecksum.cs @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +using System; + +namespace Lucene.Net.Support +{ + /// + /// Contains conversion support elements such as classes, interfaces and static methods. + /// + public interface IChecksum + { + void Reset(); + void Update(int b); + void Update(byte[] b); + void Update(byte[] b, int offset, int length); + long Value { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Support/IThreadRunnable.cs b/external/Lucene.Net.Light/src/core/Support/IThreadRunnable.cs new file mode 100644 index 0000000000..309979ab7e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/IThreadRunnable.cs @@ -0,0 +1,36 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
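A small sketch of the Java-flavoured behaviour the HashMap<TKey, TValue> port above provides: null keys are accepted and a missing key yields default(TValue) instead of throwing. The keys and values shown are illustrative.

using System;
using Lucene.Net.Support;

public static class HashMapSketch
{
    public static void Demo()
    {
        var map = new HashMap<string, string>();
        map["field"] = "title";
        map[null] = "stored under the null key";    // a Dictionary<,> would throw here

        Console.WriteLine(map["field"]);            // title
        Console.WriteLine(map[null]);               // stored under the null key
        Console.WriteLine(map["missing"] == null);  // True: no KeyNotFoundException
        Console.WriteLine(map.Count);               // 2
    }
}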
See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +namespace Lucene.Net.Support +{ + /// + /// This interface should be implemented by any class whose instances are intended + /// to be executed by a thread. + /// + public interface IThreadRunnable + { + /// + /// This method has to be implemented in order that starting of the thread causes the object's + /// run method to be called in that separately executing thread. + /// + void Run(); + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Inflater.cs b/external/Lucene.Net.Light/src/core/Support/Inflater.cs new file mode 100644 index 0000000000..a67add0b61 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Inflater.cs @@ -0,0 +1,71 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; + +namespace Lucene.Net.Support +{ + public class Inflater + { + delegate void SetInputDelegate(byte[] buffer); + delegate bool GetIsFinishedDelegate(); + delegate int InflateDelegate(byte[] buffer); + + SetInputDelegate setInputMethod; + GetIsFinishedDelegate getIsFinishedMethod; + InflateDelegate inflateMethod; + + internal Inflater(object inflaterInstance) + { + Type type = inflaterInstance.GetType(); + + setInputMethod = (SetInputDelegate)Delegate.CreateDelegate( + typeof(SetInputDelegate), + inflaterInstance, + type.GetMethod("SetInput", new Type[] { typeof(byte[]) })); + + getIsFinishedMethod = (GetIsFinishedDelegate)Delegate.CreateDelegate( + typeof(GetIsFinishedDelegate), + inflaterInstance, + type.GetMethod("get_IsFinished", Type.EmptyTypes)); + + inflateMethod = (InflateDelegate)Delegate.CreateDelegate( + typeof(InflateDelegate), + inflaterInstance, + type.GetMethod("Inflate", new Type[] { typeof(byte[]) })); + } + + public void SetInput(byte[] buffer) + { + setInputMethod(buffer); + } + + public bool IsFinished + { + get { return getIsFinishedMethod(); } + } + + public int Inflate(byte[] buffer) + { + return inflateMethod(buffer); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Number.cs b/external/Lucene.Net.Light/src/core/Support/Number.cs new file mode 100644 index 0000000000..70f35a96e9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Number.cs @@ -0,0 +1,252 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Globalization; + +namespace Lucene.Net.Support +{ + /// + /// A simple class for number conversions. + /// + public class Number + { + /// + /// Min radix value. + /// + public const int MIN_RADIX = 2; + /// + /// Max radix value. + /// + public const int MAX_RADIX = 36; + + private const System.String digits = "0123456789abcdefghijklmnopqrstuvwxyz"; + + + /// + /// Converts a number to System.String. + /// + /// + /// + public static System.String ToString(long number) + { + System.Text.StringBuilder s = new System.Text.StringBuilder(); + + if (number == 0) + { + s.Append("0"); + } + else + { + if (number < 0) + { + s.Append("-"); + number = -number; + } + + while (number > 0) + { + char c = digits[(int)number % 36]; + s.Insert(0, c); + number = number / 36; + } + } + + return s.ToString(); + } + + + /// + /// Converts a number to System.String. + /// + /// + /// + public static System.String ToString(float f) + { + if (((float)(int)f) == f) + { + return ((int)f).ToString() + ".0"; + } + else + { + return f.ToString(NumberFormatInfo.InvariantInfo); + } + } + + /// + /// Converts a number to System.String in the specified radix. + /// + /// A number to be converted. + /// A radix. + /// A System.String representation of the number in the specified redix. + public static System.String ToString(long i, int radix) + { + if (radix < MIN_RADIX || radix > MAX_RADIX) + radix = 10; + + char[] buf = new char[65]; + int charPos = 64; + bool negative = (i < 0); + + if (!negative) + { + i = -i; + } + + while (i <= -radix) + { + buf[charPos--] = digits[(int)(-(i % radix))]; + i = i / radix; + } + buf[charPos] = digits[(int)(-i)]; + + if (negative) + { + buf[--charPos] = '-'; + } + + return new System.String(buf, charPos, (65 - charPos)); + } + + /// + /// Parses a number in the specified radix. + /// + /// An input System.String. + /// A radix. + /// The parsed number in the specified radix. 
+ public static long Parse(System.String s, int radix) + { + if (s == null) + { + throw new ArgumentException("null"); + } + + if (radix < MIN_RADIX) + { + throw new NotSupportedException("radix " + radix + + " less than Number.MIN_RADIX"); + } + if (radix > MAX_RADIX) + { + throw new NotSupportedException("radix " + radix + + " greater than Number.MAX_RADIX"); + } + + long result = 0; + long mult = 1; + + s = s.ToLower(); + + for (int i = s.Length - 1; i >= 0; i--) + { + int weight = digits.IndexOf(s[i]); + if (weight == -1) + throw new FormatException("Invalid number for the specified radix"); + + result += (weight * mult); + mult *= radix; + } + + return result; + } + + /// + /// Performs an unsigned bitwise right shift with the specified number + /// + /// Number to operate on + /// Ammount of bits to shift + /// The resulting number from the shift operation + public static int URShift(int number, int bits) + { + return (int)(((uint)number) >> bits); + } + + + /// + /// Performs an unsigned bitwise right shift with the specified number + /// + /// Number to operate on + /// Ammount of bits to shift + /// The resulting number from the shift operation + public static long URShift(long number, int bits) + { + return (long)(((ulong)number) >> bits); + } + + + /// + /// Returns the index of the first bit that is set to true that occurs + /// on or after the specified starting index. If no such bit exists + /// then -1 is returned. + /// + /// The BitArray object. + /// The index to start checking from (inclusive). + /// The index of the next set bit. + public static int NextSetBit(System.Collections.BitArray bits, int fromIndex) + { + for (int i = fromIndex; i < bits.Length; i++) + { + if (bits[i] == true) + { + return i; + } + } + return -1; + } + + /// + /// Converts a System.String number to long. + /// + /// + /// + public static long ToInt64(System.String s) + { + long number = 0; + long factor; + + // handle negative number + if (s.StartsWith("-")) + { + s = s.Substring(1); + factor = -1; + } + else + { + factor = 1; + } + + // generate number + for (int i = s.Length - 1; i > -1; i--) + { + int n = digits.IndexOf(s[i]); + + // not supporting fractional or scientific notations + if (n < 0) + throw new System.ArgumentException("Invalid or unsupported character in number: " + s[i]); + + number += (n * factor); + factor *= 36; + } + + return number; + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/OS.cs b/external/Lucene.Net.Light/src/core/Support/OS.cs new file mode 100644 index 0000000000..7f80abf5db --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/OS.cs @@ -0,0 +1,62 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
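A quick sketch of the Number helpers above, covering radix-based string conversion and the unsigned right shift that stands in for Java's >>> operator. The literals are illustrative.

using System;
using Lucene.Net.Support;

public static class NumberSketch
{
    public static void Demo()
    {
        Console.WriteLine(Number.ToString(255L, 16));  // ff
        Console.WriteLine(Number.Parse("ff", 16));     // 255
        Console.WriteLine(Number.URShift(-1, 28));     // 15, equivalent to Java's -1 >>> 28
        Console.WriteLine(Number.ToString(1.0f));      // 1.0 (Java-style float formatting)
    }
}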
+ * +*/ + +using System; + +namespace Lucene.Net.Support +{ + /// + /// Provides platform infos. + /// + public class OS + { + static bool isUnix; + static bool isWindows; + + static OS() + { + PlatformID pid = Environment.OSVersion.Platform; + isWindows = pid == PlatformID.Win32NT || pid == PlatformID.Win32Windows; + + // we use integers instead of enum tags because "MacOS" + // requires 2.0 SP2, 3.0 SP2 or 3.5 SP1. + // 128 is mono's old platform tag for Unix. + int id = (int)pid; + isUnix = id == 4 || id == 6 || id == 128; + } + + /// + /// Whether we run under a Unix platform. + /// + public static bool IsUnix + { + get { return isUnix; } + } + + /// + /// Whether we run under a supported Windows platform. + /// + public static bool IsWindows + { + get { return isWindows; } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/SharpZipLib.cs b/external/Lucene.Net.Light/src/core/Support/SharpZipLib.cs new file mode 100644 index 0000000000..c7882770bd --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/SharpZipLib.cs @@ -0,0 +1,51 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System.Reflection; + +namespace Lucene.Net.Support +{ + public class SharpZipLib + { + static System.Reflection.Assembly asm = null; + + static SharpZipLib() + { + try + { + asm = Assembly.Load("ICSharpCode.SharpZipLib"); + } + catch { } + } + + public static Deflater CreateDeflater() + { + if (asm == null) throw new System.IO.FileNotFoundException("Can not load ICSharpCode.SharpZipLib.dll"); + return new Deflater(asm.CreateInstance("ICSharpCode.SharpZipLib.Zip.Compression.Deflater")); + } + + public static Inflater CreateInflater() + { + if (asm == null) throw new System.IO.FileNotFoundException("Can not load ICSharpCode.SharpZipLib.dll"); + return new Inflater(asm.CreateInstance("ICSharpCode.SharpZipLib.Zip.Compression.Inflater")); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/Single.cs b/external/Lucene.Net.Light/src/core/Support/Single.cs new file mode 100644 index 0000000000..12fa50014a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/Single.cs @@ -0,0 +1,131 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
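A round-trip sketch for the reflection-based compression bridge above. It assumes ICSharpCode.SharpZipLib.dll is resolvable at runtime; otherwise CreateDeflater/CreateInflater throw FileNotFoundException. The input string and buffer sizes are illustrative, and real callers would loop until IsFinished for larger payloads.

using System;
using System.Text;
using Lucene.Net.Support;

public static class CompressionSketch
{
    public static void Demo()
    {
        byte[] input = Encoding.UTF8.GetBytes("stored field text, stored field text");

        Deflater deflater = SharpZipLib.CreateDeflater();
        deflater.SetLevel(Deflater.BEST_COMPRESSION);
        deflater.SetInput(input, 0, input.Length);
        deflater.Finish();

        // One Deflate call is enough here because the output buffer easily fits the result.
        byte[] compressed = new byte[input.Length * 2];
        int written = deflater.Deflate(compressed);

        Inflater inflater = SharpZipLib.CreateInflater();
        byte[] packed = new byte[written];
        Array.Copy(compressed, packed, written);
        inflater.SetInput(packed);

        byte[] restored = new byte[input.Length];
        inflater.Inflate(restored);
        Console.WriteLine(Encoding.UTF8.GetString(restored));
    }
}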
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Globalization; + +namespace Lucene.Net.Support +{ + /// + /// + /// + public class Single + { + /// + /// + /// + /// + /// + /// + /// + public static System.Single Parse(System.String s, System.Globalization.NumberStyles style, System.IFormatProvider provider) + { + if (s.EndsWith("f") || s.EndsWith("F")) + return System.Single.Parse(s.Substring(0, s.Length - 1), style, provider); + else + return System.Single.Parse(s, style, provider); + } + + /// + /// + /// + /// + /// + /// + public static System.Single Parse(System.String s, System.IFormatProvider provider) + { + if (s.EndsWith("f") || s.EndsWith("F")) + return System.Single.Parse(s.Substring(0, s.Length - 1), provider); + else + return System.Single.Parse(s, provider); + } + + /// + /// + /// + /// + /// + /// + public static System.Single Parse(System.String s, System.Globalization.NumberStyles style) + { + if (s.EndsWith("f") || s.EndsWith("F")) + return System.Single.Parse(s.Substring(0, s.Length - 1), style); + else + return System.Single.Parse(s, style); + } + + /// + /// + /// + /// + /// + public static System.Single Parse(System.String s) + { + if (s.EndsWith("f") || s.EndsWith("F")) + return System.Single.Parse(s.Substring(0, s.Length - 1).Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator)); + else + return System.Single.Parse(s.Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator)); + } + + public static bool TryParse(System.String s, out float f) + { + bool ok = false; + + if (s.EndsWith("f") || s.EndsWith("F")) + ok = System.Single.TryParse(s.Substring(0, s.Length - 1).Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator), out f); + else + ok = System.Single.TryParse(s.Replace(".", CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator), out f); + + return ok; + } + + /// + /// + /// + /// + /// + public static string ToString(float f) + { + return f.ToString().Replace(CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, "."); + } + + /// + /// + /// + /// + /// + /// + public static string ToString(float f, string format) + { + return f.ToString(format).Replace(CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, "."); + } + + public static int FloatToIntBits(float value) + { + return BitConverter.ToInt32(BitConverter.GetBytes(value), 0); + } + + public static float IntBitsToFloat(int value) + { + return BitConverter.ToSingle(BitConverter.GetBytes(value), 0); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/TextSupport.cs b/external/Lucene.Net.Light/src/core/Support/TextSupport.cs new file mode 100644 index 0000000000..de4da46dda --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/TextSupport.cs @@ -0,0 +1,49 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
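A sketch of the Single helpers above, which tolerate a Java-style trailing 'f'/'F' suffix and always read and write '.' as the decimal separator regardless of the current culture. The alias is only there to avoid the name clash with System.Single; the values are illustrative.

using System;
using Single = Lucene.Net.Support.Single;

public static class SingleSketch
{
    public static void Demo()
    {
        float f = Single.Parse("1.5f");                  // trailing f/F is stripped
        Console.WriteLine(Single.ToString(f));           // 1.5, even on a comma-decimal culture

        int bits = Single.FloatToIntBits(1.0f);
        Console.WriteLine(bits.ToString("X"));           // 3F800000
        Console.WriteLine(Single.IntBitsToFloat(bits));  // 1
    }
}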
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +namespace Lucene.Net.Support +{ + public class TextSupport + { + /// + /// Copies an array of chars obtained from a String into a specified array of chars + /// + /// The String to get the chars from + /// Position of the String to start getting the chars + /// Position of the String to end getting the chars + /// Array to return the chars + /// Position of the destination array of chars to start storing the chars + /// An array of chars + public static void GetCharsFromString(string sourceString, int sourceStart, int sourceEnd, char[] destinationArray, int destinationStart) + { + int sourceCounter; + int destinationCounter; + sourceCounter = sourceStart; + destinationCounter = destinationStart; + while (sourceCounter < sourceEnd) + { + destinationArray[destinationCounter] = (char)sourceString[sourceCounter]; + sourceCounter++; + destinationCounter++; + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/ThreadClass.cs b/external/Lucene.Net.Light/src/core/Support/ThreadClass.cs new file mode 100644 index 0000000000..6657424eac --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/ThreadClass.cs @@ -0,0 +1,315 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Threading; + +namespace Lucene.Net.Support +{ + /// + /// Support class used to handle threads + /// + public class ThreadClass : IThreadRunnable + { + /// + /// The instance of System.Threading.Thread + /// + private System.Threading.Thread threadField; + + + /// + /// Initializes a new instance of the ThreadClass class + /// + public ThreadClass() + { + threadField = new System.Threading.Thread(new System.Threading.ThreadStart(Run)); + } + + /// + /// Initializes a new instance of the Thread class. + /// + /// The name of the thread + public ThreadClass(System.String Name) + { + threadField = new System.Threading.Thread(new System.Threading.ThreadStart(Run)); + this.Name = Name; + } + + /// + /// Initializes a new instance of the Thread class. 
+ /// + /// A ThreadStart delegate that references the methods to be invoked when this thread begins executing + public ThreadClass(System.Threading.ThreadStart Start) + { + threadField = new System.Threading.Thread(Start); + } + + /// + /// Initializes a new instance of the Thread class. + /// + /// A ThreadStart delegate that references the methods to be invoked when this thread begins executing + /// The name of the thread + public ThreadClass(System.Threading.ThreadStart Start, System.String Name) + { + threadField = new System.Threading.Thread(Start); + this.Name = Name; + } + + /// + /// This method has no functionality unless the method is overridden + /// + public virtual void Run() + { + } + + /// + /// Causes the operating system to change the state of the current thread instance to ThreadState.Running + /// + public virtual void Start() + { + threadField.Start(); + } + + /// + /// Interrupts a thread that is in the WaitSleepJoin thread state + /// + public virtual void Interrupt() + { + threadField.Interrupt(); + } + + /// + /// Gets the current thread instance + /// + public System.Threading.Thread Instance + { + get + { + return threadField; + } + set + { + threadField = value; + } + } + + /// + /// Gets or sets the name of the thread + /// + public System.String Name + { + get + { + return threadField.Name; + } + set + { + if (threadField.Name == null) + threadField.Name = value; + } + } + + public void SetDaemon(bool isDaemon) + { + threadField.IsBackground = isDaemon; + } + + /// + /// Gets or sets a value indicating the scheduling priority of a thread + /// + public System.Threading.ThreadPriority Priority + { + get + { + try + { + return threadField.Priority; + } + catch + { + return ThreadPriority.Normal; + } + } + set + { + try + { + threadField.Priority = value; + } + catch { } + + } + } + + /// + /// Gets a value indicating the execution status of the current thread + /// + public bool IsAlive + { + get + { + return threadField.IsAlive; + } + } + + /// + /// Gets or sets a value indicating whether or not a thread is a background thread. + /// + public bool IsBackground + { + get + { + return threadField.IsBackground; + } + set + { + threadField.IsBackground = value; + } + } + + /// + /// Blocks the calling thread until a thread terminates + /// + public void Join() + { + threadField.Join(); + } + + /// + /// Blocks the calling thread until a thread terminates or the specified time elapses + /// + /// Time of wait in milliseconds + public void Join(long MiliSeconds) + { + threadField.Join(new System.TimeSpan(MiliSeconds * 10000)); + } + + /// + /// Blocks the calling thread until a thread terminates or the specified time elapses + /// + /// Time of wait in milliseconds + /// Time of wait in nanoseconds + public void Join(long MiliSeconds, int NanoSeconds) + { + threadField.Join(new System.TimeSpan(MiliSeconds * 10000 + NanoSeconds * 100)); + } + + /// + /// Resumes a thread that has been suspended + /// + public void Resume() + { + Monitor.PulseAll(threadField); + } + + /// + /// Raises a ThreadAbortException in the thread on which it is invoked, + /// to begin the process of terminating the thread. Calling this method + /// usually terminates the thread + /// + public void Abort() + { + threadField.Abort(); + } + + /// + /// Raises a ThreadAbortException in the thread on which it is invoked, + /// to begin the process of terminating the thread while also providing + /// exception information about the thread termination. 
+ /// Calling this method usually terminates the thread. + /// + /// An object that contains application-specific information, such as state, which can be used by the thread being aborted + public void Abort(object stateInfo) + { + threadField.Abort(stateInfo); + } + + /// + /// Suspends the thread, if the thread is already suspended it has no effect + /// + public void Suspend() + { + Monitor.Wait(threadField); + } + + /// + /// Obtain a String that represents the current object + /// + /// A String that represents the current object + public override System.String ToString() + { + return "Thread[" + Name + "," + Priority.ToString() + "]"; + } + + [ThreadStatic] + static ThreadClass This = null; + + // named as the Java version + public static ThreadClass CurrentThread() + { + return Current(); + } + + public static void Sleep(long ms) + { + // casting long ms to int ms could lose resolution, however unlikely + // that someone would want to sleep for that long... + Thread.Sleep((int)ms); + } + + /// + /// Gets the currently running thread + /// + /// The currently running thread + public static ThreadClass Current() + { + if (This == null) + { + This = new ThreadClass(); + This.Instance = Thread.CurrentThread; + } + return This; + } + + public static bool operator ==(ThreadClass t1, object t2) + { + if (((object)t1) == null) return t2 == null; + return t1.Equals(t2); + } + + public static bool operator !=(ThreadClass t1, object t2) + { + return !(t1 == t2); + } + + public override bool Equals(object obj) + { + if (obj == null) return false; + if (obj is ThreadClass) return this.threadField.Equals(((ThreadClass)obj).threadField); + return false; + } + + public override int GetHashCode() + { + return this.threadField.GetHashCode(); + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/ThreadLock.cs b/external/Lucene.Net.Light/src/core/Support/ThreadLock.cs new file mode 100644 index 0000000000..93cd71900f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/ThreadLock.cs @@ -0,0 +1,82 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
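A minimal sketch of the ThreadClass wrapper above, which mirrors java.lang.Thread: derive from it, override Run, then call Start and Join. The Worker type and thread name are illustrative.

using System;
using Lucene.Net.Support;

public class Worker : ThreadClass
{
    public override void Run()
    {
        Console.WriteLine("running on " + Name);
    }
}

public static class ThreadClassSketch
{
    public static void Demo()
    {
        var worker = new Worker { Name = "worker-1" };
        worker.SetDaemon(false);   // keep the process alive until Run completes
        worker.Start();
        worker.Join();             // block until the thread finishes
    }
}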
+ * +*/ + +using System.Threading; + +namespace Lucene.Net.Support +{ + /// + /// Abstract base class that provides a synchronization interface + /// for derived lock types + /// + public abstract class ThreadLock + { + public abstract void Enter(object obj); + public abstract void Exit(object obj); + + private static readonly ThreadLock _nullLock = new NullThreadLock(); + private static readonly ThreadLock _monitorLock = new MonitorThreadLock(); + + /// + /// A ThreadLock class that actually does no locking + /// Used in ParallelMultiSearcher/MultiSearcher + /// + public static ThreadLock NullLock + { + get { return _nullLock; } + } + + /// + /// Wrapper class for the Monitor Enter/Exit methods + /// using the interface + /// + public static ThreadLock MonitorLock + { + get { return _monitorLock; } + } + + private sealed class NullThreadLock : ThreadLock + { + public override void Enter(object obj) + { + // Do nothing + } + + public override void Exit(object obj) + { + // Do nothing + } + } + + private sealed class MonitorThreadLock : ThreadLock + { + public override void Enter(object obj) + { + Monitor.Enter(obj); + } + + public override void Exit(object obj) + { + Monitor.Exit(obj); + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Support/WeakDictionary.cs b/external/Lucene.Net.Light/src/core/Support/WeakDictionary.cs new file mode 100644 index 0000000000..880b02a31c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Support/WeakDictionary.cs @@ -0,0 +1,296 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
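A sketch of how the ThreadLock abstraction above is meant to be consumed: the caller is handed a ThreadLock so real locking (MonitorLock) can be swapped for a no-op (NullLock) on single-threaded paths. The Counter type is illustrative.

using Lucene.Net.Support;

public class Counter
{
    private readonly ThreadLock threadLock;
    private readonly object sync = new object();
    private int count;

    // Pass ThreadLock.MonitorLock for real synchronization,
    // or ThreadLock.NullLock when single-threaded use is guaranteed.
    public Counter(ThreadLock threadLock)
    {
        this.threadLock = threadLock;
    }

    public void Increment()
    {
        threadLock.Enter(sync);
        try
        {
            count++;
        }
        finally
        {
            threadLock.Exit(sync);
        }
    }

    public int Count
    {
        get { return count; }
    }
}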
+ * +*/ + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace Lucene.Net.Support +{ + public sealed class WeakDictionary : IDictionary + { + private HashMap, TValue> _hm; + private int _gcCollections = 0; + + public WeakDictionary(int initialCapacity) : this(initialCapacity, Enumerable.Empty>()) + { } + + public WeakDictionary() : this(32, Enumerable.Empty>()) + { } + + public WeakDictionary(IEnumerable> otherDictionary) : this(32, otherDictionary) + { } + + private WeakDictionary(int initialCapacity, IEnumerable> otherDict) + { + _hm = new HashMap, TValue>(initialCapacity); + foreach (var kvp in otherDict) + { + _hm.Add(new WeakKey(kvp.Key), kvp.Value); + } + } + + private void Clean() + { + if (_hm.Count == 0) return; + var newHm = new HashMap, TValue>(); + foreach (var entry in _hm.Where(x => x.Key != null && x.Key.IsAlive)) + { + newHm.Add(entry.Key, entry.Value); + } + _hm = newHm; + } + + private void CleanIfNeeded() + { + int currentColCount = GC.CollectionCount(0); + if (currentColCount > _gcCollections) + { + Clean(); + _gcCollections = currentColCount; + } + } + + public IEnumerator> GetEnumerator() + { + foreach (var kvp in _hm.Where(x => x.Key.IsAlive)) + { + yield return new KeyValuePair(kvp.Key.Target, kvp.Value); + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + void ICollection>.Add(KeyValuePair item) + { + CleanIfNeeded(); + ((ICollection, TValue>>) _hm).Add( + new KeyValuePair, TValue>(new WeakKey(item.Key), item.Value)); + } + + public void Clear() + { + _hm.Clear(); + } + + bool ICollection>.Contains(KeyValuePair item) + { + return ((ICollection, TValue>>)_hm).Contains( + new KeyValuePair, TValue>(new WeakKey(item.Key), item.Value)); + } + + bool ICollection>.Remove(KeyValuePair item) + { + return ((ICollection, TValue>>)_hm).Remove( + new KeyValuePair, TValue>(new WeakKey(item.Key), item.Value)); + } + + public int Count + { + get + { + CleanIfNeeded(); + return _hm.Count; + } + } + + public bool IsReadOnly + { + get { return false; } + } + + public bool ContainsKey(TKey key) + { + return _hm.ContainsKey(new WeakKey(key)); + } + + public void Add(TKey key, TValue value) + { + CleanIfNeeded(); + _hm.Add(new WeakKey(key), value); + } + + public bool Remove(TKey key) + { + return _hm.Remove(new WeakKey(key)); + } + + public bool TryGetValue(TKey key, out TValue value) + { + return _hm.TryGetValue(new WeakKey(key), out value); + } + + public TValue this[TKey key] + { + get { return _hm[new WeakKey(key)]; } + set + { + CleanIfNeeded(); + _hm[new WeakKey(key)] = value; + } + } + + public ICollection Keys + { + get + { + CleanIfNeeded(); + return new KeyCollection(_hm); + } + } + + public ICollection Values + { + get + { + CleanIfNeeded(); + return _hm.Values; + } + } + + void ICollection>.CopyTo(KeyValuePair[] array, int arrayIndex) + { + throw new NotSupportedException(); + } + + #region KeyCollection + class KeyCollection : ICollection + { + private readonly HashMap, TValue> _internalDict; + + public KeyCollection(HashMap, TValue> dict) + { + _internalDict = dict; + } + + public IEnumerator GetEnumerator() + { + foreach (var key in _internalDict.Keys) + { + yield return key.Target; + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + public void CopyTo(TKey[] array, int arrayIndex) + { + throw new NotImplementedException("Implement this as needed"); + } + + public int Count + { + get { return _internalDict.Count + 1; } + } + + public bool 
IsReadOnly + { + get { return true; } + } + + #region Explicit Interface Definitions + bool ICollection.Contains(TKey item) + { + throw new NotSupportedException(); + } + + void ICollection.Add(TKey item) + { + throw new NotSupportedException(); + } + + void ICollection.Clear() + { + throw new NotSupportedException(); + } + + bool ICollection.Remove(TKey item) + { + throw new NotSupportedException(); + } + #endregion + } + #endregion + + + /// + /// A weak reference wrapper for the hashtable keys. Whenever a key\value pair + /// is added to the hashtable, the key is wrapped using a WeakKey. WeakKey saves the + /// value of the original object hashcode for fast comparison. + /// + class WeakKey + { + WeakReference reference; + int hashCode; + + public WeakKey(T key) + { + if (key == null) + throw new ArgumentNullException("key"); + + hashCode = key.GetHashCode(); + reference = new WeakReference(key); + } + + public override int GetHashCode() + { + return hashCode; + } + + public override bool Equals(object obj) + { + if (!reference.IsAlive || obj == null) return false; + + if (object.ReferenceEquals(this, obj)) + { + return true; + } + + if (obj is WeakKey) + { + var other = (WeakKey)obj; + + var referenceTarget = reference.Target; // Careful: can be null in the mean time... + return referenceTarget != null && referenceTarget.Equals(other.Target); + } + + return false; + } + + public T Target + { + get { return (T)reference.Target; } + } + + public bool IsAlive + { + get { return reference.IsAlive; } + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Util/ArrayUtil.cs b/external/Lucene.Net.Light/src/core/Util/ArrayUtil.cs new file mode 100644 index 0000000000..7ab69c9f4b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/ArrayUtil.cs @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Methods for manipulating arrays. + public sealed class ArrayUtil + { + /* + Begin Apache Harmony code + + Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java + + */ + + /// Parses the string argument as if it was an int value and returns the + /// result. Throws NumberFormatException if the string does not represent an + /// int quantity. + /// + /// + /// a string representation of an int quantity. + /// + /// int the value represented by the argument + /// + /// NumberFormatException if the argument could not be parsed as an int quantity. + public static int ParseInt(char[] chars) + { + return ParseInt(chars, 0, chars.Length, 10); + } + + /// Parses a char array into an int. 
+ /// the character array + /// + /// The offset into the array + /// + /// The length + /// + /// the int + /// + /// NumberFormatException if it can't parse + public static int ParseInt(char[] chars, int offset, int len) + { + return ParseInt(chars, offset, len, 10); + } + + /// Parses the string argument as if it was an int value and returns the + /// result. Throws NumberFormatException if the string does not represent an + /// int quantity. The second argument specifies the radix to use when parsing + /// the value. + /// + /// + /// a string representation of an int quantity. + /// + /// + /// + /// the base to use for conversion. + /// + /// int the value represented by the argument + /// + /// NumberFormatException if the argument could not be parsed as an int quantity. + public static int ParseInt(char[] chars, int offset, int len, int radix) + { + if (chars == null || radix < 2 || radix > 36) + { + throw new System.FormatException(); + } + int i = 0; + if (len == 0) + { + throw new System.FormatException("chars length is 0"); + } + bool negative = chars[offset + i] == '-'; + if (negative && ++i == len) + { + throw new System.FormatException("can't convert to an int"); + } + if (negative == true) + { + offset++; + len--; + } + return Parse(chars, offset, len, radix, negative); + } + + + private static int Parse(char[] chars, int offset, int len, int radix, bool negative) + { + int max = System.Int32.MinValue / radix; + int result = 0; + for (int i = 0; i < len; i++) + { + int digit = (int) System.Char.GetNumericValue(chars[i + offset]); + if (digit == - 1) + { + throw new System.FormatException("Unable to parse"); + } + if (max > result) + { + throw new System.FormatException("Unable to parse"); + } + int next = result * radix - digit; + if (next > result) + { + throw new System.FormatException("Unable to parse"); + } + result = next; + } + /*while (offset < len) { + + }*/ + if (!negative) + { + result = - result; + if (result < 0) + { + throw new System.FormatException("Unable to parse"); + } + } + return result; + } + + + /* + + END APACHE HARMONY CODE + */ + + + public static int GetNextSize(int targetSize) + { + /* This over-allocates proportional to the list size, making room + * for additional growth. The over-allocation is mild, but is + * enough to give linear-time amortized behavior over a long + * sequence of appends() in the presence of a poorly-performing + * system realloc(). + * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... + */ + return (targetSize >> 3) + (targetSize < 9?3:6) + targetSize; + } + + public static int GetShrinkSize(int currentSize, int targetSize) + { + int newSize = GetNextSize(targetSize); + // Only reallocate if we are "substantially" smaller. 
+ // This saves us from "running hot" (constantly making a + // bit bigger then a bit smaller, over and over): + if (newSize < currentSize / 2) + return newSize; + else + return currentSize; + } + + public static int[] Grow(int[] array, int minSize) + { + if (array.Length < minSize) + { + int[] newArray = new int[GetNextSize(minSize)]; + Array.Copy(array, 0, newArray, 0, array.Length); + return newArray; + } + else + return array; + } + + public static int[] Grow(int[] array) + { + return Grow(array, 1 + array.Length); + } + + public static int[] Shrink(int[] array, int targetSize) + { + int newSize = GetShrinkSize(array.Length, targetSize); + if (newSize != array.Length) + { + int[] newArray = new int[newSize]; + Array.Copy(array, 0, newArray, 0, newSize); + return newArray; + } + else + return array; + } + + public static long[] Grow(long[] array, int minSize) + { + if (array.Length < minSize) + { + long[] newArray = new long[GetNextSize(minSize)]; + Array.Copy(array, 0, newArray, 0, array.Length); + return newArray; + } + else + return array; + } + + public static long[] Grow(long[] array) + { + return Grow(array, 1 + array.Length); + } + + public static long[] Shrink(long[] array, int targetSize) + { + int newSize = GetShrinkSize(array.Length, targetSize); + if (newSize != array.Length) + { + long[] newArray = new long[newSize]; + Array.Copy(array, 0, newArray, 0, newSize); + return newArray; + } + else + return array; + } + + public static byte[] Grow(byte[] array, int minSize) + { + if (array.Length < minSize) + { + byte[] newArray = new byte[GetNextSize(minSize)]; + Array.Copy(array, 0, newArray, 0, array.Length); + return newArray; + } + else + return array; + } + + public static byte[] Grow(byte[] array) + { + return Grow(array, 1 + array.Length); + } + + public static byte[] Shrink(byte[] array, int targetSize) + { + int newSize = GetShrinkSize(array.Length, targetSize); + if (newSize != array.Length) + { + byte[] newArray = new byte[newSize]; + Array.Copy(array, 0, newArray, 0, newSize); + return newArray; + } + else + return array; + } + + /// Returns hash of chars in range start (inclusive) to + /// end (inclusive) + /// + public static int HashCode(char[] array, int start, int end) + { + int code = 0; + for (int i = end - 1; i >= start; i--) + code = code * 31 + array[i]; + return code; + } + + /// Returns hash of chars in range start (inclusive) to + /// end (inclusive) + /// + public static int HashCode(byte[] array, int start, int end) + { + int code = 0; + for (int i = end - 1; i >= start; i--) + code = code * 31 + array[i]; + return code; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/Attribute.cs b/external/Lucene.Net.Light/src/core/Util/Attribute.cs new file mode 100644 index 0000000000..b0a76b83d5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/Attribute.cs @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
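[Editor's sketch, not part of the diff] A short illustration of the ArrayUtil helpers just added: Grow over-allocates via GetNextSize so a long run of appends stays amortized linear, and ParseInt works directly on a char[] slice without allocating a string. The demo class name is invented.

    using Lucene.Net.Util;

    class ArrayUtilDemo
    {
        static void Main()
        {
            // Grow only reallocates when the array is too small, and then
            // over-allocates according to GetNextSize.
            int[] buf = new int[0];
            for (int size = 0; size < 100; size++)
            {
                buf = ArrayUtil.Grow(buf, size + 1);
                buf[size] = size;
            }

            // ParseInt parses the char[] content as a base-10 integer.
            char[] chars = "-1234".ToCharArray();
            int value = ArrayUtil.ParseInt(chars);   // -1234

            System.Console.WriteLine(buf.Length + " " + value);
        }
    }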
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Base class for Attributes that can be added to a + /// . + ///

+ /// Attributes are used to add data in a dynamic, yet type-safe way to a source + /// of usually streamed objects, e.g. a TokenStream. + ///

+ [Serializable] + public abstract class Attribute : System.ICloneable, IAttribute + { + /// Clears the values in this AttributeImpl and resets it to its + /// default value. If this implementation implements more than one Attribute interface + /// it clears all. + /// + public abstract void Clear(); + + /// The default implementation of this method accesses all declared + /// fields of this object and prints the values in the following syntax: + /// + /// + /// public String toString() { + /// return "start=" + startOffset + ",end=" + endOffset; + /// } + /// + /// + /// This method may be overridden by subclasses. + /// + public override System.String ToString() + { + System.Text.StringBuilder buffer = new System.Text.StringBuilder(); + System.Type clazz = this.GetType(); + System.Reflection.FieldInfo[] fields = clazz.GetFields(System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Static); + try + { + for (int i = 0; i < fields.Length; i++) + { + System.Reflection.FieldInfo f = fields[i]; + if (f.IsStatic) + continue; + //f.setAccessible(true); // {{Aroush-2.9}} java.lang.reflect.AccessibleObject.setAccessible + System.Object value_Renamed = f.GetValue(this); + if (buffer.Length > 0) + { + buffer.Append(','); + } + if (value_Renamed == null) + { + buffer.Append(f.Name + "=null"); + } + else + { + buffer.Append(f.Name + "=" + value_Renamed); + } + } + } + catch (System.UnauthorizedAccessException e) + { + // this should never happen, because we're just accessing fields + // from 'this' + throw new System.SystemException(e.Message, e); + } + + return buffer.ToString(); + } + + /// Subclasses must implement this method and should compute + /// a hashCode similar to this: + /// + /// public int hashCode() { + /// int code = startOffset; + /// code = code * 31 + endOffset; + /// return code; + /// } + /// + /// + /// see also + /// + abstract public override int GetHashCode(); + + /// All values used for computation of + /// should be checked here for equality. + /// + /// see also + /// + abstract public override bool Equals(System.Object other); + + /// Copies the values from this Attribute into the passed-in + /// target attribute. The target implementation must support all the + /// Attributes this implementation supports. + /// + public abstract void CopyTo(Attribute target); + + /// Shallow clone. Subclasses must override this if they + /// need to clone any members deeply, + /// + public virtual System.Object Clone() + { + System.Object clone = null; + try + { + clone = base.MemberwiseClone(); + } + catch (System.Exception e) + { + throw new System.SystemException(e.Message, e); // shouldn't happen + } + return clone; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/AttributeSource.cs b/external/Lucene.Net.Light/src/core/Util/AttributeSource.cs new file mode 100644 index 0000000000..6d9265122a --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/AttributeSource.cs @@ -0,0 +1,510 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
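[Editor's sketch, not part of the vendored sources] For orientation, a hypothetical attribute built on the abstract Attribute class above; the IPartOfSpeechAttribute/PartOfSpeechAttribute pair and its Tag property are invented for illustration. Note that the default factory in AttributeSource.cs (later in this diff) resolves the implementation type by stripping the leading "I" from the interface name, which is why the two names are paired this way.

    using Lucene.Net.Util;

    // Hypothetical attribute interface; must extend IAttribute.
    public interface IPartOfSpeechAttribute : IAttribute
    {
        int Tag { get; set; }
    }

    [System.Serializable]
    public class PartOfSpeechAttribute : Lucene.Net.Util.Attribute, IPartOfSpeechAttribute
    {
        public int Tag { get; set; }

        // Reset to the default value.
        public override void Clear() { Tag = 0; }

        public override int GetHashCode() { return Tag; }

        public override bool Equals(object other)
        {
            var att = other as PartOfSpeechAttribute;
            return att != null && att.Tag == Tag;
        }

        // Copy this attribute's values into another instance of the same interface.
        public override void CopyTo(Lucene.Net.Util.Attribute target)
        {
            ((IPartOfSpeechAttribute)target).Tag = Tag;
        }
    }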
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using Lucene.Net.Support; +using TokenStream = Lucene.Net.Analysis.TokenStream; + +namespace Lucene.Net.Util +{ + + /// An AttributeSource contains a list of different s, + /// and methods to add and get them. There can only be a single instance + /// of an attribute in the same AttributeSource instance. This is ensured + /// by passing in the actual type of the Attribute (Class<Attribute>) to + /// the , which then checks if an instance of + /// that type is already present. If yes, it returns the instance, otherwise + /// it creates a new instance and returns it. + /// + public class AttributeSource + { + /// An AttributeFactory creates instances of s. + public abstract class AttributeFactory + { + /// returns an for the supplied interface class. + public abstract Attribute CreateAttributeInstance() where T : IAttribute; + + /// This is the default factory that creates s using the + /// class name of the supplied interface class by appending Impl to it. + /// + public static readonly AttributeFactory DEFAULT_ATTRIBUTE_FACTORY = new DefaultAttributeFactory(); + + private sealed class DefaultAttributeFactory:AttributeFactory + { + // This should be WeakDictionary> where typeof(T) is Attribute and TImpl is typeof(AttributeImpl) + private static readonly WeakDictionary attClassImplMap = + new WeakDictionary(); + + internal DefaultAttributeFactory() + { + } + + public override Attribute CreateAttributeInstance() + { + try + { + return (Attribute)System.Activator.CreateInstance(GetClassForInterface()); + } + catch (System.UnauthorizedAccessException) + { + throw new System.ArgumentException("Could not instantiate implementing class for " + typeof(TAttImpl).FullName); + } + //catch (System.Exception e) + //{ + // throw new System.ArgumentException("Could not instantiate implementing class for " + typeof(TAttImpl).FullName); + //} + } + + private static System.Type GetClassForInterface() where T : IAttribute + { + lock (attClassImplMap) + { + var attClass = typeof (T); + WeakReference refz = attClassImplMap[attClass]; + System.Type clazz = (refz == null) ? null : ((System.Type) refz.Target); + if (clazz == null) + { + try + { + string name = attClass.FullName.Replace(attClass.Name, attClass.Name.Substring(1)) + ", " + attClass.Assembly.FullName; + attClassImplMap.Add(attClass, new WeakReference( clazz = System.Type.GetType(name, true))); //OK + } + catch (System.TypeLoadException) // was System.Exception + { + throw new System.ArgumentException("Could not find implementing class for " + attClass.FullName); + } + } + return clazz; + } + } + } + } + + // These two maps must always be in sync!!! + // So they are private, final and read-only from the outside (read-only iterators) + private GeneralKeyedCollection attributes; + private GeneralKeyedCollection attributeImpls; + + private State[] currentState = null; + private AttributeFactory factory; + + /// An AttributeSource using the default attribute factory . 
+ public AttributeSource():this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY) + { + } + + /// An AttributeSource that uses the same attributes as the supplied one. + public AttributeSource(AttributeSource input) + { + if (input == null) + { + throw new System.ArgumentException("input AttributeSource must not be null"); + } + this.attributes = input.attributes; + this.attributeImpls = input.attributeImpls; + this.currentState = input.currentState; + this.factory = input.factory; + } + + /// An AttributeSource using the supplied for creating new instances. + public AttributeSource(AttributeFactory factory) + { + this.attributes = new GeneralKeyedCollection(att => att.Key); + this.attributeImpls = new GeneralKeyedCollection(att => att.Key); + this.currentState = new State[1]; + this.factory = factory; + } + + /// Returns the used AttributeFactory. + public virtual AttributeFactory Factory + { + get { return factory; } + } + + /// Returns a new iterator that iterates the attribute classes + /// in the same order they were added in. + /// Signature for Java 1.5: public Iterator<Class<? extends Attribute>> getAttributeClassesIterator() + /// + /// Note that this return value is different from Java in that it enumerates over the values + /// and not the keys + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual IEnumerable GetAttributeTypesIterator() + { + return this.attributes.Select(item => item.Key); + } + + /// Returns a new iterator that iterates all unique Attribute implementations. + /// This iterator may contain less entries that , + /// if one instance implements more than one Attribute interface. + /// Signature for Java 1.5: public Iterator<AttributeImpl> getAttributeImplsIterator() + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public virtual IEnumerable GetAttributeImplsIterator() + { + var initState = GetCurrentState(); + while (initState != null) + { + var att = initState.attribute; + initState = initState.next; + yield return att; + } + } + + /// a cache that stores all interfaces for known implementation classes for performance (slow reflection) + private static readonly WeakDictionary> + knownImplClasses = new WeakDictionary>(); + + /// + /// Expert: Adds a custom AttributeImpl instance with one or more Attribute interfaces. + ///

Please note: it is not guaranteed that att is added to + /// the AttributeSource, because the provided attributes may already exist. + /// You should always retrieve the wanted attributes using GetAttribute<T>() after adding + /// with this method and cast to your class. + /// The recommended way to use custom implementations is via an AttributeFactory. + ///
+ public virtual void AddAttributeImpl(Attribute att) + { + System.Type clazz = att.GetType(); + if (attributeImpls.Contains(clazz)) + return ; + System.Collections.Generic.LinkedList foundInterfaces; + lock (knownImplClasses) + { + foundInterfaces = knownImplClasses[clazz]; + if (foundInterfaces == null) + { + // we have a strong reference to the class instance holding all interfaces in the list (parameter "att"), + // so all WeakReferences are never evicted by GC + knownImplClasses.Add(clazz, foundInterfaces = new LinkedList()); + // find all interfaces that this attribute instance implements + // and that extend the Attribute interface + System.Type actClazz = clazz; + do + { + System.Type[] interfaces = actClazz.GetInterfaces(); + for (int i = 0; i < interfaces.Length; i++) + { + System.Type curInterface = interfaces[i]; + if (curInterface != typeof(IAttribute) && typeof(IAttribute).IsAssignableFrom(curInterface)) + { + foundInterfaces.AddLast(new WeakReference(curInterface)); + } + } + actClazz = actClazz.BaseType; + } + while (actClazz != null); + } + } + + // add all interfaces of this AttributeImpl to the maps + foreach(var curInterfaceRef in foundInterfaces) + { + System.Type curInterface = (System.Type) curInterfaceRef.Target; + System.Diagnostics.Debug.Assert(curInterface != null, + "We have a strong reference on the class holding the interfaces, so they should never get evicted"); + // Attribute is a superclass of this interface + if (!attributes.ContainsKey(curInterface)) + { + // invalidate state to force recomputation in captureState() + this.currentState[0] = null; + attributes.Add(new AttributeImplItem(curInterface, att)); + if (!attributeImpls.ContainsKey(clazz)) + { + attributeImpls.Add(new AttributeImplItem(clazz, att)); + } + } + } + } + + /// The caller must pass in a Class<? extends Attribute> value. + /// This method first checks if an instance of that class is + /// already in this AttributeSource and returns it. Otherwise a + /// new instance is created, added to this AttributeSource and returned. + /// + // NOTE: Java has Class, .NET has no Type, this is not a perfect port + public virtual T AddAttribute() where T : IAttribute + { + var attClass = typeof (T); + if (!attributes.ContainsKey(attClass)) + { + if (!(attClass.IsInterface && typeof(IAttribute).IsAssignableFrom(attClass))) + { + throw new ArgumentException( + "AddAttribute() only accepts an interface that extends Attribute, but " + + attClass.FullName + " does not fulfil this contract." + ); + } + + AddAttributeImpl(this.factory.CreateAttributeInstance()); + } + + return (T)(IAttribute)attributes[attClass].Value; + } + + /// Returns true, iff this AttributeSource has any attributes + public virtual bool HasAttributes + { + get { return this.attributes.Count != 0; } + } + + /// The caller must pass in a Class<? extends Attribute> value. + /// Returns true, iff this AttributeSource contains the passed-in Attribute. + /// \ + public virtual bool HasAttribute() where T : IAttribute + { + return this.attributes.Contains(typeof(T)); + } + + /// + /// The caller must pass in a Class<? extends Attribute> value. + /// Returns the instance of the passed in Attribute contained in this AttributeSource + /// + /// + /// IllegalArgumentException if this AttributeSource does not contain the Attribute. + /// It is recommended to always use even in consumers + /// of TokenStreams, because you cannot know if a specific TokenStream really uses + /// a specific Attribute. 
will automatically make the attribute + /// available. If you want to only use the attribute, if it is available (to optimize + /// consuming), use . + /// + // NOTE: Java has Class, .NET has no Type, this is not a perfect port + public virtual T GetAttribute() where T : IAttribute + { + var attClass = typeof (T); + if (!this.attributes.ContainsKey(attClass)) + { + throw new System.ArgumentException("This AttributeSource does not have the attribute '" + attClass.FullName + "'."); + } + else + { + return (T)(IAttribute)this.attributes[attClass].Value; + } + } + + /// This class holds the state of an AttributeSource. + /// + /// + /// + /// + public sealed class State : System.ICloneable + { + internal /*private*/ Attribute attribute; + internal /*private*/ State next; + + public System.Object Clone() + { + State clone = new State(); + clone.attribute = (Attribute) attribute.Clone(); + + if (next != null) + { + clone.next = (State) next.Clone(); + } + + return clone; + } + } + + private State GetCurrentState() + { + var s = currentState[0]; + if (s != null || !HasAttributes) + { + return s; + } + + var c = s = currentState[0] = new State(); + var it = attributeImpls.Values().GetEnumerator(); + it.MoveNext(); + c.attribute = it.Current.Value; + + while (it.MoveNext()) + { + c.next = new State(); + c = c.next; + c.attribute = it.Current.Value; + } + + return s; + } + + /// Resets all Attributes in this AttributeSource by calling + /// on each Attribute implementation. + /// + public virtual void ClearAttributes() + { + for (var state = GetCurrentState(); state != null; state = state.next) + { + state.attribute.Clear(); + } + } + + /// Captures the state of all Attributes. The return value can be passed to + /// to restore the state of this or another AttributeSource. + /// + public virtual State CaptureState() + { + var state = this.GetCurrentState(); + return (state == null) ? null : (State) state.Clone(); + } + + /// Restores this state by copying the values of all attribute implementations + /// that this state contains into the attributes implementations of the targetStream. + /// The targetStream must contain a corresponding instance for each argument + /// contained in this state (e.g. it is not possible to restore the state of + /// an AttributeSource containing a TermAttribute into a AttributeSource using + /// a Token instance as implementation). + /// + /// Note that this method does not affect attributes of the targetStream + /// that are not contained in this state. In other words, if for example + /// the targetStream contains an OffsetAttribute, but this state doesn't, then + /// the value of the OffsetAttribute remains unchanged. It might be desirable to + /// reset its value to the default, in which case the caller should first + /// call on the targetStream. 
+ /// + public virtual void RestoreState(State state) + { + if (state == null) + return ; + + do + { + if (!attributeImpls.ContainsKey(state.attribute.GetType())) + { + throw new System.ArgumentException("State contains an AttributeImpl that is not in this AttributeSource"); + } + state.attribute.CopyTo(attributeImpls[state.attribute.GetType()].Value); + state = state.next; + } + while (state != null); + } + + public override int GetHashCode() + { + var code = 0; + + for (var state = GetCurrentState(); state != null; state = state.next) + { + code = code*31 + state.attribute.GetHashCode(); + } + + return code; + } + + public override bool Equals(System.Object obj) + { + if (obj == this) + { + return true; + } + + if (obj is AttributeSource) + { + AttributeSource other = (AttributeSource) obj; + + if (HasAttributes) + { + if (!other.HasAttributes) + { + return false; + } + + if (this.attributeImpls.Count != other.attributeImpls.Count) + { + return false; + } + + // it is only equal if all attribute impls are the same in the same order + var thisState = this.GetCurrentState(); + var otherState = other.GetCurrentState(); + while (thisState != null && otherState != null) + { + if (otherState.attribute.GetType() != thisState.attribute.GetType() || !otherState.attribute.Equals(thisState.attribute)) + { + return false; + } + thisState = thisState.next; + otherState = otherState.next; + } + return true; + } + else + { + return !other.HasAttributes; + } + } + else + return false; + } + + public override System.String ToString() + { + System.Text.StringBuilder sb = new System.Text.StringBuilder().Append('('); + + if (HasAttributes) + { + if (currentState[0] == null) + { + currentState[0] = GetCurrentState(); + } + for (var state = currentState[0]; state != null; state = state.next) + { + if (state != currentState[0]) + sb.Append(','); + sb.Append(state.attribute.ToString()); + } + } + return sb.Append(')').ToString(); + } + + /// Performs a clone of all instances returned in a new + /// AttributeSource instance. This method can be used to e.g. create another TokenStream + /// with exactly the same attributes (using ) + /// + public virtual AttributeSource CloneAttributes() + { + var clone = new AttributeSource(this.factory); + + // first clone the impls + if (HasAttributes) + { + for (var state = GetCurrentState(); state != null; state = state.next) + { + var impl = (Attribute) state.attribute.Clone(); + + if (!clone.attributeImpls.ContainsKey(impl.GetType())) + { + clone.attributeImpls.Add(new AttributeImplItem(impl.GetType(), impl)); + } + } + } + + // now the interfaces + foreach (var att in this.attributes) + { + clone.attributes.Add(new AttributeImplItem(att.Key, clone.attributeImpls[att.Value.GetType()].Value)); + } + + return clone; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/AverageGuessMemoryModel.cs b/external/Lucene.Net.Light/src/core/Util/AverageGuessMemoryModel.cs new file mode 100644 index 0000000000..43dae7ac11 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/AverageGuessMemoryModel.cs @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
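[Editor's sketch, not part of the vendored sources] A usage sketch of the AttributeSource API above, reusing the hypothetical IPartOfSpeechAttribute from the earlier sketch; the generic type parameters, which this rendering of the diff has lost, are written out explicitly here.

    using Lucene.Net.Util;

    class AttributeSourceDemo
    {
        static void Main()
        {
            var source = new AttributeSource();

            // AddAttribute<T>() returns the single shared instance for this source;
            // asking again for the same interface yields the same object.
            var pos = source.AddAttribute<IPartOfSpeechAttribute>();
            pos.Tag = 42;

            // CaptureState snapshots all attribute values...
            AttributeSource.State snapshot = source.CaptureState();

            pos.Tag = 7;

            // ...and RestoreState copies the captured values back into the
            // existing attribute implementations.
            source.RestoreState(snapshot);
            System.Console.WriteLine(pos.Tag);   // 42 again
        }
    }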
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// An average, best guess, MemoryModel that should work okay on most systems. + /// + /// + public class AverageGuessMemoryModel:MemoryModel + { + public AverageGuessMemoryModel() + { + InitBlock(); + } + + private void InitBlock() + { + sizes = new IdentityDictionary() + { + {typeof (bool), 1}, + {typeof (byte), 1}, + {typeof(sbyte), 1}, + {typeof (char), 2}, + {typeof (short), 2}, + {typeof (int), 4}, + {typeof (float), 4}, + {typeof (double), 8}, + {typeof (long), 8} + }; + } + // best guess primitive sizes + private System.Collections.Generic.Dictionary sizes; + + /* + * (non-Javadoc) + * + * + */ + + public override int ArraySize + { + get { return 16; } + } + + /* + * (non-Javadoc) + * + * + */ + + public override int ClassSize + { + get { return 8; } + } + + /* (non-Javadoc) + * + */ + public override int GetPrimitiveSize(Type clazz) + { + return sizes[clazz]; + } + + /* (non-Javadoc) + * + */ + + public override int ReferenceSize + { + get { return 4; } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/BitUtil.cs b/external/Lucene.Net.Light/src/core/Util/BitUtil.cs new file mode 100644 index 0000000000..f5cbd79d02 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/BitUtil.cs @@ -0,0 +1,894 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Util +{ + // from org.apache.solr.util rev 555343 + + /// A variety of high efficiencly bit twiddling routines. 
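[Editor's sketch, not part of the diff] Before the implementations, a quick look at how the BitUtil helpers defined below are called; the values and expected outputs are worked by hand and the demo class is invented.

    using Lucene.Net.Util;

    class BitUtilDemo
    {
        static void Main()
        {
            long word = 0x9000L;                                             // bits 12 and 15 set
            System.Console.WriteLine(BitUtil.Pop(word));                     // 2
            System.Console.WriteLine(BitUtil.Ntz(word));                     // 12
            System.Console.WriteLine(BitUtil.NextHighestPowerOfTwo(3000));   // 4096

            // Pop_array / Pop_intersect treat a long[] as one large bit set.
            long[] a = { 0xFFL, 0x0L };   // 8 bits set
            long[] b = { 0x0FL, 0x0L };   // 4 bits set
            System.Console.WriteLine(BitUtil.Pop_array(a, 0, a.Length));         // 8
            System.Console.WriteLine(BitUtil.Pop_intersect(a, b, 0, b.Length));  // 4
        }
    }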
+ /// + /// + /// $Id$ + /// + public class BitUtil + { + + /// Returns the number of bits set in the long + public static int Pop(long x) + { + /* Hacker's Delight 32 bit pop function: + * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc + * + int pop(unsigned x) { + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + x = x + (x >> 8); + x = x + (x >> 16); + return x & 0x0000003F; + } + ***/ + + // 64 bit java version of the C function from above + x = x - ((Number.URShift(x, 1)) & 0x5555555555555555L); + x = (x & 0x3333333333333333L) + ((Number.URShift(x, 2)) & 0x3333333333333333L); + x = (x + (Number.URShift(x, 4))) & 0x0F0F0F0F0F0F0F0FL; + x = x + (Number.URShift(x, 8)); + x = x + (Number.URShift(x, 16)); + x = x + (Number.URShift(x, 32)); + return ((int) x) & 0x7F; + } + + /// Returns the number of set bits in an array of longs. + public static long Pop_array(long[] A, int wordOffset, int numWords) + { + /* + * Robert Harley and David Seal's bit counting algorithm, as documented + * in the revisions of Hacker's Delight + * http://www.hackersdelight.org/revisions.pdf + * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc + * + * This function was adapted to Java, and extended to use 64 bit words. + * if only we had access to wider registers like SSE from java... + * + * This function can be transformed to compute the popcount of other functions + * on bitsets via something like this: + * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' + * + */ + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) + { + /* C macro from Hacker's Delight + #define CSA(h,l, a,b,c) \ + {unsigned u = a ^ b; unsigned v = c; \ + h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, A[i], A[i+1]) + { + long b = A[i], c = A[i + 1]; + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, A[i+2], A[i+3]) + { + long b = A[i + 2], c = A[i + 3]; + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + //CSA(twosA, ones, ones, A[i+4], A[i+5]) + { + long b = A[i + 4], c = A[i + 5]; + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, A[i+6], A[i+7]) + { + long b = A[i + 6], c = A[i + 7]; + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + //CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += Pop(eights); + } + + // handle trailing words in a binary-search manner... + // derived from the loop above by setting specific elements to 0. 
+ // the original method in Hackers Delight used a simple for loop: + // for (i = i; i < n; i++) // Add in the last elements + // tot = tot + pop(A[i]); + + if (i <= n - 4) + { + long twosA, twosB, foursA, eights; + { + long b = A[i], c = A[i + 1]; + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = A[i + 2], c = A[i + 3]; + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 4; + } + + if (i <= n - 2) + { + long b = A[i], c = A[i + 1]; + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 2; + } + + if (i < n) + { + tot += Pop(A[i]); + } + + tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3); + + return tot; + } + + /// Returns the popcount or cardinality of the two sets after an intersection. + /// Neither array is modified. + /// + public static long Pop_intersect(long[] A, long[] B, int wordOffset, int numWords) + { + // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) + { + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1])) + { + long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3])) + { + long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + //CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5])) + { + long b = (A[i + 4] & B[i + 4]), c = (A[i + 5] & B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7])) + { + long b = (A[i + 6] & B[i + 6]), c = (A[i + 7] & B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + //CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += Pop(eights); + } + + + if (i <= n - 4) + { + long twosA, twosB, foursA, eights; + { + long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] & B[i + 2]), c = (A[i + 3] & B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 4; + } + + if (i <= n - 2) + { + long b = (A[i] & B[i]), c = (A[i + 1] & B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = 
fours ^ foursA; + + tot8 += Pop(eights); + i += 2; + } + + if (i < n) + { + tot += Pop((A[i] & B[i])); + } + + tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3); + + return tot; + } + + /// Returns the popcount or cardinality of the union of two sets. + /// Neither array is modified. + /// + public static long Pop_union(long[] A, long[] B, int wordOffset, int numWords) + { + // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| B[\1]\)/g' + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) + { + /* C macro from Hacker's Delight + #define CSA(h,l, a,b,c) \ + {unsigned u = a ^ b; unsigned v = c; \ + h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1])) + { + long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3])) + { + long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + //CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5])) + { + long b = (A[i + 4] | B[i + 4]), c = (A[i + 5] | B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7])) + { + long b = (A[i + 6] | B[i + 6]), c = (A[i + 7] | B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + //CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += Pop(eights); + } + + + if (i <= n - 4) + { + long twosA, twosB, foursA, eights; + { + long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] | B[i + 2]), c = (A[i + 3] | B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 4; + } + + if (i <= n - 2) + { + long b = (A[i] | B[i]), c = (A[i + 1] | B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 2; + } + + if (i < n) + { + tot += Pop((A[i] | B[i])); + } + + tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3); + + return tot; + } + + /// Returns the popcount or cardinality of A & ~B + /// Neither array is modified. 
+ /// + public static long Pop_andnot(long[] A, long[] B, int wordOffset, int numWords) + { + // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& ~B[\1]\)/g' + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) + { + /* C macro from Hacker's Delight + #define CSA(h,l, a,b,c) \ + {unsigned u = a ^ b; unsigned v = c; \ + h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i+1] & ~B[i+1])) + { + long b = (A[i] & ~ B[i]), c = (A[i + 1] & ~ B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] & ~B[i+2]), (A[i+3] & ~B[i+3])) + { + long b = (A[i + 2] & ~ B[i + 2]), c = (A[i + 3] & ~ B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + //CSA(twosA, ones, ones, (A[i+4] & ~B[i+4]), (A[i+5] & ~B[i+5])) + { + long b = (A[i + 4] & ~ B[i + 4]), c = (A[i + 5] & ~ B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] & ~B[i+6]), (A[i+7] & ~B[i+7])) + { + long b = (A[i + 6] & ~ B[i + 6]), c = (A[i + 7] & ~ B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + //CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += Pop(eights); + } + + + if (i <= n - 4) + { + long twosA, twosB, foursA, eights; + { + long b = (A[i] & ~ B[i]), c = (A[i + 1] & ~ B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] & ~ B[i + 2]), c = (A[i + 3] & ~ B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 4; + } + + if (i <= n - 2) + { + long b = (A[i] & ~ B[i]), c = (A[i + 1] & ~ B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 2; + } + + if (i < n) + { + tot += Pop((A[i] & ~ B[i])); + } + + tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3); + + return tot; + } + + public static long Pop_xor(long[] A, long[] B, int wordOffset, int numWords) + { + int n = wordOffset + numWords; + long tot = 0, tot8 = 0; + long ones = 0, twos = 0, fours = 0; + + int i; + for (i = wordOffset; i <= n - 8; i += 8) + { + /* C macro from Hacker's Delight + #define CSA(h,l, a,b,c) \ + {unsigned u = a ^ b; unsigned v = c; \ + h = (a & b) | (u & v); l = u ^ v;} + ***/ + + long twosA, twosB, foursA, foursB, eights; + + // CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i+1] ^ B[i+1])) + { + long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+2] ^ B[i+2]), (A[i+3] ^ B[i+3])) + { + long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]); + long u = 
ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursA, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + //CSA(twosA, ones, ones, (A[i+4] ^ B[i+4]), (A[i+5] ^ B[i+5])) + { + long b = (A[i + 4] ^ B[i + 4]), c = (A[i + 5] ^ B[i + 5]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + // CSA(twosB, ones, ones, (A[i+6] ^ B[i+6]), (A[i+7] ^ B[i+7])) + { + long b = (A[i + 6] ^ B[i + 6]), c = (A[i + 7] ^ B[i + 7]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + //CSA(foursB, twos, twos, twosA, twosB) + { + long u = twos ^ twosA; + foursB = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + + //CSA(eights, fours, fours, foursA, foursB) + { + long u = fours ^ foursA; + eights = (fours & foursA) | (u & foursB); + fours = u ^ foursB; + } + tot8 += Pop(eights); + } + + + if (i <= n - 4) + { + long twosA, twosB, foursA, eights; + { + long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]); + long u = ones ^ b; + twosA = (ones & b) | (u & c); + ones = u ^ c; + } + { + long b = (A[i + 2] ^ B[i + 2]), c = (A[i + 3] ^ B[i + 3]); + long u = ones ^ b; + twosB = (ones & b) | (u & c); + ones = u ^ c; + } + { + long u = twos ^ twosA; + foursA = (twos & twosA) | (u & twosB); + twos = u ^ twosB; + } + eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 4; + } + + if (i <= n - 2) + { + long b = (A[i] ^ B[i]), c = (A[i + 1] ^ B[i + 1]); + long u = ones ^ b; + long twosA = (ones & b) | (u & c); + ones = u ^ c; + + long foursA = twos & twosA; + twos = twos ^ twosA; + + long eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += Pop(eights); + i += 2; + } + + if (i < n) + { + tot += Pop((A[i] ^ B[i])); + } + + tot += (Pop(fours) << 2) + (Pop(twos) << 1) + Pop(ones) + (tot8 << 3); + + return tot; + } + + /* python code to generate ntzTable + def ntz(val): + if val==0: return 8 + i=0 + while (val&0x01)==0: + i = i+1 + val >>= 1 + return i + print ','.join([ str(ntz(i)) for i in range(256) ]) + ***/ + + /// table of number of trailing zeros in a byte + // + public static readonly byte[] ntzTable = new byte[] + { + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, + 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, + 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, + 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, + 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, + 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, + 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, + 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, + 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, + 0, 1, 0 + }; + + + /// Returns number of trailing zeros in a 64 bit long value. + public static int Ntz(long val) + { + // A full binary search to determine the low byte was slower than + // a linear search for nextSetBit(). This is most likely because + // the implementation of nextSetBit() shifts bits to the right, increasing + // the probability that the first non-zero byte is in the rhs. 
+ // + // This implementation does a single binary search at the top level only + // so that all other bit shifting can be done on ints instead of longs to + // remain friendly to 32 bit architectures. In addition, the case of a + // non-zero first byte is checked for first because it is the most common + // in dense bit arrays. + + int lower = (int) val; + int lowByte = lower & 0xff; + if (lowByte != 0) + return ntzTable[lowByte]; + + if (lower != 0) + { + lowByte = (Number.URShift(lower, 8)) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 8; + lowByte = (Number.URShift(lower, 16)) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 16; + // no need to mask off low byte for the last byte in the 32 bit word + // no need to check for zero on the last byte either. + return ntzTable[Number.URShift(lower, 24)] + 24; + } + else + { + // grab upper 32 bits + int upper = (int) (val >> 32); + lowByte = upper & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 32; + lowByte = (Number.URShift(upper, 8)) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 40; + lowByte = (Number.URShift(upper, 16)) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 48; + // no need to mask off low byte for the last byte in the 32 bit word + // no need to check for zero on the last byte either. + return ntzTable[Number.URShift(upper, 24)] + 56; + } + } + + /// Returns number of trailing zeros in a 32 bit int value. + public static int Ntz(int val) + { + // This implementation does a single binary search at the top level only. + // In addition, the case of a non-zero first byte is checked for first + // because it is the most common in dense bit arrays. + + int lowByte = val & 0xff; + if (lowByte != 0) + return ntzTable[lowByte]; + lowByte = (Number.URShift(val, 8)) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 8; + lowByte = (Number.URShift(val, 16)) & 0xff; + if (lowByte != 0) + return ntzTable[lowByte] + 16; + // no need to mask off low byte for the last byte. + // no need to check for zero on the last byte either. + return ntzTable[Number.URShift(val, 24)] + 24; + } + + /// returns 0 based index of first set bit + /// (only works for x!=0) + ///
This is an alternate implementation of ntz() + ///
+ public static int Ntz2(long x) + { + int n = 0; + int y = (int) x; + if (y == 0) + { + n += 32; y = (int) (Number.URShift(x, 32)); + } // the only 64 bit shift necessary + if ((y & 0x0000FFFF) == 0) + { + n += 16; y = Number.URShift(y, 16); + } + if ((y & 0x000000FF) == 0) + { + n += 8; y = Number.URShift(y, 8); + } + return (ntzTable[y & 0xff]) + n; + } + + /// returns 0 based index of first set bit + ///
This is an alternate implementation of ntz() + ///
+ public static int Ntz3(long x) + { + // another implementation taken from Hackers Delight, extended to 64 bits + // and converted to Java. + // Many 32 bit ntz algorithms are at http://www.hackersdelight.org/HDcode/ntz.cc + int n = 1; + + // do the first step as a long, all others as ints. + int y = (int) x; + if (y == 0) + { + n += 32; y = (int) (Number.URShift(x, 32)); + } + if ((y & 0x0000FFFF) == 0) + { + n += 16; y = Number.URShift(y, 16); + } + if ((y & 0x000000FF) == 0) + { + n += 8; y = Number.URShift(y, 8); + } + if ((y & 0x0000000F) == 0) + { + n += 4; y = Number.URShift(y, 4); + } + if ((y & 0x00000003) == 0) + { + n += 2; y = Number.URShift(y, 2); + } + return n - (y & 1); + } + + + /// returns true if v is a power of two or zero + public static bool IsPowerOfTwo(int v) + { + return ((v & (v - 1)) == 0); + } + + /// returns true if v is a power of two or zero + public static bool IsPowerOfTwo(long v) + { + return ((v & (v - 1)) == 0); + } + + /// returns the next highest power of two, or the current value if it's already a power of two or zero + public static int NextHighestPowerOfTwo(int v) + { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; + } + + /// returns the next highest power of two, or the current value if it's already a power of two or zero + public static long NextHighestPowerOfTwo(long v) + { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + v++; + return v; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/BitVector.cs b/external/Lucene.Net.Light/src/core/Util/BitVector.cs new file mode 100644 index 0000000000..17b12126d4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/BitVector.cs @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using Directory = Lucene.Net.Store.Directory; +using IndexInput = Lucene.Net.Store.IndexInput; +using IndexOutput = Lucene.Net.Store.IndexOutput; + +namespace Lucene.Net.Util +{ + + /// Optimized implementation of a vector of bits. This is more-or-less like + /// java.util.BitSet, but also includes the following: + /// + /// a count() method, which efficiently computes the number of one bits; + /// optimized read from and write to disk; + /// inlinable get() method; + /// store and load, as bit set or d-gaps, depending on sparseness; + /// + /// + public sealed class BitVector : ICloneable + { + + private byte[] bits; + private int size; + private int count; + + /// Constructs a vector capable of holding n bits. 
+ public BitVector(int n) + { + size = n; + bits = new byte[(size >> 3) + 1]; + count = 0; + } + + internal BitVector(byte[] bits, int size) + { + this.bits = bits; + this.size = size; + count = -1; + } + + public System.Object Clone() + { + byte[] copyBits = new byte[bits.Length]; + Array.Copy(bits, 0, copyBits, 0, bits.Length); + BitVector clone = new BitVector(copyBits, size); + clone.count = count; + return clone; + } + + /// Sets the value of bit to one. + public void Set(int bit) + { + if (bit >= size) + { + throw new System. IndexOutOfRangeException("Index of bound " + bit); + } + bits[bit >> 3] |= (byte) (1 << (bit & 7)); + count = - 1; + } + + /// Sets the value of bit to true, and + /// returns true if bit was already set + /// + public bool GetAndSet(int bit) + { + if (bit >= size) + { + throw new System. IndexOutOfRangeException("Index of bound " + bit); + } + int pos = bit >> 3; + int v = bits[pos]; + int flag = 1 << (bit & 7); + if ((flag & v) != 0) + return true; + else + { + bits[pos] = (byte) (v | flag); + if (count != - 1) + count++; + return false; + } + } + + /// Sets the value of bit to zero. + public void Clear(int bit) + { + if (bit >= size) + { + throw new System.IndexOutOfRangeException("Index of bound " + bit); + } + bits[bit >> 3] &= (byte) (~ (1 << (bit & 7))); + count = - 1; + } + + /// Returns true if bit is one and + /// false if it is zero. + /// + public bool Get(int bit) + { + System.Diagnostics.Debug.Assert(bit >= 0 && bit < size, "bit " + bit + " is out of bounds 0.." +(size - 1)); + return (bits[bit >> 3] & (1 << (bit & 7))) != 0; + } + + /// Returns the number of bits in this vector. This is also one greater than + /// the number of the largest valid bit number. + /// + public int Size() + { + return size; + } + + /// Returns the total number of one bits in this vector. This is efficiently + /// computed and cached, so that, if the vector is not changed, no + /// recomputation is done for repeated calls. + /// + public int Count() + { + // if the vector has been modified + if (count == - 1) + { + int c = 0; + int end = bits.Length; + for (int i = 0; i < end; i++) + c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte + count = c; + } + return count; + } + + /// + /// For testing + /// + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1024:UsePropertiesWhereAppropriate")] + public int GetRecomputedCount() + { + int c = 0; + int end = bits.Length; + for (int i = 0; i < end; i++) + c += BYTE_COUNTS[bits[i] & 0xFF]; // sum bits per byte + return c; + } + + private static readonly byte[] BYTE_COUNTS = new byte[]{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + + + /// Writes this vector to the file name in Directory + /// d, in a format that can be read by the constructor + /// . 
+ /// + public void Write(Directory d, System.String name) + { + IndexOutput output = d.CreateOutput(name); + try + { + if (IsSparse()) + { + WriteDgaps(output); // sparse bit-set more efficiently saved as d-gaps. + } + else + { + WriteBits(output); + } + } + finally + { + output.Close(); + } + } + + /// Write as a bit set + private void WriteBits(IndexOutput output) + { + output.WriteInt(Size()); // write size + output.WriteInt(Count()); // write count + output.WriteBytes(bits, bits.Length); + } + + /// Write as a d-gaps list + private void WriteDgaps(IndexOutput output) + { + output.WriteInt(- 1); // mark using d-gaps + output.WriteInt(Size()); // write size + output.WriteInt(Count()); // write count + int last = 0; + int n = Count(); + int m = bits.Length; + for (int i = 0; i < m && n > 0; i++) + { + if (bits[i] != 0) + { + output.WriteVInt(i - last); + output.WriteByte(bits[i]); + last = i; + n -= BYTE_COUNTS[bits[i] & 0xFF]; + } + } + } + + /// Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set. + private bool IsSparse() + { + // note: order of comparisons below set to favor smaller values (no binary range search.) + // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. + // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore + // multiplying count by (8+8) or (8+16) or (8+24) etc.: + // - first 8 for writing bits[i] (1 byte vs. 1 bit), and + // - second part for writing the byte-number d-gap as vint. + // note: factor is for read/write of byte-arrays being faster than vints. + int factor = 10; + if (bits.Length < (1 << 7)) + return factor * (4 + (8 + 8) * Count()) < Size(); + if (bits.Length < (1 << 14)) + return factor * (4 + (8 + 16) * Count()) < Size(); + if (bits.Length < (1 << 21)) + return factor * (4 + (8 + 24) * Count()) < Size(); + if (bits.Length < (1 << 28)) + return factor * (4 + (8 + 32) * Count()) < Size(); + return factor * (4 + (8 + 40) * Count()) < Size(); + } + + /// Constructs a bit vector from the file name in Directory + /// d, as written by the method. + /// + public BitVector(Directory d, System.String name) + { + IndexInput input = d.OpenInput(name); + try + { + size = input.ReadInt(); // read size + if (size == - 1) + { + ReadDgaps(input); + } + else + { + ReadBits(input); + } + } + finally + { + input.Close(); + } + } + + /// Read as a bit set + private void ReadBits(IndexInput input) + { + count = input.ReadInt(); // read count + bits = new byte[(size >> 3) + 1]; // allocate bits + input.ReadBytes(bits, 0, bits.Length); + } + + /// read as a d-gaps list + private void ReadDgaps(IndexInput input) + { + size = input.ReadInt(); // (re)read size + count = input.ReadInt(); // read count + bits = new byte[(size >> 3) + 1]; // allocate bits + int last = 0; + int n = Count(); + while (n > 0) + { + last += input.ReadVInt(); + bits[last] = input.ReadByte(); + n -= BYTE_COUNTS[bits[last] & 0xFF]; + } + } + + /// Retrieve a subset of this BitVector. 
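The sparse on-disk format chosen by `IsSparse` stores only the non-zero bytes of the vector, each preceded by its distance from the previous non-zero byte (written as a VInt). A standalone sketch of that layout, using a hypothetical helper rather than anything in this patch:

```csharp
// Hypothetical illustration of the d-gap layout produced by WriteDgaps:
// only non-zero bytes are emitted, each preceded by the gap since the
// previous non-zero byte.
using System;

static class DgapSketch
{
    static void PrintDgaps(byte[] bits)
    {
        int last = 0;
        for (int i = 0; i < bits.Length; i++)
        {
            if (bits[i] != 0)
            {
                Console.WriteLine($"gap={i - last} byte=0x{bits[i]:X2}");
                last = i;
            }
        }
    }

    static void Main()
    {
        var bits = new byte[1000];   // a very sparse 8000-bit vector
        bits[3] = 0x01;              // bit 24 set
        bits[800] = 0x80;            // bit 6407 set
        PrintDgaps(bits);            // gap=3 byte=0x01, then gap=797 byte=0x80
    }
}
```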
+ /// + /// + /// starting index, inclusive + /// + /// ending index, exclusive + /// + /// subset + /// + public BitVector Subset(int start, int end) + { + if (start < 0 || end > Size() || end < start) + throw new System.IndexOutOfRangeException(); + // Special case -- return empty vector is start == end + if (end == start) + return new BitVector(0); + byte[] bits = new byte[(Number.URShift((end - start - 1), 3)) + 1]; + int s = Number.URShift(start, 3); + for (int i = 0; i < bits.Length; i++) + { + int cur = 0xFF & this.bits[i + s]; + int next = i + s + 1 >= this.bits.Length?0:0xFF & this.bits[i + s + 1]; + bits[i] = (byte) ((Number.URShift(cur, (start & 7))) | ((next << (8 - (start & 7))))); + } + int bitsToClear = (bits.Length * 8 - (end - start)) % 8; + bits[bits.Length - 1] &= (byte) (~ (0xFF << (8 - bitsToClear))); + return new BitVector(bits, end - start); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/Cache/Cache.cs b/external/Lucene.Net.Light/src/core/Util/Cache/Cache.cs new file mode 100644 index 0000000000..34ded72b23 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/Cache/Cache.cs @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util.Cache +{ + + + /// Base class for cache implementations. + public abstract class Cache : IDisposable + { + + /// Simple Cache wrapper that synchronizes all + /// calls that access the cache. + /// + internal class SynchronizedCache_Renamed_Class : Cache + { + internal System.Object mutex; + internal Cache cache; + + internal SynchronizedCache_Renamed_Class(Cache cache) + { + this.cache = cache; + this.mutex = this; + } + + internal SynchronizedCache_Renamed_Class(Cache cache, System.Object mutex) + { + this.cache = cache; + this.mutex = mutex; + } + + public override void Put(TKey key, TValue value_Renamed) + { + lock (mutex) + { + cache.Put(key, value_Renamed); + } + } + + public override TValue Get(System.Object key) + { + lock (mutex) + { + return cache.Get(key); + } + } + + public override bool ContainsKey(System.Object key) + { + lock (mutex) + { + return cache.ContainsKey(key); + } + } + + protected override void Dispose(bool disposing) + { + lock (mutex) + { + cache.Dispose(); + } + } + + internal override Cache GetSynchronizedCache() + { + return this; + } + } + + /// Returns a thread-safe cache backed by the specified cache. + /// In order to guarantee thread-safety, all access to the backed cache must + /// be accomplished through the returned cache. + /// + public static Cache SynchronizedCache(Cache cache) + { + return cache.GetSynchronizedCache(); + } + + /// Called by . 
This method + /// returns a instance that wraps + /// this instance by default and can be overridden to return + /// e. g. subclasses of or this + /// in case this cache is already synchronized. + /// + internal virtual Cache GetSynchronizedCache() + { + return new SynchronizedCache_Renamed_Class(this); + } + + /// Puts a (key, value)-pair into the cache. + public abstract void Put(TKey key, TValue value_Renamed); + + /// Returns the value for the given key. + public abstract TValue Get(System.Object key); + + /// Returns whether the given key is in this cache. + public abstract bool ContainsKey(System.Object key); + + /// Closes the cache. + [Obsolete("Use Dispose() instead")] + public void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected abstract void Dispose(bool disposing); + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/Cache/SimpleLRUCache.cs b/external/Lucene.Net.Light/src/core/Util/Cache/SimpleLRUCache.cs new file mode 100644 index 0000000000..2cde655fc1 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/Cache/SimpleLRUCache.cs @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Util.Cache +{ + public class SimpleLRUCache : SimpleMapCache + { + /// + /// The maximum number of items to cache. + /// + private int capacity; + + /// + /// The list to efficiently maintain the LRU state. + /// + private LinkedList> list; + + /// + /// The dictionary to hash into any location in the list. + /// + private Dictionary>> lookup; + + /// + /// The node instance to use/re-use when adding an item to the cache. 
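A short usage sketch for the cache family defined here (not part of this patch). The generic `Cache<TKey, TValue>` and `SimpleLRUCache<TKey, TValue>` signatures are assumed from upstream Lucene.Net; `SynchronizedCache` wraps any cache so every call goes through a single mutex:

```csharp
// Usage sketch, not part of this patch; generic type parameters are assumed.
using System;
using Lucene.Net.Util.Cache;

static class CacheDemo
{
    static void Main()
    {
        // An LRU cache holding at most 1024 entries, wrapped so that
        // Put/Get/ContainsKey are synchronized on one mutex.
        Cache<string, int> cache =
            Cache<string, int>.SynchronizedCache(new SimpleLRUCache<string, int>(1024));

        cache.Put("docFreq:title", 42);
        Console.WriteLine(cache.ContainsKey("docFreq:title")); // True
        Console.WriteLine(cache.Get("docFreq:title"));         // 42
        cache.Dispose();
    }
}
```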
+ /// + private LinkedListNode> openNode; + + public SimpleLRUCache(int Capacity) + { + this.capacity = Capacity; + this.list = new LinkedList>(); + this.lookup = new Dictionary>>(Capacity + 1); + this.openNode = new LinkedListNode>(new ListValueEntry(default(TKey), default(TValue))); + } + + public override void Put(TKey Key, TValue Value) + { + if (Get(Key) == null) + { + this.openNode.Value.ItemKey = Key; + this.openNode.Value.ItemValue = Value; + this.list.AddFirst(this.openNode); + this.lookup.Add(Key, this.openNode); + + if (this.list.Count > this.capacity) + { + // last node is to be removed and saved for the next addition to the cache + this.openNode = this.list.Last; + + // remove from list & dictionary + this.list.RemoveLast(); + this.lookup.Remove(this.openNode.Value.ItemKey); + } + else + { + // still filling the cache, create a new open node for the next time + this.openNode = new LinkedListNode>(new ListValueEntry(default(TKey), default(TValue))); + } + } + } + + public override TValue Get(object Key) + { + LinkedListNode> node = null; + if (!this.lookup.TryGetValue((TKey)Key, out node)) + { + return default(TValue); + } + this.list.Remove(node); + this.list.AddFirst(node); + return node.Value.ItemValue; + } + + /// + /// Container to hold the key and value to aid in removal from + /// the dictionary when an item is removed from cache. + /// + class ListValueEntry where K : TKey + where V : TValue + { + internal V ItemValue; + internal K ItemKey; + + internal ListValueEntry(K key, V value) + { + this.ItemKey = key; + this.ItemValue = value; + } + } + } + + +#region NOT_USED_FROM_JLCA_PORT +/* + + // + // This is the oringal port as it was generated via JLCA. + // This code is not used. It's here for referance only. + // + + + /// Simple LRU cache implementation that uses a LinkedHashMap. + /// This cache is not synchronized, use + /// if needed. + /// + /// + public class SimpleLRUCache:SimpleMapCache + { + private class AnonymousClassLinkedHashMap : LinkedHashMap + { + public AnonymousClassLinkedHashMap(SimpleLRUCache enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(SimpleLRUCache enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SimpleLRUCache enclosingInstance; + public SimpleLRUCache Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + protected internal virtual bool RemoveEldestEntry(System.Collections.DictionaryEntry eldest) + { + return size() > Enclosing_Instance.cacheSize; + } + } + private const float LOADFACTOR = 0.75f; + + private int cacheSize; + + /// Creates a last-recently-used cache with the specified size. + public SimpleLRUCache(int cacheSize):base(null) + { + this.cacheSize = cacheSize; + int capacity = (int) System.Math.Ceiling(cacheSize / LOADFACTOR) + 1; + + base.map = new AnonymousClassLinkedHashMap(this, capacity, LOADFACTOR, true); + } + } +*/ +#endregion + +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/Cache/SimpleMapCache.cs b/external/Lucene.Net.Light/src/core/Util/Cache/SimpleMapCache.cs new file mode 100644 index 0000000000..b424bd1b06 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/Cache/SimpleMapCache.cs @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Util.Cache +{ + + /// Simple cache implementation that uses a HashMap to store (key, value) pairs. + /// This cache is not synchronized, use + /// if needed. + /// + public class SimpleMapCache : Cache + { + internal System.Collections.Generic.Dictionary map; + + public SimpleMapCache() + : this(new System.Collections.Generic.Dictionary()) + { + } + + public SimpleMapCache(System.Collections.Generic.Dictionary map) + { + this.map = map; + } + + public override TValue Get(System.Object key) + { + return map[(TKey)key]; + } + + public override void Put(TKey key, TValue value_Renamed) + { + map[key] = value_Renamed; + } + + public override bool ContainsKey(System.Object key) + { + return map.ContainsKey((TKey)key); + } + + protected override void Dispose(bool disposing) + { + // do nothing + } + + /// Returns a Set containing all keys in this cache. + public virtual System.Collections.Generic.HashSet KeySet() + { + return new HashSet(map.Keys); + } + + internal override Cache GetSynchronizedCache() + { + return new SynchronizedSimpleMapCache(this); + } + + // Why does does this use both inheritance and composition? + private class SynchronizedSimpleMapCache : SimpleMapCache + { + private System.Object mutex; + private SimpleMapCache cache; + + private bool isDisposed; + + internal SynchronizedSimpleMapCache(SimpleMapCache cache) + { + this.cache = cache; + this.mutex = this; + } + + public override void Put(TKey key, TValue value_Renamed) + { + lock (mutex) + { + cache.Put(key, value_Renamed); + } + } + + public override TValue Get(System.Object key) + { + lock (mutex) + { + return cache.Get(key); + } + } + + public override bool ContainsKey(System.Object key) + { + lock (mutex) + { + return cache.ContainsKey(key); + } + } + + protected override void Dispose(bool disposing) + { + lock (mutex) + { + if (isDisposed) return; + + if (disposing) + { + cache.Dispose(disposing); + } + + isDisposed = true; + base.Dispose(disposing); + } + } + + public override HashSet KeySet() + { + lock (mutex) + { + return cache.KeySet(); + } + } + + internal override Cache GetSynchronizedCache() + { + return this; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/CloseableThreadLocal.cs b/external/Lucene.Net.Light/src/core/Util/CloseableThreadLocal.cs new file mode 100644 index 0000000000..84e16e8be4 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/CloseableThreadLocal.cs @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading; +using Lucene.Net.Support; + +#if NET35 +using Lucene.Net.Support.Compatibility; +#endif + +namespace Lucene.Net.Util +{ + + /// Java's builtin ThreadLocal has a serious flaw: + /// it can take an arbitrarily long amount of time to + /// dereference the things you had stored in it, even once the + /// ThreadLocal instance itself is no longer referenced. + /// This is because there is single, master map stored for + /// each thread, which all ThreadLocals share, and that + /// master map only periodically purges "stale" entries. + /// + /// While not technically a memory leak, because eventually + /// the memory will be reclaimed, it can take a long time + /// and you can easily hit OutOfMemoryError because from the + /// GC's standpoint the stale entries are not reclaimaible. + /// + /// This class works around that, by only enrolling + /// WeakReference values into the ThreadLocal, and + /// separately holding a hard reference to each stored + /// value. When you call , these hard + /// references are cleared and then GC is freely able to + /// reclaim space by objects stored in it. + /// + /// + + public class CloseableThreadLocal : IDisposable where T : class + { + // NOTE: Java has WeakReference. This isn't available for .Net until 4.5 (according to msdn docs) + private ThreadLocal t = new ThreadLocal(); + + private IDictionary hardRefs = new HashMap(); + + private bool isDisposed; + + public virtual T InitialValue() + { + return null; + } + + public virtual T Get() + { + WeakReference weakRef = t.Get(); + if (weakRef == null) + { + T iv = InitialValue(); + if (iv != null) + { + Set(iv); + return iv; + } + else + return null; + } + else + { + return (T)weakRef.Get(); + } + } + + public virtual void Set(T @object) + { + //+-- For Debuging + if (CloseableThreadLocalProfiler.EnableCloseableThreadLocalProfiler == true) + { + lock (CloseableThreadLocalProfiler.Instances) + { + CloseableThreadLocalProfiler.Instances.Add(new WeakReference(@object)); + } + } + //+-- + + t.Set(new WeakReference(@object)); + + lock (hardRefs) + { + //hardRefs[Thread.CurrentThread] = @object; + hardRefs.Add(Thread.CurrentThread, @object); + + // Java's iterator can remove, .NET's cannot + var threadsToRemove = hardRefs.Keys.Where(thread => !thread.IsAlive).ToList(); + // Purge dead threads + foreach (var thread in threadsToRemove) + { + hardRefs.Remove(thread); + } + } + } + + [Obsolete("Use Dispose() instead")] + public virtual void Close() + { + Dispose(); + } + + public void Dispose() + { + Dispose(true); + } + + protected virtual void Dispose(bool disposing) + { + if (isDisposed) return; + + if (disposing) + { + // Clear the hard refs; then, the only remaining refs to + // all values we were storing are weak (unless somewhere + // else is still using them) and so GC may reclaim them: + hardRefs = null; + // Take care of the current thread right now; others will be + // taken care of via the WeakReferences. 
+ if (t != null) + { + t.Remove(); + } + t = null; + } + + isDisposed = true; + } + } + + internal static class CloseableThreadLocalExtensions + { + public static void Set(this ThreadLocal t, T val) + { + t.Value = val; + } + + public static T Get(this ThreadLocal t) + { + return t.Value; + } + + public static void Remove(this ThreadLocal t) + { + t.Dispose(); + } + + public static object Get(this WeakReference w) + { + return w.Target; + } + } + + //// {{DIGY}} + //// To compile against Framework 2.0 + //// Uncomment below class + //public class ThreadLocal : IDisposable + //{ + // [ThreadStatic] + // static SupportClass.WeakHashTable slots; + + // void Init() + // { + // if (slots == null) slots = new SupportClass.WeakHashTable(); + // } + + // public T Value + // { + // set + // { + // Init(); + // slots.Add(this, value); + // } + // get + // { + // Init(); + // return (T)slots[this]; + // } + // } + + // public void Dispose() + // { + // if (slots != null) slots.Remove(this); + // } + //} +} diff --git a/external/Lucene.Net.Light/src/core/Util/Constants.cs b/external/Lucene.Net.Light/src/core/Util/Constants.cs new file mode 100644 index 0000000000..88761d20fa --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/Constants.cs @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using LucenePackage = Lucene.Net.LucenePackage; + +namespace Lucene.Net.Util +{ + + /// Some useful constants. + public sealed class Constants + { + private Constants() + { + } // can't construct + + /// The value of System.getProperty("java.version"). * + public static readonly System.String JAVA_VERSION = AppSettings.Get("java.version", ""); + /// True iff this is Java version 1.1. + public static readonly bool JAVA_1_1 = JAVA_VERSION.StartsWith("1.1."); + /// True iff this is Java version 1.2. + public static readonly bool JAVA_1_2 = JAVA_VERSION.StartsWith("1.2."); + /// True iff this is Java version 1.3. + public static readonly bool JAVA_1_3 = JAVA_VERSION.StartsWith("1.3."); + + /// The value of System.getProperty("os.name"). * + public static readonly System.String OS_NAME = GetEnvironmentVariable("OS","Windows_NT") ?? "Linux"; + /// True iff running on Linux. + public static readonly bool LINUX = OS_NAME.StartsWith("Linux"); + /// True iff running on Windows. + public static readonly bool WINDOWS = OS_NAME.StartsWith("Windows"); + /// True iff running on SunOS. 
+ public static readonly bool SUN_OS = OS_NAME.StartsWith("SunOS"); + + public static readonly System.String OS_ARCH = GetEnvironmentVariable("PROCESSOR_ARCHITECTURE","x86"); + public static readonly System.String OS_VERSION = GetEnvironmentVariable("OS_VERSION", "?"); + public static readonly System.String JAVA_VENDOR = AppSettings.Get("java.vendor", ""); + + // NOTE: this logic may not be correct; if you know of a + // more reliable approach please raise it on java-dev! + public static bool JRE_IS_64BIT; + + // this method prevents inlining the final version constant in compiled + // classes, + // see: http://www.javaworld.com/community/node/3400 + private static System.String Ident(System.String s) + { + return s.ToString(); + } + + public static readonly System.String LUCENE_MAIN_VERSION = Ident("3.0.3"); + + public static System.String LUCENE_VERSION="8.8.8.8"; + static Constants() + { + if (IntPtr.Size == 8) + { + JRE_IS_64BIT = true;// 64 bit machine + } + else if (IntPtr.Size == 4) + { + JRE_IS_64BIT = false;// 32 bit machine + } + + try + { + LUCENE_VERSION = System.Reflection.Assembly.GetExecutingAssembly().GetName().Version.ToString(); + } + catch (System.Security.SecurityException) //Ignore in medium trust. + { + } + + } + + #region MEDIUM-TRUST Support + static string GetEnvironmentVariable(string variable, string defaultValueOnSecurityException) + { + try + { + if (variable == "OS_VERSION") return System.Environment.OSVersion.ToString(); + + return System.Environment.GetEnvironmentVariable(variable); + } + catch (System.Security.SecurityException) + { + return defaultValueOnSecurityException; + } + + } + #endregion + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/DocIdBitSet.cs b/external/Lucene.Net.Light/src/core/Util/DocIdBitSet.cs new file mode 100644 index 0000000000..1601ad25f3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/DocIdBitSet.cs @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections; +using Lucene.Net.Support; +using DocIdSet = Lucene.Net.Search.DocIdSet; +using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + +namespace Lucene.Net.Util +{ + /// Simple DocIdSet and DocIdSetIterator backed by a BitSet + public class DocIdBitSet:DocIdSet + { + private System.Collections.BitArray bitSet; + + public DocIdBitSet(System.Collections.BitArray bitSet) + { + this.bitSet = bitSet; + } + + public override DocIdSetIterator Iterator() + { + return new DocIdBitSetIterator(bitSet); + } + + /// This DocIdSet implementation is cacheable. + public override bool IsCacheable + { + get { return true; } + } + + /// Returns the underlying BitSet. 
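A usage sketch for `DocIdBitSet` (not part of this patch): wrap a `BitArray` whose set bits mark matching documents, then walk them with the iterator implemented just below.

```csharp
using System;
using System.Collections;
using Lucene.Net.Search;
using Lucene.Net.Util;

static class DocIdBitSetDemo
{
    static void Main()
    {
        var bits = new BitArray(8);
        bits[1] = true;
        bits[5] = true;

        DocIdSetIterator it = new DocIdBitSet(bits).Iterator();
        int doc;
        while ((doc = it.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            Console.WriteLine(doc); // prints 1, then 5
        }
    }
}
```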
+ public virtual BitArray BitSet + { + get { return this.bitSet; } + } + + private class DocIdBitSetIterator:DocIdSetIterator + { + private int docId; + private System.Collections.BitArray bitSet; + + internal DocIdBitSetIterator(System.Collections.BitArray bitSet) + { + this.bitSet = bitSet; + this.docId = - 1; + } + + public override int DocID() + { + return docId; + } + + public override int NextDoc() + { + // (docId + 1) on next line requires -1 initial value for docNr: + int d = BitSetSupport.NextSetBit(bitSet, docId + 1); + // -1 returned by BitSet.nextSetBit() when exhausted + docId = d == - 1?NO_MORE_DOCS:d; + return docId; + } + + public override int Advance(int target) + { + int d = BitSetSupport.NextSetBit(bitSet, target); + // -1 returned by BitSet.nextSetBit() when exhausted + docId = d == - 1?NO_MORE_DOCS:d; + return docId; + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/FieldCacheSanityChecker.cs b/external/Lucene.Net.Light/src/core/Util/FieldCacheSanityChecker.cs new file mode 100644 index 0000000000..74569696f3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/FieldCacheSanityChecker.cs @@ -0,0 +1,439 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +using IndexReader = Lucene.Net.Index.IndexReader; +using FieldCache = Lucene.Net.Search.FieldCache; +using CacheEntry = Lucene.Net.Search.CacheEntry; + +namespace Lucene.Net.Util +{ + + /// Provides methods for sanity checking that entries in the FieldCache + /// are not wasteful or inconsistent. + ///

+ ///
+ /// Lucene 2.9 introduced numerous enhancements into how the FieldCache
+ /// is used by the low levels of Lucene searching (for Sorting and
+ /// ValueSourceQueries) to improve both the speed of Sorting, as well
+ /// as the reopening of IndexReaders. But these changes have shifted the
+ /// usage of FieldCache from "top level" IndexReaders (frequently a
+ /// MultiReader or DirectoryReader) down to the leaf-level SegmentReaders.
+ /// As a result, existing applications that directly access the FieldCache
+ /// may find RAM usage increase significantly when upgrading to 2.9 or
+ /// later. This class provides an API for these applications (or their
+ /// unit tests) to check at run time whether the FieldCache contains "insane"
+ /// usages of the FieldCache.
+ ///
+ /// EXPERIMENTAL API: This API is considered extremely advanced and
+ /// experimental. It may be removed or altered without warning in future
+ /// releases of Lucene.
+ ///
+ /// + /// + /// + /// + /// + /// + public sealed class FieldCacheSanityChecker + { + + private RamUsageEstimator ramCalc = null; + public FieldCacheSanityChecker() + { + /* NOOP */ + } + /// If set, will be used to estimate size for all CacheEntry objects + /// dealt with. + /// + public void SetRamUsageEstimator(RamUsageEstimator r) + { + ramCalc = r; + } + + + /// Quick and dirty convenience method + /// + /// + public static Insanity[] CheckSanity(FieldCache cache) + { + return CheckSanity(cache.GetCacheEntries()); + } + + /// Quick and dirty convenience method that instantiates an instance with + /// "good defaults" and uses it to test the CacheEntrys + /// + /// + /// + public static Insanity[] CheckSanity(params CacheEntry[] cacheEntries) + { + FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker(); + // doesn't check for interned + sanityChecker.SetRamUsageEstimator(new RamUsageEstimator(false)); + return sanityChecker.Check(cacheEntries); + } + + + /// Tests a CacheEntry[] for indication of "insane" cache usage. + ///
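A hedged usage sketch for the checker (not part of this patch). `FieldCache_Fields.DEFAULT` is assumed to be how Lucene.Net 3.0.3 exposes the process-wide cache; adjust to whatever entry point your build actually provides:

```csharp
using System;
using Lucene.Net.Search;
using Lucene.Net.Util;

static class SanityDemo
{
    static void Main()
    {
        // Collect the current cache entries and look for suspicious usage.
        FieldCacheSanityChecker.Insanity[] problems =
            FieldCacheSanityChecker.CheckSanity(FieldCache_Fields.DEFAULT);

        foreach (var insanity in problems)
        {
            // Insanity.ToString() prints the type, message and each CacheEntry.
            Console.WriteLine(insanity);
        }
    }
}
```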

+ /// NOTE: FieldCache CreationPlaceholder objects are ignored.
+ /// (:TODO: is this a bad idea? are we masking a real problem?)
+ ///
+ public Insanity[] Check(params CacheEntry[] cacheEntries) + { + if (null == cacheEntries || 0 == cacheEntries.Length) + return new Insanity[0]; + + if (null != ramCalc) + { + for (int i = 0; i < cacheEntries.Length; i++) + { + cacheEntries[i].EstimateSize(ramCalc); + } + } + + // the indirect mapping lets MapOfSet dedup identical valIds for us + // + // maps the (valId) identityhashCode of cache values to + // sets of CacheEntry instances + MapOfSets valIdToItems = new MapOfSets(new Dictionary>(17)); + // maps ReaderField keys to Sets of ValueIds + MapOfSets readerFieldToValIds = new MapOfSets(new Dictionary>(17)); + // + + // any keys that we know result in more then one valId + HashSet valMismatchKeys = new HashSet(); + + // iterate over all the cacheEntries to get the mappings we'll need + for (int i = 0; i < cacheEntries.Length; i++) + { + CacheEntry item = cacheEntries[i]; + System.Object val = item.Value; + + if (val is Lucene.Net.Search.CreationPlaceholder) + continue; + + ReaderField rf = new ReaderField(item.ReaderKey, item.FieldName); + + System.Int32 valId = val.GetHashCode(); + + // indirect mapping, so the MapOfSet will dedup identical valIds for us + valIdToItems.Put(valId, item); + if (1 < readerFieldToValIds.Put(rf, valId)) + { + valMismatchKeys.Add(rf); + } + } + + List insanity = new List(valMismatchKeys.Count * 3); + + insanity.AddRange(CheckValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); + insanity.AddRange(CheckSubreaders(valIdToItems, readerFieldToValIds)); + + return insanity.ToArray(); + } + + /// Internal helper method used by check that iterates over + /// valMismatchKeys and generates a Collection of Insanity + /// instances accordingly. The MapOfSets are used to populate + /// the Insantiy objects. + /// + /// + /// + private List CheckValueMismatch(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds, + HashSet valMismatchKeys) + { + + List insanity = new List(valMismatchKeys.Count * 3); + + if (!(valMismatchKeys.Count == 0)) + { + // we have multiple values for some ReaderFields + + IDictionary> rfMap = readerFieldToValIds.Map; + IDictionary> valMap = valIdToItems.Map; + foreach (ReaderField rf in valMismatchKeys) + { + List badEntries = new List(valMismatchKeys.Count * 2); + foreach (int val in rfMap[rf]) + { + foreach (CacheEntry entry in valMap[val]) + { + badEntries.Add(entry); + } + } + + insanity.Add(new Insanity(InsanityType.VALUEMISMATCH, "Multiple distinct value objects for " + rf.ToString(), badEntries.ToArray())); + } + } + return insanity; + } + + /// Internal helper method used by check that iterates over + /// the keys of readerFieldToValIds and generates a Collection + /// of Insanity instances whenever two (or more) ReaderField instances are + /// found that have an ancestery relationships. 
+ /// + /// + /// + /// + private List CheckSubreaders(MapOfSets valIdToItems, + MapOfSets readerFieldToValIds) + { + List insanity = new List(23); + + Dictionary> badChildren = new Dictionary>(17); + MapOfSets badKids = new MapOfSets(badChildren); // wrapper + + IDictionary> viToItemSets = valIdToItems.Map; + IDictionary> rfToValIdSets = readerFieldToValIds.Map; + + HashSet seen = new HashSet(); + + foreach (ReaderField rf in rfToValIdSets.Keys) + { + if (seen.Contains(rf)) + continue; + + System.Collections.IList kids = GetAllDecendentReaderKeys(rf.readerKey); + foreach (Object kidKey in kids) + { + ReaderField kid = new ReaderField(kidKey, rf.fieldName); + + if (badChildren.ContainsKey(kid)) + { + // we've already process this kid as RF and found other problems + // track those problems as our own + badKids.Put(rf, kid); + badKids.PutAll(rf, badChildren[kid]); + badChildren.Remove(kid); + } + else if (rfToValIdSets.ContainsKey(kid)) + { + // we have cache entries for the kid + badKids.Put(rf, kid); + } + seen.Add(kid); + } + seen.Add(rf); + } + + // every mapping in badKids represents an Insanity + foreach (ReaderField parent in badChildren.Keys) + { + HashSet kids = badChildren[parent]; + + List badEntries = new List(kids.Count * 2); + + // put parent entr(ies) in first + { + foreach (int val in rfToValIdSets[parent]) + { + badEntries.AddRange(viToItemSets[val]); + } + } + + // now the entries for the descendants + foreach (ReaderField kid in kids) + { + foreach (int val in rfToValIdSets[kid]) + { + badEntries.AddRange(viToItemSets[val]); + } + } + + insanity.Add(new Insanity(InsanityType.SUBREADER, "Found caches for decendents of " + parent.ToString(), badEntries.ToArray())); + } + + return insanity; + } + + /// Checks if the seed is an IndexReader, and if so will walk + /// the hierarchy of subReaders building up a list of the objects + /// returned by obj.getFieldCacheKey() + /// + private System.Collections.IList GetAllDecendentReaderKeys(System.Object seed) + { + List all = new List(17); // will grow as we iter + all.Add(seed); + for (int i = 0; i < all.Count; i++) + { + System.Object obj = all[i]; + if (obj is IndexReader) + { + IndexReader[] subs = ((IndexReader) obj).GetSequentialSubReaders(); + for (int j = 0; (null != subs) && (j < subs.Length); j++) + { + all.Add(subs[j].FieldCacheKey); + } + } + } + // need to skip the first, because it was the seed + return all.GetRange(1, all.Count - 1); + } + + /// Simple pair object for using "readerKey + fieldName" a Map key + private sealed class ReaderField + { + public System.Object readerKey; + public System.String fieldName; + public ReaderField(System.Object readerKey, System.String fieldName) + { + this.readerKey = readerKey; + this.fieldName = fieldName; + } + public override int GetHashCode() + { + return readerKey.GetHashCode() * fieldName.GetHashCode(); + } + public override bool Equals(System.Object that) + { + if (!(that is ReaderField)) + return false; + + ReaderField other = (ReaderField) that; + return (this.readerKey == other.readerKey && this.fieldName.Equals(other.fieldName)); + } + public override System.String ToString() + { + return readerKey.ToString() + "+" + fieldName; + } + } + + /// Simple container for a collection of related CacheEntry objects that + /// in conjunction with eachother represent some "insane" usage of the + /// FieldCache. 
+ /// + public sealed class Insanity + { + private InsanityType type; + private System.String msg; + private CacheEntry[] entries; + public Insanity(InsanityType type, System.String msg, params CacheEntry[] entries) + { + if (null == type) + { + throw new System.ArgumentException("Insanity requires non-null InsanityType"); + } + if (null == entries || 0 == entries.Length) + { + throw new System.ArgumentException("Insanity requires non-null/non-empty CacheEntry[]"); + } + this.type = type; + this.msg = msg; + this.entries = entries; + } + + /// Type of insane behavior this object represents + public InsanityType Type + { + get { return type; } + } + + /// Description of hte insane behavior + public string Msg + { + get { return msg; } + } + + /// CacheEntry objects which suggest a problem + public CacheEntry[] GetCacheEntries() + { + return entries; + } + /// Multi-Line representation of this Insanity object, starting with + /// the Type and Msg, followed by each CacheEntry.toString() on it's + /// own line prefaced by a tab character + /// + public override System.String ToString() + { + System.Text.StringBuilder buf = new System.Text.StringBuilder(); + buf.Append(Type).Append(": "); + + System.String m = Msg; + if (null != m) + buf.Append(m); + + buf.Append('\n'); + + CacheEntry[] ce = GetCacheEntries(); + for (int i = 0; i < ce.Length; i++) + { + buf.Append('\t').Append(ce[i].ToString()).Append('\n'); + } + + return buf.ToString(); + } + } + + /// An Enumaration of the differnet types of "insane" behavior that + /// may be detected in a FieldCache. + /// + /// + /// + /// + /// + /// + /// + /// + public sealed class InsanityType + { + private System.String label; + internal InsanityType(System.String label) + { + this.label = label; + } + public override System.String ToString() + { + return label; + } + + /// Indicates an overlap in cache usage on a given field + /// in sub/super readers. + /// + public static readonly InsanityType SUBREADER = new InsanityType("SUBREADER"); + + ///

+ /// Indicates entries have the same reader+fieldname but
+ /// different cached values. This can happen if different datatypes
+ /// or parsers are used -- and while it is not necessarily a bug,
+ /// it is typically an indication of a possible problem.
+ ///
+ /// NOTE: Only the reader, fieldname, and cached value are actually
+ /// tested -- if two cache entries have different parsers or datatypes but
+ /// the cached values are the same Object (== not just equal()), this method
+ /// does not consider that a red flag. This allows for subtle variations
+ /// in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...).
+ ///
+ public static readonly InsanityType VALUEMISMATCH = new InsanityType("VALUEMISMATCH"); + + /// Indicates an expected bit of "insanity". This may be useful for + /// clients that wish to preserve/log information about insane usage + /// but indicate that it was expected. + /// + public static readonly InsanityType EXPECTED = new InsanityType("EXPECTED"); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/IAttribute.cs b/external/Lucene.Net.Light/src/core/Util/IAttribute.cs new file mode 100644 index 0000000000..e84313a857 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/IAttribute.cs @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Base interface for attributes. + public interface IAttribute + { + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/IdentityDictionary.cs b/external/Lucene.Net.Light/src/core/Util/IdentityDictionary.cs new file mode 100644 index 0000000000..f23f91f482 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/IdentityDictionary.cs @@ -0,0 +1,64 @@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * +*/ + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.Serialization; +using System.Text; + +namespace Lucene.Net.Util +{ + /// + /// A class that mimics Java's IdentityHashMap in that it determines + /// object equality solely on ReferenceEquals rather than (possibly overloaded) + /// object.Equals(). + /// + /// NOTE: Java's documentation on IdentityHashMap says that it also uses + /// ReferenceEquals on it's Values as well. 
This class does not follow this behavior + /// + /// The type of the keys in the dictionary + /// The type of the values in the dictionary + public class IdentityDictionary : Dictionary + { + public IdentityDictionary(IDictionary other) : base(other, new IdentityComparer()) + { } + + public IdentityDictionary(int capacity) : base(capacity, new IdentityComparer()) + { } + + public IdentityDictionary() : this(16) + { } + + class IdentityComparer : IEqualityComparer + { + public bool Equals(TKey x, TKey y) + { + return ReferenceEquals(x, y); + } + + public int GetHashCode(TKey obj) + { + return obj.GetHashCode(); + } + } + } +} diff --git a/external/Lucene.Net.Light/src/core/Util/IndexableBinaryStringTools.cs b/external/Lucene.Net.Light/src/core/Util/IndexableBinaryStringTools.cs new file mode 100644 index 0000000000..c6c9f46e2f --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/IndexableBinaryStringTools.cs @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +// {{Aroush-2.9}} Port issue? Both of those were treated as: System.IO.MemoryStream +//using CharBuffer = java.nio.CharBuffer; +//using ByteBuffer = java.nio.ByteBuffer; + +namespace Lucene.Net.Util +{ + + /// Provides support for converting byte sequences to Strings and back again. + /// The resulting Strings preserve the original byte sequences' sort order. + /// + /// The Strings are constructed using a Base 8000h encoding of the original + /// binary data - each char of an encoded String represents a 15-bit chunk + /// from the byte sequence. Base 8000h was chosen because it allows for all + /// lower 15 bits of char to be used without restriction; the surrogate range + /// [U+D8000-U+DFFF] does not represent valid chars, and would require + /// complicated handling to avoid them and allow use of char's high bit. + /// + /// Although unset bits are used as padding in the final char, the original + /// byte sequence could contain trailing bytes with no set bits (null bytes): + /// padding is indistinguishable from valid information. To overcome this + /// problem, a char is appended, indicating the number of encoded bytes in the + /// final content char. + /// + /// This class's operations are defined over CharBuffers and ByteBuffers, to + /// allow for wrapped arrays to be reused, reducing memory allocation costs for + /// repeated operations. Note that this class calls array() and arrayOffset() + /// on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be + /// used. 
This class interprets the arrayOffset() and limit() values returned by + /// its input buffers as beginning and end+1 positions on the wrapped array, + /// resprectively; similarly, on the output buffer, arrayOffset() is the first + /// position written to, and limit() is set to one past the final output array + /// position. + /// + public class IndexableBinaryStringTools + { + + private static readonly CodingCase[] CODING_CASES = new CodingCase[]{new CodingCase(7, 1), new CodingCase(14, 6, 2), new CodingCase(13, 5, 3), new CodingCase(12, 4, 4), new CodingCase(11, 3, 5), new CodingCase(10, 2, 6), new CodingCase(9, 1, 7), new CodingCase(8, 0)}; + + // Export only static methods + private IndexableBinaryStringTools() + { + } + + /// Returns the number of chars required to encode the given byte sequence. + /// + /// + /// The byte sequence to be encoded. Must be backed by an array. + /// + /// The number of chars required to encode the given byte sequence + /// + /// IllegalArgumentException If the given ByteBuffer is not backed by an array + public static int GetEncodedLength(System.Collections.Generic.List original) + { + return (original.Count == 0) ? 0 : ((original.Count * 8 + 14) / 15) + 1; + } + + /// Returns the number of bytes required to decode the given char sequence. + /// + /// + /// The char sequence to be encoded. Must be backed by an array. + /// + /// The number of bytes required to decode the given char sequence + /// + /// IllegalArgumentException If the given CharBuffer is not backed by an array + public static int GetDecodedLength(System.Collections.Generic.List encoded) + { + int numChars = encoded.Count - 1; + if (numChars <= 0) + { + return 0; + } + else + { + int numFullBytesInFinalChar = encoded[encoded.Count - 1]; + int numEncodedChars = numChars - 1; + return ((numEncodedChars * 15 + 7) / 8 + numFullBytesInFinalChar); + } + } + + /// Encodes the input byte sequence into the output char sequence. Before + /// calling this method, ensure that the output CharBuffer has sufficient + /// capacity by calling . + /// + /// + /// The byte sequence to encode + /// + /// Where the char sequence encoding result will go. The limit + /// is set to one past the position of the final char. 
+ /// + /// IllegalArgumentException If either the input or the output buffer + /// is not backed by an array + /// + public static void Encode(System.Collections.Generic.List input, System.Collections.Generic.List output) + { + int outputLength = GetEncodedLength(input); + // only adjust capacity if needed + if (output.Capacity < outputLength) + { + output.Capacity = outputLength; + } + + // ensure the buffer we are writing into is occupied with nulls + if (output.Count < outputLength) + { + for (int i = output.Count; i < outputLength; i++) + { + output.Add(Char.MinValue); + } + } + + if (input.Count > 0) + { + int inputByteNum = 0; + int caseNum = 0; + int outputCharNum = 0; + CodingCase codingCase; + for (; inputByteNum + CODING_CASES[caseNum].numBytes <= input.Count; ++outputCharNum) + { + codingCase = CODING_CASES[caseNum]; + if (2 == codingCase.numBytes) + { + output[outputCharNum] = (char)(((input[inputByteNum] & 0xFF) << codingCase.initialShift) + ((Number.URShift((input[inputByteNum + 1] & 0xFF), codingCase.finalShift)) & codingCase.finalMask) & (short)0x7FFF); + } + else + { + // numBytes is 3 + output[outputCharNum] = (char)(((input[inputByteNum] & 0xFF) << codingCase.initialShift) + ((input[inputByteNum + 1] & 0xFF) << codingCase.middleShift) + ((Number.URShift((input[inputByteNum + 2] & 0xFF), codingCase.finalShift)) & codingCase.finalMask) & (short)0x7FFF); + } + inputByteNum += codingCase.advanceBytes; + if (++caseNum == CODING_CASES.Length) + { + caseNum = 0; + } + } + // Produce final char (if any) and trailing count chars. + codingCase = CODING_CASES[caseNum]; + + if (inputByteNum + 1 < input.Count) + { + // codingCase.numBytes must be 3 + output[outputCharNum++] = (char) ((((input[inputByteNum] & 0xFF) << codingCase.initialShift) + ((input[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF); + // Add trailing char containing the number of full bytes in final char + output[outputCharNum++] = (char) 1; + } + else if (inputByteNum < input.Count) + { + output[outputCharNum++] = (char) (((input[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF); + // Add trailing char containing the number of full bytes in final char + output[outputCharNum++] = caseNum == 0?(char) 1:(char) 0; + } + else + { + // No left over bits - last char is completely filled. + // Add trailing char containing the number of full bytes in final char + output[outputCharNum++] = (char) 1; + } + } + } + + /// Decodes the input char sequence into the output byte sequence. Before + /// calling this method, ensure that the output ByteBuffer has sufficient + /// capacity by calling . + /// + /// + /// The char sequence to decode + /// + /// Where the byte sequence decoding result will go. The limit + /// is set to one past the position of the final char. 
+ /// + /// IllegalArgumentException If either the input or the output buffer + /// is not backed by an array + /// + public static void Decode(System.Collections.Generic.List input, System.Collections.Generic.List output) + { + int numOutputBytes = GetDecodedLength(input); + if (output.Capacity < numOutputBytes) + { + output.Capacity = numOutputBytes; + } + + // ensure the buffer we are writing into is occupied with nulls + if (output.Count < numOutputBytes) + { + for (int i = output.Count; i < numOutputBytes; i++) + { + output.Add(Byte.MinValue); + } + } + + if (input.Count > 0) + { + int caseNum = 0; + int outputByteNum = 0; + int inputCharNum = 0; + short inputChar; + CodingCase codingCase; + for (; inputCharNum < input.Count - 2; ++inputCharNum) + { + codingCase = CODING_CASES[caseNum]; + inputChar = (short) input[inputCharNum]; + if (2 == codingCase.numBytes) + { + if (0 == caseNum) + { + output[outputByteNum] = (byte) (Number.URShift(inputChar, codingCase.initialShift)); + } + else + { + output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift))); + } + output[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift); + } + else + { + // numBytes is 3 + output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift))); + output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.middleMask), codingCase.middleShift)); + output[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift); + } + outputByteNum += codingCase.advanceBytes; + if (++caseNum == CODING_CASES.Length) + { + caseNum = 0; + } + } + // Handle final char + inputChar = (short) input[inputCharNum]; + codingCase = CODING_CASES[caseNum]; + if (0 == caseNum) + { + output[outputByteNum] = 0; + } + output[outputByteNum] = (byte) (output[outputByteNum] + (byte) (Number.URShift(inputChar, codingCase.initialShift))); + long bytesLeft = numOutputBytes - outputByteNum; + if (bytesLeft > 1) + { + if (2 == codingCase.numBytes) + { + output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.finalMask), codingCase.finalShift)); + } + else + { + // numBytes is 3 + output[outputByteNum + 1] = (byte) (Number.URShift((inputChar & codingCase.middleMask), codingCase.middleShift)); + if (bytesLeft > 2) + { + output[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift); + } + } + } + } + } + + /// Decodes the given char sequence, which must have been encoded by + /// or + /// . + /// + /// + /// The char sequence to decode + /// + /// A byte sequence containing the decoding result. The limit + /// is set to one past the position of the final char. + /// + /// IllegalArgumentException If the input buffer is not backed by an + /// array + /// + public static System.Collections.Generic.List Decode(System.Collections.Generic.List input) + { + System.Collections.Generic.List output = + new System.Collections.Generic.List(new byte[GetDecodedLength(input)]); + Decode(input, output); + return output; + } + + /// Encodes the input byte sequence. + /// + /// + /// The byte sequence to encode + /// + /// A char sequence containing the encoding result. The limit is set + /// to one past the position of the final char. 
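A round-trip sketch for the list-based convenience overloads (not part of this patch); the `List<byte>`/`List<char>` shapes follow the methods shown here:

```csharp
using System;
using System.Collections.Generic;
using Lucene.Net.Util;

static class BinaryStringDemo
{
    static void Main()
    {
        var original = new List<byte> { 0xDE, 0xAD, 0xBE, 0xEF, 0x00 };

        // Each encoded char carries 15 bits of payload; a trailing char records
        // how many bytes the final content char really holds.
        List<char> encoded = IndexableBinaryStringTools.Encode(original);

        // Decoding restores the exact byte sequence, trailing zero byte included.
        List<byte> decoded = IndexableBinaryStringTools.Decode(encoded);

        Console.WriteLine(string.Join(" ", decoded)); // 222 173 190 239 0
    }
}
```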
+ /// + /// IllegalArgumentException If the input buffer is not backed by an + /// array + /// + public static System.Collections.Generic.List Encode(System.Collections.Generic.List input) + { + System.Collections.Generic.List output = + new System.Collections.Generic.List(new char[GetEncodedLength(input)]); + Encode(input, output); + return output; + } + + internal class CodingCase + { + internal int numBytes, initialShift, middleShift, finalShift, advanceBytes = 2; + internal short middleMask, finalMask; + + internal CodingCase(int initialShift, int middleShift, int finalShift) + { + this.numBytes = 3; + this.initialShift = initialShift; + this.middleShift = middleShift; + this.finalShift = finalShift; + this.finalMask = (short) (Number.URShift((short) 0xFF, finalShift)); + this.middleMask = (short) ((short) 0xFF << middleShift); + } + + internal CodingCase(int initialShift, int finalShift) + { + this.numBytes = 2; + this.initialShift = initialShift; + this.finalShift = finalShift; + this.finalMask = (short) (Number.URShift((short) 0xFF, finalShift)); + if (finalShift != 0) + { + advanceBytes = 1; + } + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/MapOfSets.cs b/external/Lucene.Net.Light/src/core/Util/MapOfSets.cs new file mode 100644 index 0000000000..ee997f4cc9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/MapOfSets.cs @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Util +{ + + /// Helper class for keeping Listss of Objects associated with keys. WARNING: THIS CLASS IS NOT THREAD SAFE + public class MapOfSets + { + private IDictionary> theMap; + + /// the backing store for this object + /// + public MapOfSets(IDictionary> m) + { + theMap = m; + } + + /// direct access to the map backing this object. + public virtual IDictionary> Map + { + get { return theMap; } + } + + /// Adds val to the Set associated with key in the Map. If key is not + /// already in the map, a new Set will first be created. + /// + /// the size of the Set associated with key once val is added to it. + /// + public virtual int Put(TKey key, TValue val) + { + HashSet theSet; + if (!theMap.TryGetValue(key, out theSet)) + { + theSet = new HashSet(); + theMap[key] = theSet; + } + theSet.Add(val); + return theSet.Count; + } + /// Adds multiple vals to the Set associated with key in the Map. + /// If key is not + /// already in the map, a new Set will first be created. + /// + /// the size of the Set associated with key once val is added to it. 
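A usage sketch for `MapOfSets` (not part of this patch), assuming the generic `MapOfSets<TKey, TValue>` shape from upstream Lucene.Net; `Put` and `PutAll` return the size of the set for the given key:

```csharp
using System;
using System.Collections.Generic;
using Lucene.Net.Util;

static class MapOfSetsDemo
{
    static void Main()
    {
        var fieldsByReader = new MapOfSets<string, string>(
            new Dictionary<string, HashSet<string>>());

        fieldsByReader.Put("segment_1", "title");                     // set size 1
        int size = fieldsByReader.Put("segment_1", "body");           // set size 2
        fieldsByReader.PutAll("segment_2", new[] { "title", "id" });  // set size 2

        Console.WriteLine(size);                                      // 2
        Console.WriteLine(fieldsByReader.Map["segment_2"].Count);     // 2
    }
}
```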
+ /// + public virtual int PutAll(TKey key, IEnumerable vals) + { + HashSet theSet; + if (!theMap.TryGetValue(key, out theSet)) + { + theSet = new HashSet(); + theMap[key] = theSet; + } + theSet.UnionWith(vals); + return theSet.Count; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/MemoryModel.cs b/external/Lucene.Net.Light/src/core/Util/MemoryModel.cs new file mode 100644 index 0000000000..ad5091aae9 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/MemoryModel.cs @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Returns primitive memory sizes for estimating RAM usage. + /// + /// + public abstract class MemoryModel + { + /// size of array beyond contents + public abstract int ArraySize { get; } + + /// Class size overhead + public abstract int ClassSize { get; } + + /// a primitive Class - bool, byte, char, short, long, float, + /// short, double, int + /// + /// the size in bytes of given primitive Class + /// + public abstract int GetPrimitiveSize(System.Type clazz); + + /// size of reference + public abstract int ReferenceSize { get; } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/NumericUtils.cs b/external/Lucene.Net.Light/src/core/Util/NumericUtils.cs new file mode 100644 index 0000000000..1bd68c21f7 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/NumericUtils.cs @@ -0,0 +1,488 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Documents; +using Lucene.Net.Search; +using Lucene.Net.Support; +using NumericTokenStream = Lucene.Net.Analysis.NumericTokenStream; + +namespace Lucene.Net.Util +{ + + /// This is a helper class to generate prefix-encoded representations for numerical values + /// and supplies converters to represent float/double values as sortable integers/longs. + /// + ///

To quickly execute range queries in Apache Lucene, a range is divided recursively + /// into multiple intervals for searching: The center of the range is searched only with + /// the lowest possible precision in the trie, while the boundaries are matched + /// more exactly. This reduces the number of terms dramatically. + /// + ///

This class generates terms to achieve this: First the numerical integer values need
+ /// to be converted to strings. For that, integer values (32 bit or 64 bit) are made unsigned
+ /// and the bits are converted to ASCII chars, 7 bits per char. The resulting string is
+ /// sortable like the original integer value. Each value is also prefixed
+ /// (in the first char) by the shift value (number of bits removed) used
+ /// during encoding.
+ ///
+ ///
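A minimal sketch of the encoding described above, using the IntToPrefixCoded/PrefixCodedToInt methods defined later in this class; the values are illustrative only:

using System;
using Lucene.Net.Util;

static class PrefixCodingSketch
{
    static void Main()
    {
        string small = NumericUtils.IntToPrefixCoded(17);    // shift = 0, full precision
        string large = NumericUtils.IntToPrefixCoded(4200);

        // Ordinal string order matches numeric order, because the sign bit is
        // flipped and the bits are stored right-justified, 7 bits per char.
        Console.WriteLine(String.CompareOrdinal(small, large) < 0);  // True
        Console.WriteLine(NumericUtils.PrefixCodedToInt(small));     // 17
    }
}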

To also index floating point numbers, this class supplies two methods to convert them
+ /// to integer values by changing their bit layout: ,
+ /// . You will have no precision loss by
+ /// converting floating point numbers to integers and back (only that the integer form
+ /// is not usable). Other data types like dates can easily be converted to longs or ints (e.g.
+ /// date to long: ).
+ ///
+ ///
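A minimal sketch of the float/double conversion described above, using DoubleToSortableLong/SortableLongToDouble as defined later in this class; the values are illustrative only:

using System;
using Lucene.Net.Util;

static class SortableDoubleSketch
{
    static void Main()
    {
        long a = NumericUtils.DoubleToSortableLong(-2.5);
        long b = NumericUtils.DoubleToSortableLong(0.25);

        Console.WriteLine(a < b);                                  // True: long order == double order
        Console.WriteLine(NumericUtils.SortableLongToDouble(a));   // -2.5, nothing lost in the round trip
    }
}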

For easy usage, the trie algorithm is implemented for indexing inside + /// that can index int, long, + /// float, and double. For querying, + /// and implement the query part + /// for the same data types. + /// + ///

This class can also be used to generate lexicographically sortable (according
+ /// ) representations of numeric data types for other
+ /// usages (e.g. sorting).
+ ///
+ ///

NOTE: This API is experimental and + /// might change in incompatible ways in the next release. + /// + ///

+ /// 2.9 + /// + public sealed class NumericUtils + { + + private NumericUtils() + { + } // no instance! + + /// The default precision step used by , , + /// , and as default + /// + public const int PRECISION_STEP_DEFAULT = 4; + + /// Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is + /// stored as SHIFT_START_LONG+shift in the first character + /// + public static char SHIFT_START_LONG = (char) 0x20; + + /// Expert: The maximum term length (used for char[] buffer size) + /// for encoding long values. + /// + /// + /// + public const int BUF_SIZE_LONG = 63 / 7 + 2; + + /// Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is + /// stored as SHIFT_START_INT+shift in the first character + /// + public static char SHIFT_START_INT = (char) 0x60; + + /// Expert: The maximum term length (used for char[] buffer size) + /// for encoding int values. + /// + /// + /// + public const int BUF_SIZE_INT = 31 / 7 + 2; + + /// Expert: Returns prefix coded bits after reducing the precision by shift bits. + /// This is method is used by . + /// + /// the numeric value + /// + /// how many bits to strip from the right + /// + /// that will contain the encoded chars, must be at least of + /// length + /// + /// number of chars written to buffer + /// + public static int LongToPrefixCoded(long val, int shift, char[] buffer) + { + if (shift > 63 || shift < 0) + throw new System.ArgumentException("Illegal shift value, must be 0..63"); + int nChars = (63 - shift) / 7 + 1, len = nChars + 1; + buffer[0] = (char) (SHIFT_START_LONG + shift); + ulong sortableBits = BitConverter.ToUInt64(BitConverter.GetBytes(val), 0) ^ 0x8000000000000000L; + sortableBits = sortableBits >> shift; + while (nChars >= 1) + { + // Store 7 bits per character for good efficiency when UTF-8 encoding. + // The whole number is right-justified so that lucene can prefix-encode + // the terms more efficiently. + buffer[nChars--] = (char) (sortableBits & 0x7f); + sortableBits = sortableBits >> 7; + } + return len; + } + + /// Expert: Returns prefix coded bits after reducing the precision by shift bits. + /// This is method is used by . + /// + /// the numeric value + /// + /// how many bits to strip from the right + /// + public static System.String LongToPrefixCoded(long val, int shift) + { + char[] buffer = new char[BUF_SIZE_LONG]; + int len = LongToPrefixCoded(val, shift, buffer); + return new System.String(buffer, 0, len); + } + + /// This is a convenience method, that returns prefix coded bits of a long without + /// reducing the precision. It can be used to store the full precision value as a + /// stored field in index. + ///

To decode, use . + ///

+ public static System.String LongToPrefixCoded(long val) + { + return LongToPrefixCoded(val, 0); + } + + /// Expert: Returns prefix coded bits after reducing the precision by shift bits. + /// This is method is used by . + /// + /// the numeric value + /// + /// how many bits to strip from the right + /// + /// that will contain the encoded chars, must be at least of + /// length + /// + /// number of chars written to buffer + /// + public static int IntToPrefixCoded(int val, int shift, char[] buffer) + { + if (shift > 31 || shift < 0) + throw new System.ArgumentException("Illegal shift value, must be 0..31"); + int nChars = (31 - shift) / 7 + 1, len = nChars + 1; + buffer[0] = (char) (SHIFT_START_INT + shift); + int sortableBits = val ^ unchecked((int) 0x80000000); + sortableBits = Number.URShift(sortableBits, shift); + while (nChars >= 1) + { + // Store 7 bits per character for good efficiency when UTF-8 encoding. + // The whole number is right-justified so that lucene can prefix-encode + // the terms more efficiently. + buffer[nChars--] = (char) (sortableBits & 0x7f); + sortableBits = Number.URShift(sortableBits, 7); + } + return len; + } + + /// Expert: Returns prefix coded bits after reducing the precision by shift bits. + /// This is method is used by . + /// + /// the numeric value + /// + /// how many bits to strip from the right + /// + public static System.String IntToPrefixCoded(int val, int shift) + { + char[] buffer = new char[BUF_SIZE_INT]; + int len = IntToPrefixCoded(val, shift, buffer); + return new System.String(buffer, 0, len); + } + + /// This is a convenience method, that returns prefix coded bits of an int without + /// reducing the precision. It can be used to store the full precision value as a + /// stored field in index. + ///

To decode, use . + ///

+ public static System.String IntToPrefixCoded(int val) + { + return IntToPrefixCoded(val, 0); + } + + /// Returns a long from prefixCoded characters. + /// Rightmost bits will be zero for lower precision codes. + /// This method can be used to decode e.g. a stored field. + /// + /// NumberFormatException if the supplied string is + /// not correctly prefix encoded. + /// + /// + /// + public static long PrefixCodedToLong(System.String prefixCoded) + { + int shift = prefixCoded[0] - SHIFT_START_LONG; + if (shift > 63 || shift < 0) + throw new System.FormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)"); + ulong sortableBits = 0UL; + for (int i = 1, len = prefixCoded.Length; i < len; i++) + { + sortableBits <<= 7; + char ch = prefixCoded[i]; + if (ch > 0x7f) + { + throw new System.FormatException("Invalid prefixCoded numerical value representation (char " + System.Convert.ToString((int) ch, 16) + " at position " + i + " is invalid)"); + } + sortableBits |= (ulong) ch; + } + return BitConverter.ToInt64(BitConverter.GetBytes((sortableBits << shift) ^ 0x8000000000000000L), 0); + } + + /// Returns an int from prefixCoded characters. + /// Rightmost bits will be zero for lower precision codes. + /// This method can be used to decode e.g. a stored field. + /// + /// NumberFormatException if the supplied string is + /// not correctly prefix encoded. + /// + /// + /// + public static int PrefixCodedToInt(System.String prefixCoded) + { + int shift = prefixCoded[0] - SHIFT_START_INT; + if (shift > 31 || shift < 0) + throw new System.FormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); + int sortableBits = 0; + for (int i = 1, len = prefixCoded.Length; i < len; i++) + { + sortableBits <<= 7; + char ch = prefixCoded[i]; + if (ch > 0x7f) + { + throw new System.FormatException("Invalid prefixCoded numerical value representation (char " + System.Convert.ToString((int) ch, 16) + " at position " + i + " is invalid)"); + } + sortableBits |= (int) ch; + } + return (sortableBits << shift) ^ unchecked((int) 0x80000000); + } + + /// Converts a double value to a sortable signed long. + /// The value is converted by getting their IEEE 754 floating-point "double format" + /// bit layout and then some bits are swapped, to be able to compare the result as long. + /// By this the precision is not reduced, but the value can easily used as a long. + /// + /// + /// + public static long DoubleToSortableLong(double val) + { + long f = BitConverter.DoubleToInt64Bits(val); // {{Aroush-2.9}} will this work the same as 'java.lang.Double.doubleToRawLongBits()'? + if (f < 0) + f ^= 0x7fffffffffffffffL; + return f; + } + + /// Convenience method: this just returns: + /// longToPrefixCoded(doubleToSortableLong(val)) + /// + public static System.String DoubleToPrefixCoded(double val) + { + return LongToPrefixCoded(DoubleToSortableLong(val)); + } + + /// Converts a sortable long back to a double. + /// + /// + public static double SortableLongToDouble(long val) + { + if (val < 0) + val ^= 0x7fffffffffffffffL; + return BitConverter.Int64BitsToDouble(val); + } + + /// Convenience method: this just returns: + /// sortableLongToDouble(prefixCodedToLong(val)) + /// + public static double PrefixCodedToDouble(System.String val) + { + return SortableLongToDouble(PrefixCodedToLong(val)); + } + + /// Converts a float value to a sortable signed int. 
+ /// The value is converted by getting their IEEE 754 floating-point "float format" + /// bit layout and then some bits are swapped, to be able to compare the result as int. + /// By this the precision is not reduced, but the value can easily used as an int. + /// + /// + /// + public static int FloatToSortableInt(float val) + { + int f = BitConverter.ToInt32(BitConverter.GetBytes(val), 0); + if (f < 0) + f ^= 0x7fffffff; + return f; + } + + /// Convenience method: this just returns: + /// intToPrefixCoded(floatToSortableInt(val)) + /// + public static System.String FloatToPrefixCoded(float val) + { + return IntToPrefixCoded(FloatToSortableInt(val)); + } + + /// Converts a sortable int back to a float. + /// + /// + public static float SortableIntToFloat(int val) + { + if (val < 0) + val ^= 0x7fffffff; + return BitConverter.ToSingle(BitConverter.GetBytes(val), 0); + } + + /// Convenience method: this just returns: + /// sortableIntToFloat(prefixCodedToInt(val)) + /// + public static float PrefixCodedToFloat(System.String val) + { + return SortableIntToFloat(PrefixCodedToInt(val)); + } + + /// Expert: Splits a long range recursively. + /// You may implement a builder that adds clauses to a + /// for each call to its + /// + /// method. + ///

This method is used by . + ///

+ public static void SplitLongRange(LongRangeBuilder builder, int precisionStep, long minBound, long maxBound) + { + SplitRange(builder, 64, precisionStep, minBound, maxBound); + } + + /// Expert: Splits an int range recursively. + /// You may implement a builder that adds clauses to a + /// for each call to its + /// + /// method. + ///

This method is used by . + ///

+ public static void SplitIntRange(IntRangeBuilder builder, int precisionStep, int minBound, int maxBound) + { + SplitRange(builder, 32, precisionStep, (long) minBound, (long) maxBound); + } + + /// This helper does the splitting for both 32 and 64 bit. + private static void SplitRange(System.Object builder, int valSize, int precisionStep, long minBound, long maxBound) + { + if (precisionStep < 1) + throw new System.ArgumentException("precisionStep must be >=1"); + if (minBound > maxBound) + return ; + for (int shift = 0; ; shift += precisionStep) + { + // calculate new bounds for inner precision + long diff = 1L << (shift + precisionStep); + long mask = ((1L << precisionStep) - 1L) << shift; + bool hasLower = (minBound & mask) != 0L; + bool hasUpper = (maxBound & mask) != mask; + long nextMinBound = (hasLower?(minBound + diff):minBound) & ~ mask; + long nextMaxBound = (hasUpper?(maxBound - diff):maxBound) & ~ mask; + bool lowerWrapped = nextMinBound < minBound, + upperWrapped = nextMaxBound > maxBound; + + if (shift+precisionStep>=valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped) + { + // We are in the lowest precision or the next precision is not available. + AddRange(builder, valSize, minBound, maxBound, shift); + // exit the split recursion loop + break; + } + + if (hasLower) + AddRange(builder, valSize, minBound, minBound | mask, shift); + if (hasUpper) + AddRange(builder, valSize, maxBound & ~ mask, maxBound, shift); + + // recurse to next precision + minBound = nextMinBound; + maxBound = nextMaxBound; + } + } + + /// Helper that delegates to correct range builder + private static void AddRange(System.Object builder, int valSize, long minBound, long maxBound, int shift) + { + // for the max bound set all lower bits (that were shifted away): + // this is important for testing or other usages of the splitted range + // (e.g. to reconstruct the full range). The prefixEncoding will remove + // the bits anyway, so they do not hurt! + maxBound |= (1L << shift) - 1L; + // delegate to correct range builder + switch (valSize) + { + + case 64: + ((LongRangeBuilder) builder).AddRange(minBound, maxBound, shift); + break; + + case 32: + ((IntRangeBuilder) builder).AddRange((int) minBound, (int) maxBound, shift); + break; + + default: + // Should not happen! + throw new System.ArgumentException("valSize must be 32 or 64."); + + } + } + + /// Expert: Callback for . + /// You need to overwrite only one of the methods. + ///

NOTE: This is a very low-level interface, + /// the method signatures may change in later versions. + ///

+ public abstract class LongRangeBuilder + { + + /// Overwrite this method, if you like to receive the already prefix encoded range bounds. + /// You can directly build classical (inclusive) range queries from them. + /// + public virtual void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded) + { + throw new System.NotSupportedException(); + } + + /// Overwrite this method, if you like to receive the raw long range bounds. + /// You can use this for e.g. debugging purposes (print out range bounds). + /// + public virtual void AddRange(long min, long max, int shift) + { + AddRange(Lucene.Net.Util.NumericUtils.LongToPrefixCoded(min, shift), Lucene.Net.Util.NumericUtils.LongToPrefixCoded(max, shift)); + } + } + + /// Expert: Callback for . + /// You need to overwrite only one of the methods. + ///
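A minimal sketch of the callback in use: a LongRangeBuilder subclass that records the sub-ranges produced by SplitLongRange. The builder name, bounds, and precision step are illustrative only:

using System;
using Lucene.Net.Util;

class CollectingLongRangeBuilder : NumericUtils.LongRangeBuilder
{
    public override void AddRange(long min, long max, int shift)
    {
        // Each callback delivers one sub-range plus the shift (precision)
        // at which it should be matched.
        Console.WriteLine("shift={0}: {1}..{2}", shift, min, max);
    }
}

static class SplitRangeSketch
{
    static void Main()
    {
        NumericUtils.SplitLongRange(new CollectingLongRangeBuilder(), 4, 15, 72);
    }
}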

NOTE: This is a very low-level interface, + /// the method signatures may change in later versions. + ///

+ public abstract class IntRangeBuilder + { + + /// Overwrite this method, if you like to receive the already prefix encoded range bounds. + /// You can directly build classical range (inclusive) queries from them. + /// + public virtual void AddRange(System.String minPrefixCoded, System.String maxPrefixCoded) + { + throw new System.NotSupportedException(); + } + + /// Overwrite this method, if you like to receive the raw int range bounds. + /// You can use this for e.g. debugging purposes (print out range bounds). + /// + public virtual void AddRange(int min, int max, int shift) + { + AddRange(Lucene.Net.Util.NumericUtils.IntToPrefixCoded(min, shift), Lucene.Net.Util.NumericUtils.IntToPrefixCoded(max, shift)); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/OpenBitSet.cs b/external/Lucene.Net.Light/src/core/Util/OpenBitSet.cs new file mode 100644 index 0000000000..d7c0979a3e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/OpenBitSet.cs @@ -0,0 +1,944 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using DocIdSet = Lucene.Net.Search.DocIdSet; +using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + +namespace Lucene.Net.Util +{ + + /// An "open" BitSet implementation that allows direct access to the array of words + /// storing the bits. + ///

+ /// Unlike java.util.bitset, the fact that bits are packed into an array of longs + /// is part of the interface. This allows efficient implementation of other algorithms + /// by someone other than the author. It also allows one to efficiently implement + /// alternate serialization or interchange formats. + ///

+ /// OpenBitSet is faster than java.util.BitSet in most operations + /// and *much* faster at calculating cardinality of sets and results of set operations. + /// It can also handle sets of larger cardinality (up to 64 * 2**32-1) + ///

+ /// The goals of OpenBitSet are the fastest implementation possible, and + /// maximum code reuse. Extra safety and encapsulation + /// may always be built on top, but if that's built in, the cost can never be removed (and + /// hence people re-implement their own version in order to get better performance). + /// If you want a "safe", totally encapsulated (and slower and limited) BitSet + /// class, use java.util.BitSet. + ///

+ ///

Performance Results

+ /// + /// Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M + ///
BitSet size = 1,000,000 + ///
Results are java.util.BitSet time divided by OpenBitSet time. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + ///
cardinality intersect_count union nextSetBit get iterator
50% full 3.36 3.96 1.44 1.46 1.99 1.58
1% full 3.31 3.90   1.04   0.99
+ ///
+ /// Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M + ///
BitSet size = 1,000,000 + ///
Results are java.util.BitSet time divided by OpenBitSet time. + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + ///
cardinality intersect_count union nextSetBit get iterator
50% full 2.50 3.50 1.00 1.03 1.12 1.25
1% full 2.51 3.49   1.00   1.02
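A minimal usage sketch of the class defined below; sizes and bit indexes are illustrative only:

using System;
using Lucene.Net.Util;

static class OpenBitSetSketch
{
    static void Main()
    {
        var bits = new OpenBitSet(1000);   // capacity in bits; bit i lives in word i/64 at position i%64
        bits.FastSet(3);                   // index known to be within the current size, no growth check
        bits.Set(900L);                    // grows the backing long[] if necessary

        Console.WriteLine(bits.Get(3));          // True
        Console.WriteLine(bits.Cardinality());   // 2
        Console.WriteLine(bits.NextSetBit(4));   // 900
    }
}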
+ ///
+ /// $Id$ + /// + + [Serializable] + public class OpenBitSet:DocIdSet, System.ICloneable + { + protected internal long[] internalbits; + protected internal int wlen; // number of words (elements) used in the array + + /// Constructs an OpenBitSet large enough to hold numBits. + /// + /// + /// + /// + public OpenBitSet(long numBits) + { + internalbits = new long[Bits2words(numBits)]; + wlen = internalbits.Length; + } + + public OpenBitSet():this(64) + { + } + + /// Constructs an OpenBitSet from an existing long[]. + ///
+ /// The first 64 bits are in long[0], + /// with bit index 0 at the least significant bit, and bit index 63 at the most significant. + /// Given a bit index, + /// the word containing it is long[index/64], and it is at bit number index%64 within that word. + ///

+ /// numWords are the number of elements in the array that contain + /// set bits (non-zero longs). + /// numWords should be <= bits.length, and + /// any existing words in the array at position >= numWords should be zero. + /// + ///

+ public OpenBitSet(long[] bits, int numWords) + { + this.internalbits = bits; + this.wlen = numWords; + } + + public override DocIdSetIterator Iterator() + { + return new OpenBitSetIterator(internalbits, wlen); + } + + /// This DocIdSet implementation is cacheable. + public override bool IsCacheable + { + get { return true; } + } + + /// Returns the current capacity in bits (1 greater than the index of the last bit) + public virtual long Capacity() + { + return internalbits.Length << 6; + } + + /// Returns the current capacity of this set. Included for + /// compatibility. This is *not* equal to + /// + public virtual long Size() + { + return Capacity(); + } + + /// Returns true if there are no set bits + public virtual bool IsEmpty() + { + return Cardinality() == 0; + } + + /// Expert: Gets or sets the long[] storing the bits + [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Performance", "CA1819:PropertiesShouldNotReturnArrays")] + public virtual long[] Bits + { + set { this.internalbits = value; } + get { return internalbits; } + } + + /// Expert: gets or sets the number of longs in the array that are in use + public virtual int NumWords + { + get { return wlen; } + set { this.wlen = value; } + } + + + /// Returns true or false for the specified bit index. + public virtual bool Get(int index) + { + int i = index >> 6; // div 64 + // signed shift will keep a negative index and force an + // array-index-out-of-bounds-exception, removing the need for an explicit check. + if (i >= internalbits.Length) + return false; + + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (internalbits[i] & bitmask) != 0; + } + + + /// Returns true or false for the specified bit index. + /// The index should be less than the OpenBitSet size + /// + public virtual bool FastGet(int index) + { + int i = index >> 6; // div 64 + // signed shift will keep a negative index and force an + // array-index-out-of-bounds-exception, removing the need for an explicit check. + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (internalbits[i] & bitmask) != 0; + } + + + + /// Returns true or false for the specified bit index + public virtual bool Get(long index) + { + int i = (int) (index >> 6); // div 64 + if (i >= internalbits.Length) + return false; + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (internalbits[i] & bitmask) != 0; + } + + /// Returns true or false for the specified bit index. + /// The index should be less than the OpenBitSet size. + /// + public virtual bool FastGet(long index) + { + int i = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + return (internalbits[i] & bitmask) != 0; + } + + /* + // alternate implementation of get() + public boolean get1(int index) { + int i = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + return ((bits[i]>>>bit) & 0x01) != 0; + // this does a long shift and a bittest (on x86) vs + // a long shift, and a long AND, (the test for zero is prob a no-op) + // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0; + } + */ + + + /// returns 1 if the bit is set, 0 if not. 
+ /// The index should be less than the OpenBitSet size + /// + public virtual int GetBit(int index) + { + int i = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + return ((int )((ulong) (internalbits[i]) >> bit)) & 0x01; + } + + + /* + public boolean get2(int index) { + int word = index >> 6; // div 64 + int bit = index & 0x0000003f; // mod 64 + return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed + // we could right shift and check for parity bit, if it was available to us. + } + */ + + /// sets a bit, expanding the set size if necessary + public virtual void Set(long index) + { + int wordNum = ExpandingWordNum(index); + int bit = (int) index & 0x3f; + long bitmask = 1L << bit; + internalbits[wordNum] |= bitmask; + } + + + /// Sets the bit at the specified index. + /// The index should be less than the OpenBitSet size. + /// + public virtual void FastSet(int index) + { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] |= bitmask; + } + + /// Sets the bit at the specified index. + /// The index should be less than the OpenBitSet size. + /// + public virtual void FastSet(long index) + { + int wordNum = (int) (index >> 6); + int bit = (int) index & 0x3f; + long bitmask = 1L << bit; + internalbits[wordNum] |= bitmask; + } + + /// Sets a range of bits, expanding the set size if necessary + /// + /// + /// lower index + /// + /// one-past the last bit to set + /// + public virtual void Set(long startIndex, long endIndex) + { + if (endIndex <= startIndex) + return ; + + int startWord = (int) (startIndex >> 6); + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = ExpandingWordNum(endIndex - 1); + + long startmask = - 1L << (int) startIndex; + long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + if (startWord == endWord) + { + internalbits[startWord] |= (startmask & endmask); + return ; + } + + internalbits[startWord] |= startmask; + for (int i = startWord + 1; i < endWord; i++) + internalbits[i] = -1L; + internalbits[endWord] |= endmask; + } + + + + protected internal virtual int ExpandingWordNum(long index) + { + int wordNum = (int) (index >> 6); + if (wordNum >= wlen) + { + EnsureCapacity(index + 1); + wlen = wordNum + 1; + } + return wordNum; + } + + + /// clears a bit. + /// The index should be less than the OpenBitSet size. + /// + public virtual void FastClear(int index) + { + int wordNum = index >> 6; + int bit = index & 0x03f; + long bitmask = 1L << bit; + internalbits[wordNum] &= ~ bitmask; + // hmmm, it takes one more instruction to clear than it does to set... any + // way to work around this? If there were only 63 bits per word, we could + // use a right shift of 10111111...111 in binary to position the 0 in the + // correct place (using sign extension). + // Could also use Long.rotateRight() or rotateLeft() *if* they were converted + // by the JVM into a native instruction. + // bits[word] &= Long.rotateLeft(0xfffffffe,bit); + } + + /// clears a bit. + /// The index should be less than the OpenBitSet size. + /// + public virtual void FastClear(long index) + { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] &= ~ bitmask; + } + + /// clears a bit, allowing access beyond the current set size without changing the size. 
+ public virtual void Clear(long index) + { + int wordNum = (int) (index >> 6); // div 64 + if (wordNum >= wlen) + return ; + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] &= ~ bitmask; + } + + /// Clears a range of bits. Clearing past the end does not change the size of the set. + /// + /// + /// lower index + /// + /// one-past the last bit to clear + /// + public virtual void Clear(int startIndex, int endIndex) + { + if (endIndex <= startIndex) + return ; + + int startWord = (startIndex >> 6); + if (startWord >= wlen) + return ; + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = ((endIndex - 1) >> 6); + + long startmask = - 1L << startIndex; + long endmask = (long) (0xffffffffffffffffUL >> - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + // invert masks since we are clearing + startmask = ~ startmask; + endmask = ~ endmask; + + if (startWord == endWord) + { + internalbits[startWord] &= (startmask | endmask); + return ; + } + + internalbits[startWord] &= startmask; + + int middle = System.Math.Min(wlen, endWord); + for (int i = startWord + 1; i < middle; i++) + internalbits[i] = 0L; + if (endWord < wlen) + { + internalbits[endWord] &= endmask; + } + } + + + /// Clears a range of bits. Clearing past the end does not change the size of the set. + /// + /// + /// lower index + /// + /// one-past the last bit to clear + /// + public virtual void Clear(long startIndex, long endIndex) + { + if (endIndex <= startIndex) + return ; + + int startWord = (int) (startIndex >> 6); + if (startWord >= wlen) + return ; + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = (int) ((endIndex - 1) >> 6); + + long startmask = - 1L << (int) startIndex; + long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + // invert masks since we are clearing + startmask = ~ startmask; + endmask = ~ endmask; + + if (startWord == endWord) + { + internalbits[startWord] &= (startmask | endmask); + return ; + } + + internalbits[startWord] &= startmask; + + int middle = System.Math.Min(wlen, endWord); + for (int i = startWord + 1; i < middle; i++) + internalbits[i] = 0L; + if (endWord < wlen) + { + internalbits[endWord] &= endmask; + } + } + + + + /// Sets a bit and returns the previous value. + /// The index should be less than the OpenBitSet size. + /// + public virtual bool GetAndSet(int index) + { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bool val = (internalbits[wordNum] & bitmask) != 0; + internalbits[wordNum] |= bitmask; + return val; + } + + /// Sets a bit and returns the previous value. + /// The index should be less than the OpenBitSet size. + /// + public virtual bool GetAndSet(long index) + { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + bool val = (internalbits[wordNum] & bitmask) != 0; + internalbits[wordNum] |= bitmask; + return val; + } + + /// flips a bit. + /// The index should be less than the OpenBitSet size. + /// + public virtual void FastFlip(int index) + { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] ^= bitmask; + } + + /// flips a bit. + /// The index should be less than the OpenBitSet size. 
+ /// + public virtual void FastFlip(long index) + { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] ^= bitmask; + } + + /// flips a bit, expanding the set size if necessary + public virtual void Flip(long index) + { + int wordNum = ExpandingWordNum(index); + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] ^= bitmask; + } + + /// flips a bit and returns the resulting bit value. + /// The index should be less than the OpenBitSet size. + /// + public virtual bool FlipAndGet(int index) + { + int wordNum = index >> 6; // div 64 + int bit = index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] ^= bitmask; + return (internalbits[wordNum] & bitmask) != 0; + } + + /// flips a bit and returns the resulting bit value. + /// The index should be less than the OpenBitSet size. + /// + public virtual bool FlipAndGet(long index) + { + int wordNum = (int) (index >> 6); // div 64 + int bit = (int) index & 0x3f; // mod 64 + long bitmask = 1L << bit; + internalbits[wordNum] ^= bitmask; + return (internalbits[wordNum] & bitmask) != 0; + } + + /// Flips a range of bits, expanding the set size if necessary + /// + /// + /// lower index + /// + /// one-past the last bit to flip + /// + public virtual void Flip(long startIndex, long endIndex) + { + if (endIndex <= startIndex) + return ; + int startWord = (int) (startIndex >> 6); + + // since endIndex is one past the end, this is index of the last + // word to be changed. + int endWord = ExpandingWordNum(endIndex - 1); + + /* Grrr, java shifting wraps around so -1L>>>64 == -1 + * for that reason, make sure not to use endmask if the bits to flip will + * be zero in the last word (redefine endWord to be the last changed...) + long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000 + long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111 + ***/ + + long startmask = - 1L << (int) startIndex; + long endmask = (long) (0xffffffffffffffffUL >> (int) - endIndex); // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + if (startWord == endWord) + { + internalbits[startWord] ^= (startmask & endmask); + return ; + } + + internalbits[startWord] ^= startmask; + + for (int i = startWord + 1; i < endWord; i++) + { + internalbits[i] = ~ internalbits[i]; + } + + internalbits[endWord] ^= endmask; + } + + + /* + public static int pop(long v0, long v1, long v2, long v3) { + // derived from pop_array by setting last four elems to 0. + // exchanges one pop() call for 10 elementary operations + // saving about 7 instructions... is there a better way? + long twosA=v0 & v1; + long ones=v0^v1; + + long u2=ones^v2; + long twosB =(ones&v2)|(u2&v3); + ones=u2^v3; + + long fours=(twosA&twosB); + long twos=twosA^twosB; + + return (pop(fours)<<2) + + (pop(twos)<<1) + + pop(ones); + + } + */ + + + /// the number of set bits + /// + public virtual long Cardinality() + { + return BitUtil.Pop_array(internalbits, 0, wlen); + } + + /// Returns the popcount or cardinality of the intersection of the two sets. + /// Neither set is modified. + /// + public static long IntersectionCount(OpenBitSet a, OpenBitSet b) + { + return BitUtil.Pop_intersect(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen)); + } + + /// Returns the popcount or cardinality of the union of the two sets. + /// Neither set is modified. 
+ /// + public static long UnionCount(OpenBitSet a, OpenBitSet b) + { + long tot = BitUtil.Pop_union(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen)); + if (a.wlen < b.wlen) + { + tot += BitUtil.Pop_array(b.internalbits, a.wlen, b.wlen - a.wlen); + } + else if (a.wlen > b.wlen) + { + tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen); + } + return tot; + } + + /// Returns the popcount or cardinality of "a and not b" + /// or "intersection(a, not(b))". + /// Neither set is modified. + /// + public static long AndNotCount(OpenBitSet a, OpenBitSet b) + { + long tot = BitUtil.Pop_andnot(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen)); + if (a.wlen > b.wlen) + { + tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen); + } + return tot; + } + + /// Returns the popcount or cardinality of the exclusive-or of the two sets. + /// Neither set is modified. + /// + public static long XorCount(OpenBitSet a, OpenBitSet b) + { + long tot = BitUtil.Pop_xor(a.internalbits, b.internalbits, 0, System.Math.Min(a.wlen, b.wlen)); + if (a.wlen < b.wlen) + { + tot += BitUtil.Pop_array(b.internalbits, a.wlen, b.wlen - a.wlen); + } + else if (a.wlen > b.wlen) + { + tot += BitUtil.Pop_array(a.internalbits, b.wlen, a.wlen - b.wlen); + } + return tot; + } + + + /// Returns the index of the first set bit starting at the index specified. + /// -1 is returned if there are no more set bits. + /// + public virtual int NextSetBit(int index) + { + int i = index >> 6; + if (i >= wlen) + return - 1; + int subIndex = index & 0x3f; // index within the word + long word = internalbits[i] >> subIndex; // skip all the bits to the right of index + + if (word != 0) + { + return (i << 6) + subIndex + BitUtil.Ntz(word); + } + + while (++i < wlen) + { + word = internalbits[i]; + if (word != 0) + return (i << 6) + BitUtil.Ntz(word); + } + + return - 1; + } + + /// Returns the index of the first set bit starting at the index specified. + /// -1 is returned if there are no more set bits. 
+ /// + public virtual long NextSetBit(long index) + { + int i = (int) (index >> 6); + if (i >= wlen) + return - 1; + int subIndex = (int) index & 0x3f; // index within the word + long word = (long) ((ulong) internalbits[i] >> subIndex); // skip all the bits to the right of index + + if (word != 0) + { + return (((long) i) << 6) + (subIndex + BitUtil.Ntz(word)); + } + + while (++i < wlen) + { + word = internalbits[i]; + if (word != 0) + return (((long) i) << 6) + BitUtil.Ntz(word); + } + + return - 1; + } + + + + + public virtual System.Object Clone() + { + try + { + OpenBitSet obs = new OpenBitSet((long[]) internalbits.Clone(), wlen); + //obs.bits = new long[obs.bits.Length]; + //obs.bits.CopyTo(obs.bits, 0); // hopefully an array clone is as fast(er) than arraycopy + return obs; + } + catch (System.Exception e) + { + throw new System.SystemException(e.Message, e); + } + } + + /// this = this AND other + public virtual void Intersect(OpenBitSet other) + { + int newLen = System.Math.Min(this.wlen, other.wlen); + long[] thisArr = this.internalbits; + long[] otherArr = other.internalbits; + // testing against zero can be more efficient + int pos = newLen; + while (--pos >= 0) + { + thisArr[pos] &= otherArr[pos]; + } + if (this.wlen > newLen) + { + // fill zeros from the new shorter length to the old length + for (int i = newLen; i < this.wlen; i++) + internalbits[i] = 0L; + } + this.wlen = newLen; + } + + /// this = this OR other + public virtual void Union(OpenBitSet other) + { + int newLen = System.Math.Max(wlen, other.wlen); + EnsureCapacityWords(newLen); + + long[] thisArr = this.internalbits; + long[] otherArr = other.internalbits; + int pos = System.Math.Min(wlen, other.wlen); + while (--pos >= 0) + { + thisArr[pos] |= otherArr[pos]; + } + if (this.wlen < newLen) + { + Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen); + } + this.wlen = newLen; + } + + + /// Remove all elements set in other. this = this AND_NOT other + public virtual void Remove(OpenBitSet other) + { + int idx = System.Math.Min(wlen, other.wlen); + long[] thisArr = this.internalbits; + long[] otherArr = other.internalbits; + while (--idx >= 0) + { + thisArr[idx] &= ~ otherArr[idx]; + } + } + + /// this = this XOR other + public virtual void Xor(OpenBitSet other) + { + int newLen = System.Math.Max(wlen, other.wlen); + EnsureCapacityWords(newLen); + + long[] thisArr = this.internalbits; + long[] otherArr = other.internalbits; + int pos = System.Math.Min(wlen, other.wlen); + while (--pos >= 0) + { + thisArr[pos] ^= otherArr[pos]; + } + if (this.wlen < newLen) + { + Array.Copy(otherArr, this.wlen, thisArr, this.wlen, newLen - this.wlen); + } + this.wlen = newLen; + } + + + // some BitSet compatability methods + + //* see */ + public virtual void And(OpenBitSet other) + { + Intersect(other); + } + + //* see */ + public virtual void Or(OpenBitSet other) + { + Union(other); + } + + //* see */ + public virtual void AndNot(OpenBitSet other) + { + Remove(other); + } + + /// returns true if the sets have any elements in common + public virtual bool Intersects(OpenBitSet other) + { + int pos = System.Math.Min(this.wlen, other.wlen); + long[] thisArr = this.internalbits; + long[] otherArr = other.internalbits; + while (--pos >= 0) + { + if ((thisArr[pos] & otherArr[pos]) != 0) + return true; + } + return false; + } + + + + /// Expand the long[] with the size given as a number of words (64 bit longs). + /// getNumWords() is unchanged by this call. 
+ /// + public virtual void EnsureCapacityWords(int numWords) + { + if (internalbits.Length < numWords) + { + internalbits = ArrayUtil.Grow(internalbits, numWords); + } + } + + /// Ensure that the long[] is big enough to hold numBits, expanding it if necessary. + /// getNumWords() is unchanged by this call. + /// + public virtual void EnsureCapacity(long numBits) + { + EnsureCapacityWords(Bits2words(numBits)); + } + + /// Lowers numWords, the number of words in use, + /// by checking for trailing zero words. + /// + public virtual void TrimTrailingZeros() + { + int idx = wlen - 1; + while (idx >= 0 && internalbits[idx] == 0) + idx--; + wlen = idx + 1; + } + + /// returns the number of 64 bit words it would take to hold numBits + public static int Bits2words(long numBits) + { + return (int) ((((numBits - 1) >> 6)) + 1); + } + + + /// returns true if both sets have the same bits set + public override bool Equals(System.Object o) + { + if (this == o) + return true; + if (!(o is OpenBitSet)) + return false; + OpenBitSet a; + OpenBitSet b = (OpenBitSet) o; + // make a the larger set. + if (b.wlen > this.wlen) + { + a = b; b = this; + } + else + { + a = this; + } + + // check for any set bits out of the range of b + for (int i = a.wlen - 1; i >= b.wlen; i--) + { + if (a.internalbits[i] != 0) + return false; + } + + for (int i = b.wlen - 1; i >= 0; i--) + { + if (a.internalbits[i] != b.internalbits[i]) + return false; + } + + return true; + } + + public override int GetHashCode() + { + // Start with a zero hash and use a mix that results in zero if the input is zero. + // This effectively truncates trailing zeros without an explicit check. + long h = 0; + for (int i = internalbits.Length; --i >= 0; ) + { + h ^= internalbits[i]; + h = (h << 1) | (Number.URShift(h, 63)); // rotate left + } + // fold leftmost bits into right and add a constant to prevent + // empty sets from returning 0, which is too common. + return (int)(((h >> 32) ^ h) + 0x98761234); + } + + + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/OpenBitSetDISI.cs b/external/Lucene.Net.Light/src/core/Util/OpenBitSetDISI.cs new file mode 100644 index 0000000000..41d9fa924c --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/OpenBitSetDISI.cs @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + +namespace Lucene.Net.Util +{ + + [Serializable] + public class OpenBitSetDISI:OpenBitSet + { + + /// Construct an OpenBitSetDISI with its bits set + /// from the doc ids of the given DocIdSetIterator. + /// Also give a maximum size one larger than the largest doc id for which a + /// bit may ever be set on this OpenBitSetDISI. 
+ /// + public OpenBitSetDISI(DocIdSetIterator disi, int maxSize):base(maxSize) + { + InPlaceOr(disi); + } + + /// Construct an OpenBitSetDISI with no bits set, and a given maximum size + /// one larger than the largest doc id for which a bit may ever be set + /// on this OpenBitSetDISI. + /// + public OpenBitSetDISI(int maxSize):base(maxSize) + { + } + + /// Perform an inplace OR with the doc ids from a given DocIdSetIterator, + /// setting the bit for each such doc id. + /// These doc ids should be smaller than the maximum size passed to the + /// constructor. + /// + public virtual void InPlaceOr(DocIdSetIterator disi) + { + int doc; + long size = Size(); + while ((doc = disi.NextDoc()) < size) + { + FastSet(doc); + } + } + + /// Perform an inplace AND with the doc ids from a given DocIdSetIterator, + /// leaving only the bits set for which the doc ids are in common. + /// These doc ids should be smaller than the maximum size passed to the + /// constructor. + /// + public virtual void InPlaceAnd(DocIdSetIterator disi) + { + int bitSetDoc = NextSetBit(0); + int disiDoc; + while (bitSetDoc != - 1 && (disiDoc = disi.Advance(bitSetDoc)) != DocIdSetIterator.NO_MORE_DOCS) + { + Clear(bitSetDoc, disiDoc); + bitSetDoc = NextSetBit(disiDoc + 1); + } + if (bitSetDoc != - 1) + { + Clear(bitSetDoc, Size()); + } + } + + /// Perform an inplace NOT with the doc ids from a given DocIdSetIterator, + /// clearing all the bits for each such doc id. + /// These doc ids should be smaller than the maximum size passed to the + /// constructor. + /// + public virtual void InPlaceNot(DocIdSetIterator disi) + { + int doc; + long size = Size(); + while ((doc = disi.NextDoc()) < size) + { + FastClear(doc); + } + } + + /// Perform an inplace XOR with the doc ids from a given DocIdSetIterator, + /// flipping all the bits for each such doc id. + /// These doc ids should be smaller than the maximum size passed to the + /// constructor. + /// + public virtual void InPlaceXor(DocIdSetIterator disi) + { + int doc; + long size = Size(); + while ((doc = disi.NextDoc()) < size) + { + FastFlip(doc); + } + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/OpenBitSetIterator.cs b/external/Lucene.Net.Light/src/core/Util/OpenBitSetIterator.cs new file mode 100644 index 0000000000..110dba6808 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/OpenBitSetIterator.cs @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + +namespace Lucene.Net.Util +{ + + /// An iterator to iterate over set bits in an OpenBitSet. + /// This is faster than nextSetBit() for iterating over the complete set of bits, + /// especially when the density of the bits set is high. 
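A minimal sketch of OpenBitSetDISI fed by an OpenBitSetIterator (defined in the next file) acting as the DocIdSetIterator; sizes and doc ids are illustrative only:

using System;
using Lucene.Net.Util;

static class OpenBitSetDISISketch
{
    static void Main()
    {
        var target = new OpenBitSetDISI(128);
        target.FastSet(2);
        target.FastSet(5);
        target.FastSet(64);

        var other = new OpenBitSet(128);
        other.FastSet(5);
        other.FastSet(64);
        other.FastSet(100);

        // Keep only the doc ids that the iterator also produces.
        target.InPlaceAnd(new OpenBitSetIterator(other));

        Console.WriteLine(target.Get(2));   // False
        Console.WriteLine(target.Get(5));   // True
        Console.WriteLine(target.Get(64));  // True
    }
}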
+ /// + /// + /// $Id$ + /// + public class OpenBitSetIterator:DocIdSetIterator + { + + // The General Idea: instead of having an array per byte that has + // the offsets of the next set bit, that array could be + // packed inside a 32 bit integer (8 4 bit numbers). That + // should be faster than accessing an array for each index, and + // the total array size is kept smaller (256*sizeof(int))=1K + // NOTE: Removed protected access for CLS-Compliance + /*protected*/ internal static readonly uint[] bitlist = new uint[] + { + 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, + 0x42, 0x421, 0x43, 0x431, 0x432, 0x4321, 0x5, 0x51, + 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, 0x54, 0x541, + 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, + 0x61, 0x62, 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, + 0x641, 0x642, 0x6421, 0x643, 0x6431, 0x6432, 0x64321, + 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532, + 0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543, + 0x65431, 0x65432, 0x654321, 0x7, 0x71, 0x72, 0x721, + 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421, + 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, + 0x7521, 0x753, 0x7531, 0x7532, 0x75321, 0x754, 0x7541, + 0x7542, 0x75421, 0x7543, 0x75431, 0x75432, 0x754321, + 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632, + 0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643, + 0x76431, 0x76432, 0x764321, 0x765, 0x7651, 0x7652, + 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654, + 0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432 + , 0x7654321, 0x8, 0x81, 0x82, 0x821, 0x83, 0x831, 0x832 + , 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431, + 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, + 0x8531, 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421 + , 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861, + 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864, + 0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432, + 0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653, + 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542, + 0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87, + 0x871, 0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321, + 0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431, + 0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521, + 0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541, + 0x87542, 0x875421, 0x87543, 0x875431, 0x875432, + 0x8754321, 0x876, 0x8761, 0x8762, 0x87621, 0x8763, + 0x87631, 0x87632, 0x876321, 0x8764, 0x87641, 0x87642, + 0x876421, 0x87643, 0x876431, 0x876432, 0x8764321, + 0x8765, 0x87651, 0x87652, 0x876521, 0x87653, 0x876531, + 0x876532, 0x8765321, 0x87654, + 0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431, + 0x8765432, 0x87654321 + }; + /// ** the python code that generated bitlist + /// def bits2int(val): + /// arr=0 + /// for shift in range(8,0,-1): + /// if val & 0x80: + /// arr = (arr << 4) | shift + /// val = val << 1 + /// return arr + /// def int_table(): + /// tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] + /// return ','.join(tbl) + /// **** + /// + + // hmmm, what about an iterator that finds zeros though, + // or a reverse iterator... should they be separate classes + // for efficiency, or have a common root interface? (or + // maybe both? could ask for a SetBitsIterator, etc... 
+ + private readonly long[] arr; + private readonly int words; + private int i = - 1; + private long word; + private int wordShift; + private int indexArray; + private int curDocId = - 1; + + public OpenBitSetIterator(OpenBitSet obs):this(obs.Bits, obs.NumWords) + { + } + + public OpenBitSetIterator(long[] bits, int numWords) + { + arr = bits; + words = numWords; + } + + // 64 bit shifts + private void Shift() + { + if ((int) word == 0) + { + wordShift += 32; word = (long) ((ulong) word >> 32); + } + if ((word & 0x0000FFFF) == 0) + { + wordShift += 16; word = (long) ((ulong) word >> 16); + } + if ((word & 0x000000FF) == 0) + { + wordShift += 8; word = (long) ((ulong) word >> 8); + } + indexArray = (int) bitlist[word & 0xff]; + } + + /*/// ** alternate shift implementations + /// // 32 bit shifts, but a long shift needed at the end + /// private void shift2() { + /// int y = (int)word; + /// if (y==0) {wordShift +=32; y = (int)(word >>>32); } + /// if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; } + /// if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; } + /// indexArray = bitlist[y & 0xff]; + /// word >>>= (wordShift +1); + /// } + /// private void shift3() { + /// int lower = (int)word; + /// int lowByte = lower & 0xff; + /// if (lowByte != 0) { + /// indexArray=bitlist[lowByte]; + /// return; + /// } + /// shift(); + /// } + /// **** + /// */ + + public override int NextDoc() + { + if (indexArray == 0) + { + if (word != 0) + { + word = (long) ((ulong) word >> 8); + wordShift += 8; + } + + while (word == 0) + { + if (++i >= words) + { + return curDocId = NO_MORE_DOCS; + } + word = arr[i]; + wordShift = - 1; // loop invariant code motion should move this + } + + // after the first time, should I go with a linear search, or + // stick with the binary search in shift? + Shift(); + } + + int bitIndex = (indexArray & 0x0f) + wordShift; + indexArray = (int) ((uint) indexArray >> 4); + // should i<<6 be cached as a separate variable? + // it would only save one cycle in the best circumstances. + return curDocId = (i << 6) + bitIndex; + } + + public override int Advance(int target) + { + indexArray = 0; + i = target >> 6; + if (i >= words) + { + word = 0; // setup so next() will also return -1 + return curDocId = NO_MORE_DOCS; + } + wordShift = target & 0x3f; + word = (long) ((ulong) arr[i] >> wordShift); + if (word != 0) + { + wordShift--; // compensate for 1 based arrIndex + } + else + { + while (word == 0) + { + if (++i >= words) + { + return curDocId = NO_MORE_DOCS; + } + word = arr[i]; + } + wordShift = - 1; + } + + Shift(); + + int bitIndex = (indexArray & 0x0f) + wordShift; + indexArray = (int) ((uint) indexArray >> 4); + // should i<<6 be cached as a separate variable? + // it would only save one cycle in the best circumstances. + return curDocId = (i << 6) + bitIndex; + } + + public override int DocID() + { + return curDocId; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/PriorityQueue.cs b/external/Lucene.Net.Light/src/core/Util/PriorityQueue.cs new file mode 100644 index 0000000000..77a682b704 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/PriorityQueue.cs @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; + +namespace Lucene.Net.Util +{ + + /// A PriorityQueue maintains a partial ordering of its elements such that the + /// least element can always be found in constant time. Put()'s and pop()'s + /// require log(size) time. + /// + ///

NOTE: This class pre-allocates a full array of + /// length maxSize+1, in Initialize(int maxSize). + /// + ///
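For orientation, a minimal sketch of how the generic PriorityQueue added below is typically subclassed; the element type, names and values here are illustrative only, not part of this change. With a LessThan of this shape the queue retains the maxSize "largest" elements seen, with the least of them on top:

    class LongestStringsQueue : Lucene.Net.Util.PriorityQueue<string>
    {
        public LongestStringsQueue(int maxSize) { Initialize(maxSize); }

        // Shorter strings compare as "less", so the shortest retained string sits on top.
        public override bool LessThan(string a, string b) { return a.Length < b.Length; }

        public static void Demo()
        {
            var pq = new LongestStringsQueue(2);
            pq.InsertWithOverflow("to");
            pq.InsertWithOverflow("lucene");
            pq.InsertWithOverflow("mono");   // queue is full: "to", the least element, is evicted and returned
            string top = pq.Top();           // "mono", the least of the two survivors
            string least = pq.Pop();         // removes "mono"; "lucene" remains
        }
    }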

+ // TODO: T needs to be able to return null. Behavior might be unexpected otherwise, since it returns default(T) + // I only see a non-nullable type used in PriorityQueue in the tests. may be possible to re-write tests to + // use an IComparable class, and this can be changed back to constraining on class, to return null, or should + // we leave as is? + public abstract class PriorityQueue //where T : class + { + private int size; + private int maxSize; + protected internal T[] heap; + + /// Determines the ordering of objects in this priority queue. Subclasses + /// must define this one method. + /// + public abstract bool LessThan(T a, T b); + + /// This method can be overridden by extending classes to return a sentinel + /// object which will be used by to fill the queue, so + /// that the code which uses that queue can always assume it's full and only + /// change the top without attempting to insert any new object.
+ /// + /// Those sentinel values should always compare worse than any non-sentinel + /// value (i.e., should always favor the + /// non-sentinel values).
+ /// + /// By default, this method returns false, which means the queue will not be + /// filled with sentinel values. Otherwise, the value returned will be used to + /// pre-populate the queue. Adds sentinel values to the queue.
+ /// + /// If this method is extended to return a non-null value, then the following + /// usage pattern is recommended: + /// + /// + /// // extends getSentinelObject() to return a non-null value. + /// PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits); + /// // save the 'top' element, which is guaranteed to not be null. + /// MyObject pqTop = pq.top(); + /// <...> + /// // now in order to add a new element, which is 'better' than top (after + /// // you've verified it is better), it is as simple as: + /// pqTop.change(). + /// pqTop = pq.updateTop(); + /// + /// + /// NOTE: if this method returns a non-null value, it will be called by + /// times, relying on a new object to + /// be returned and will not check if it's null again. Therefore you should + /// ensure any call to this method creates a new instance and behaves + /// consistently, e.g., it cannot return null if it previously returned + /// non-null. + /// + ///
+ /// the sentinel object to use to pre-populate the queue, or null if sentinel objects are not supported. + protected internal virtual T SentinelObject + { + get { return default(T); } + } + + /// Subclass constructors must call this. + protected internal void Initialize(int maxSize) + { + size = 0; + int heapSize; + if (0 == maxSize) + // We allocate 1 extra to avoid if statement in top() + heapSize = 2; + else + { + if (maxSize == Int32.MaxValue) + { + // Don't wrap heapSize to -1, in this case, which + // causes a confusing NegativeArraySizeException. + // Note that very likely this will simply then hit + // an OOME, but at least that's more indicative to + // caller that this values is too big. We don't +1 + // in this case, but it's very unlikely in practice + // one will actually insert this many objects into + // the PQ: + heapSize = Int32.MaxValue; + } + else + { + // NOTE: we add +1 because all access to heap is + // 1-based not 0-based. heap[0] is unused. + heapSize = maxSize + 1; + } + } + heap = new T[heapSize]; + this.maxSize = maxSize; + + // If sentinel objects are supported, populate the queue with them + T sentinel = SentinelObject; + if (sentinel != null) + { + heap[1] = sentinel; + for (int i = 2; i < heap.Length; i++) + { + heap[i] = SentinelObject; + } + size = maxSize; + } + } + + /// + /// Adds an Object to a PriorityQueue in log(size) time. If one tries to add + /// more objects than maxSize from initialize an + /// is thrown. + /// + /// the new 'top' element in the queue. + /// + public T Add(T element) + { + size++; + heap[size] = element; + UpHeap(); + return heap[1]; + } + + /// Adds an Object to a PriorityQueue in log(size) time. + /// It returns the object (if any) that was + /// dropped off the heap because it was full. This can be + /// the given parameter (in case it is smaller than the + /// full heap's minimum, and couldn't be added), or another + /// object that was previously the smallest value in the + /// heap and now has been replaced by a larger one, or null + /// if the queue wasn't yet full with maxSize elements. + /// + public virtual T InsertWithOverflow(T element) + { + if (size < maxSize) + { + Add(element); + return default(T); + } + else if (size > 0 && !LessThan(element, heap[1])) + { + T ret = heap[1]; + heap[1] = element; + UpdateTop(); + return ret; + } + else + { + return element; + } + } + + /// Returns the least element of the PriorityQueue in constant time. + public T Top() + { + // We don't need to check size here: if maxSize is 0, + // then heap is length 2 array with both entries null. + // If size is 0 then heap[1] is already null. + return heap[1]; + } + + /// + /// Removes and returns the least element of the + /// PriorityQueue in log(size) time. + /// + public T Pop() + { + if (size > 0) + { + T result = heap[1]; // save first value + heap[1] = heap[size]; // move last to first + heap[size] = default(T); // permit GC of objects + size--; + DownHeap(); // adjust heap + return result; + } + else + return default(T); + } + + /// Should be called when the Object at top changes values. + /// Still log(n) worst case, but it's at least twice as fast to + /// + /// pq.top().change(); + /// pq.updateTop(); + /// + /// instead of + /// + /// o = pq.pop(); + /// o.change(); + /// pq.push(o); + /// + /// + /// the new 'top' element. + public T UpdateTop() + { + DownHeap(); + return heap[1]; + } + + /// Returns the number of elements currently stored in the PriorityQueue. 
+ public int Size() + { + return size; + } + + /// Removes all entries from the PriorityQueue. + public void Clear() + { + for (int i = 0; i <= size; i++) + { + heap[i] = default(T); + } + size = 0; + } + + private void UpHeap() + { + int i = size; + T node = heap[i]; // save bottom node + int j = Number.URShift(i, 1); + while (j > 0 && LessThan(node, heap[j])) + { + heap[i] = heap[j]; // shift parents down + i = j; + j = Number.URShift(j, 1); + } + heap[i] = node; // install saved node + } + + private void DownHeap() + { + int i = 1; + T node = heap[i]; // save top node + int j = i << 1; // find smaller child + int k = j + 1; + if (k <= size && LessThan(heap[k], heap[j])) + { + j = k; + } + while (j <= size && LessThan(heap[j], node)) + { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= size && LessThan(heap[k], heap[j])) + { + j = k; + } + } + heap[i] = node; // install saved node + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/RamUsageEstimator.cs b/external/Lucene.Net.Light/src/core/Util/RamUsageEstimator.cs new file mode 100644 index 0000000000..343f7bb239 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/RamUsageEstimator.cs @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Collections.Generic; + +namespace Lucene.Net.Util +{ + + /// Estimates the size of a given Object using a given MemoryModel for primitive + /// size information. + /// + /// Resource Usage: + /// + /// Internally uses a Map to temporally hold a reference to every + /// object seen. + /// + /// If checkIntered, all Strings checked will be interned, but those + /// that were not already interned will be released for GC when the + /// estimate is complete. + /// + public sealed class RamUsageEstimator + { + private MemoryModel memoryModel; + + private IDictionary seen; + + private int refSize; + private int arraySize; + private int classSize; + + private bool checkInterned; + + /// Constructs this object with an AverageGuessMemoryModel and + /// checkInterned = true. + /// + public RamUsageEstimator():this(new AverageGuessMemoryModel()) + { + } + + /// check if Strings are interned and don't add to size + /// if they are. Defaults to true but if you know the objects you are checking + /// won't likely contain many interned Strings, it will be faster to turn off + /// intern checking. + /// + public RamUsageEstimator(bool checkInterned):this(new AverageGuessMemoryModel(), checkInterned) + { + } + + /// MemoryModel to use for primitive object sizes. + /// + public RamUsageEstimator(MemoryModel memoryModel):this(memoryModel, true) + { + } + + /// MemoryModel to use for primitive object sizes. 
+ /// + /// check if Strings are interned and don't add to size + /// if they are. Defaults to true but if you know the objects you are checking + /// won't likely contain many interned Strings, it will be faster to turn off + /// intern checking. + /// + public RamUsageEstimator(MemoryModel memoryModel, bool checkInterned) + { + this.memoryModel = memoryModel; + this.checkInterned = checkInterned; + // Use Map rather than Set so that we can use an IdentityHashMap - not + // seeing an IdentityHashSet + seen = new IdentityDictionary(64); + this.refSize = memoryModel.ReferenceSize; + this.arraySize = memoryModel.ArraySize; + this.classSize = memoryModel.ClassSize; + } + + public long EstimateRamUsage(System.Object obj) + { + long size = Size(obj); + seen.Clear(); + return size; + } + + private long Size(System.Object obj) + { + if (obj == null) + { + return 0; + } + // interned not part of this object + if (checkInterned && obj is System.String && obj == (System.Object) String.Intern(((System.String) obj))) + { + // interned string will be eligible + // for GC on + // estimateRamUsage(Object) return + return 0; + } + + // skip if we have seen before + if (seen.ContainsKey(obj)) + { + return 0; + } + + // add to seen + seen[obj] = null; + + System.Type clazz = obj.GetType(); + if (clazz.IsArray) + { + return SizeOfArray(obj); + } + + long size = 0; + + // walk type hierarchy + while (clazz != null) + { + System.Reflection.FieldInfo[] fields = clazz.GetFields(System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Public | System.Reflection.BindingFlags.DeclaredOnly | System.Reflection.BindingFlags.Static); + for (int i = 0; i < fields.Length; i++) + { + if (fields[i].IsStatic) + { + continue; + } + + if (fields[i].FieldType.IsPrimitive) + { + size += memoryModel.GetPrimitiveSize(fields[i].FieldType); + } + else + { + size += refSize; + fields[i].GetType(); + try + { + System.Object value_Renamed = fields[i].GetValue(obj); + if (value_Renamed != null) + { + size += Size(value_Renamed); + } + } + catch (System.UnauthorizedAccessException) + { + // ignore for now? + } + } + } + clazz = clazz.BaseType; + } + size += classSize; + return size; + } + + private long SizeOfArray(System.Object obj) + { + int len = ((System.Array) obj).Length; + if (len == 0) + { + return 0; + } + long size = arraySize; + System.Type arrayElementClazz = obj.GetType().GetElementType(); + if (arrayElementClazz.IsPrimitive) + { + size += len * memoryModel.GetPrimitiveSize(arrayElementClazz); + } + else + { + for (int i = 0; i < len; i++) + { + size += refSize + Size(((System.Array) obj).GetValue(i)); + } + } + + return size; + } + + private const long ONE_KB = 1024; + private static readonly long ONE_MB = ONE_KB * ONE_KB; + private static readonly long ONE_GB = ONE_KB * ONE_MB; + + /// Return good default units based on byte size. 
+ public static System.String HumanReadableUnits(long bytes, System.IFormatProvider df) + { + System.String newSizeAndUnits; + + if (bytes / ONE_GB > 0) + { + newSizeAndUnits = System.Convert.ToString(((float) bytes / ONE_GB), df) + " GB"; + } + else if (bytes / ONE_MB > 0) + { + newSizeAndUnits = System.Convert.ToString((float) bytes / ONE_MB, df) + " MB"; + } + else if (bytes / ONE_KB > 0) + { + newSizeAndUnits = System.Convert.ToString((float) bytes / ONE_KB, df) + " KB"; + } + else + { + newSizeAndUnits = System.Convert.ToString(bytes) + " bytes"; + } + + return newSizeAndUnits; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/ReaderUtil.cs b/external/Lucene.Net.Light/src/core/Util/ReaderUtil.cs new file mode 100644 index 0000000000..7c716eb33b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/ReaderUtil.cs @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using IndexReader = Lucene.Net.Index.IndexReader; + +namespace Lucene.Net.Util +{ + /// + /// Common util methods for dealing with s. + /// + public class ReaderUtil + { + /// Gathers sub-readers from reader into a List. + /// + /// + public static void GatherSubReaders(System.Collections.Generic.IList allSubReaders, IndexReader reader) + { + IndexReader[] subReaders = reader.GetSequentialSubReaders(); + if (subReaders == null) + { + // Add the reader itself, and do not recurse + allSubReaders.Add(reader); + } + else + { + for (int i = 0; i < subReaders.Length; i++) + { + GatherSubReaders(allSubReaders, subReaders[i]); + } + } + } + + /// Returns sub IndexReader that contains the given document id. + /// + /// + /// id of document + /// + /// parent reader + /// + /// sub reader of parent which contains the specified doc id + /// + public static IndexReader SubReader(int doc, IndexReader reader) + { + var subReadersList = new System.Collections.Generic.List(); + ReaderUtil.GatherSubReaders(subReadersList, reader); + IndexReader[] subReaders = subReadersList.ToArray(); + int[] docStarts = new int[subReaders.Length]; + int maxDoc = 0; + for (int i = 0; i < subReaders.Length; i++) + { + docStarts[i] = maxDoc; + maxDoc += subReaders[i].MaxDoc; + } + return subReaders[ReaderUtil.SubIndex(doc, docStarts)]; + } + + /// Returns sub-reader subIndex from reader. 
+ /// + /// + /// parent reader + /// + /// index of desired sub reader + /// + /// the subreader at subINdex + /// + public static IndexReader SubReader(IndexReader reader, int subIndex) + { + var subReadersList = new System.Collections.Generic.List(); + ReaderUtil.GatherSubReaders(subReadersList, reader); + IndexReader[] subReaders = subReadersList.ToArray(); + return subReaders[subIndex]; + } + + + /// Returns index of the searcher/reader for document n in the + /// array used to construct this searcher/reader. + /// + public static int SubIndex(int n, int[] docStarts) + { + // find + // searcher/reader for doc n: + int size = docStarts.Length; + int lo = 0; // search starts array + int hi = size - 1; // for first element less than n, return its index + while (hi >= lo) + { + int mid = Number.URShift((lo + hi), 1); + int midValue = docStarts[mid]; + if (n < midValue) + hi = mid - 1; + else if (n > midValue) + lo = mid + 1; + else + { + // found a match + while (mid + 1 < size && docStarts[mid + 1] == midValue) + { + mid++; // scan to last match + } + return mid; + } + } + return hi; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/ScorerDocQueue.cs b/external/Lucene.Net.Light/src/core/Util/ScorerDocQueue.cs new file mode 100644 index 0000000000..ee6c2594f5 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/ScorerDocQueue.cs @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/* Derived from Lucene.Net.Util.PriorityQueue of March 2005 */ +using System; +using Lucene.Net.Support; +using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; +using Scorer = Lucene.Net.Search.Scorer; + +namespace Lucene.Net.Util +{ + + /// A ScorerDocQueue maintains a partial ordering of its Scorers such that the + /// least Scorer can always be found in constant time. Put()'s and pop()'s + /// require log(size) time. The ordering is by Scorer.doc(). 
+ /// + public class ScorerDocQueue + { + // later: SpansQueue for spans with doc and term positions + private HeapedScorerDoc[] heap; + private int maxSize; + private int size; + + private class HeapedScorerDoc + { + private void InitBlock(ScorerDocQueue enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private ScorerDocQueue enclosingInstance; + public ScorerDocQueue Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal Scorer scorer; + internal int doc; + + internal HeapedScorerDoc(ScorerDocQueue enclosingInstance, Scorer s):this(enclosingInstance, s, s.DocID()) + { + } + + internal HeapedScorerDoc(ScorerDocQueue enclosingInstance, Scorer scorer, int doc) + { + InitBlock(enclosingInstance); + this.scorer = scorer; + this.doc = doc; + } + + internal virtual void Adjust() + { + doc = scorer.DocID(); + } + } + + private HeapedScorerDoc topHSD; // same as heap[1], only for speed + + /// Create a ScorerDocQueue with a maximum size. + public ScorerDocQueue(int maxSize) + { + // assert maxSize >= 0; + size = 0; + int heapSize = maxSize + 1; + heap = new HeapedScorerDoc[heapSize]; + this.maxSize = maxSize; + topHSD = heap[1]; // initially null + } + + /// Adds a Scorer to a ScorerDocQueue in log(size) time. + /// If one tries to add more Scorers than maxSize + /// a RuntimeException (ArrayIndexOutOfBound) is thrown. + /// + public void Put(Scorer scorer) + { + size++; + heap[size] = new HeapedScorerDoc(this, scorer); + UpHeap(); + } + + /// Adds a Scorer to the ScorerDocQueue in log(size) time if either + /// the ScorerDocQueue is not full, or not lessThan(scorer, top()). + /// + /// + /// + /// true if scorer is added, false otherwise. + /// + public virtual bool Insert(Scorer scorer) + { + if (size < maxSize) + { + Put(scorer); + return true; + } + else + { + int docNr = scorer.DocID(); + if ((size > 0) && (!(docNr < topHSD.doc))) + { + // heap[1] is top() + heap[1] = new HeapedScorerDoc(this, scorer, docNr); + DownHeap(); + return true; + } + else + { + return false; + } + } + } + + /// Returns the least Scorer of the ScorerDocQueue in constant time. + /// Should not be used when the queue is empty. + /// + public Scorer Top() + { + // assert size > 0; + return topHSD.scorer; + } + + /// Returns document number of the least Scorer of the ScorerDocQueue + /// in constant time. + /// Should not be used when the queue is empty. + /// + public int TopDoc() + { + // assert size > 0; + return topHSD.doc; + } + + public float TopScore() + { + // assert size > 0; + return topHSD.scorer.Score(); + } + + public bool TopNextAndAdjustElsePop() + { + return CheckAdjustElsePop(topHSD.scorer.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + } + + public bool TopSkipToAndAdjustElsePop(int target) + { + return CheckAdjustElsePop(topHSD.scorer.Advance(target) != DocIdSetIterator.NO_MORE_DOCS); + } + + private bool CheckAdjustElsePop(bool cond) + { + if (cond) + { + // see also adjustTop + topHSD.doc = topHSD.scorer.DocID(); + } + else + { + // see also popNoResult + heap[1] = heap[size]; // move last to first + heap[size] = null; + size--; + } + DownHeap(); + return cond; + } + + /// Removes and returns the least scorer of the ScorerDocQueue in log(size) + /// time. + /// Should not be used when the queue is empty. + /// + public Scorer Pop() + { + // assert size > 0; + Scorer result = topHSD.scorer; + PopNoResult(); + return result; + } + + /// Removes the least scorer of the ScorerDocQueue in log(size) time. + /// Should not be used when the queue is empty. 
+ /// + private void PopNoResult() + { + heap[1] = heap[size]; // move last to first + heap[size] = null; + size--; + DownHeap(); // adjust heap + } + + /// Should be called when the scorer at top changes doc() value. + /// Still log(n) worst case, but it's at least twice as fast to + /// { pq.top().change(); pq.adjustTop(); } + /// instead of + /// { o = pq.pop(); o.change(); pq.push(o); } + /// + /// + public void AdjustTop() + { + // assert size > 0; + topHSD.Adjust(); + DownHeap(); + } + + /// Returns the number of scorers currently stored in the ScorerDocQueue. + public int Size() + { + return size; + } + + /// Removes all entries from the ScorerDocQueue. + public void Clear() + { + for (int i = 0; i <= size; i++) + { + heap[i] = null; + } + size = 0; + } + + private void UpHeap() + { + int i = size; + HeapedScorerDoc node = heap[i]; // save bottom node + int j = Number.URShift(i, 1); + while ((j > 0) && (node.doc < heap[j].doc)) + { + heap[i] = heap[j]; // shift parents down + i = j; + j = Number.URShift(j, 1); + } + heap[i] = node; // install saved node + topHSD = heap[1]; + } + + private void DownHeap() + { + int i = 1; + HeapedScorerDoc node = heap[i]; // save top node + int j = i << 1; // find smaller child + int k = j + 1; + if ((k <= size) && (heap[k].doc < heap[j].doc)) + { + j = k; + } + while ((j <= size) && (heap[j].doc < node.doc)) + { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= size && (heap[k].doc < heap[j].doc)) + { + j = k; + } + } + heap[i] = node; // install saved node + topHSD = heap[1]; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/SimpleStringInterner.cs b/external/Lucene.Net.Light/src/core/Util/SimpleStringInterner.cs new file mode 100644 index 0000000000..eea707a921 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/SimpleStringInterner.cs @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + + /// Simple lockless and memory barrier free String intern cache that is guaranteed + /// to return the same String instance as String.intern() does. + /// + public class SimpleStringInterner:StringInterner + { + + internal /*private*/ class Entry + { + internal /*private*/ System.String str; + internal /*private*/ int hash; + internal /*private*/ Entry next; + internal Entry(System.String str, int hash, Entry next) + { + this.str = str; + this.hash = hash; + this.next = next; + } + } + + private Entry[] cache; + private int maxChainLength; + + /// Size of the hash table, should be a power of two. + /// + /// Maximum length of each bucket, after which the oldest item inserted is dropped. 
+ /// + public SimpleStringInterner(int tableSize, int maxChainLength) + { + cache = new Entry[System.Math.Max(1, BitUtil.NextHighestPowerOfTwo(tableSize))]; + this.maxChainLength = System.Math.Max(2, maxChainLength); + } + + // @Override + public override System.String Intern(System.String s) + { + int h = s.GetHashCode(); + // In the future, it may be worth augmenting the string hash + // if the lower bits need better distribution. + int slot = h & (cache.Length - 1); + + Entry first = this.cache[slot]; + Entry nextToLast = null; + + int chainLength = 0; + + for (Entry e = first; e != null; e = e.next) + { + if (e.hash == h && (ReferenceEquals(e.str, s) || String.CompareOrdinal(e.str, s) == 0)) + { + // if (e.str == s || (e.hash == h && e.str.compareTo(s)==0)) { + return e.str; + } + + chainLength++; + if (e.next != null) + { + nextToLast = e; + } + } + + // insertion-order cache: add new entry at head + s = String.Intern(s); + this.cache[slot] = new Entry(s, h, first); + if (chainLength >= maxChainLength) + { + // prune last entry + nextToLast.next = null; + } + return s; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/SmallFloat.cs b/external/Lucene.Net.Light/src/core/Util/SmallFloat.cs new file mode 100644 index 0000000000..848fb1324d --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/SmallFloat.cs @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + + /// Floating point numbers smaller than 32 bits. + /// + /// + /// $Id$ + /// + public class SmallFloat + { + + /// Converts a 32 bit float to an 8 bit float. + ///
Values less than zero are all mapped to zero. + ///
Values are truncated (rounded down) to the nearest 8 bit value. + ///
Values between zero and the smallest representable value + /// are rounded up. + /// + ///
+ /// the 32 bit float to be converted to an 8 bit float (byte) + /// + /// the number of mantissa bits to use in the byte, with the remainder to be used in the exponent + /// + /// the zero-point in the range of exponent values + /// + /// the 8 bit float representation + /// + public static sbyte FloatToByte(float f, int numMantissaBits, int zeroExp) + { + // Adjustment from a float zero exponent to our zero exponent, + // shifted over to our exponent position. + int fzero = (63 - zeroExp) << numMantissaBits; + int bits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(f), 0); + int smallfloat = bits >> (24 - numMantissaBits); + if (smallfloat < fzero) + { + return (bits <= 0)?(sbyte) 0:(sbyte) 1; // underflow is mapped to smallest non-zero number. + } + else if (smallfloat >= fzero + 0x100) + { + return - 1; // overflow maps to largest number + } + else + { + return (sbyte) (smallfloat - fzero); + } + } + + /// Converts an 8 bit float to a 32 bit float. + public static float ByteToFloat(byte b, int numMantissaBits, int zeroExp) + { + // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup + // is only a little bit faster (anywhere from 0% to 7%) + if (b == 0) + return 0.0f; + int bits = (b & 0xff) << (24 - numMantissaBits); + bits += ((63 - zeroExp) << 24); + return BitConverter.ToSingle(BitConverter.GetBytes(bits), 0); + } + + + // + // Some specializations of the generic functions follow. + // The generic functions are just as fast with current (1.5) + // -server JVMs, but still slower with client JVMs. + // + + /// floatToByte(b, mantissaBits=3, zeroExponent=15) + ///
smallest non-zero value = 5.820766E-10 + ///
largest value = 7.5161928E9 + ///
epsilon = 0.125 + ///
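A short illustrative round trip through the 3-mantissa-bit, zero-exponent-15 encoding declared below (sample values only): 0.5f is a power of two and survives exactly, while nearby values are truncated down to the same byte.

    sbyte small = SmallFloat.FloatToByte315(0.5f);
    float exact = SmallFloat.Byte315ToFloat((byte) small);      // == 0.5f
    sbyte small2 = SmallFloat.FloatToByte315(0.53f);
    float truncated = SmallFloat.Byte315ToFloat((byte) small2); // also 0.5f: values are truncated, not rounded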
+ public static sbyte FloatToByte315(float f) + { + int bits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(f), 0); + int smallfloat = bits >> (24 - 3); + if (smallfloat < (63 - 15) << 3) + { + return (bits <= 0)?(sbyte) 0:(sbyte) 1; + } + if (smallfloat >= ((63 - 15) << 3) + 0x100) + { + return - 1; + } + return (sbyte) (smallfloat - ((63 - 15) << 3)); + } + + /// byteToFloat(b, mantissaBits=3, zeroExponent=15) + public static float Byte315ToFloat(byte b) + { + // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup + // is only a little bit faster (anywhere from 0% to 7%) + if (b == 0) + return 0.0f; + int bits = (b & 0xff) << (24 - 3); + bits += ((63 - 15) << 24); + return BitConverter.ToSingle(BitConverter.GetBytes(bits), 0); + } + + + /// floatToByte(b, mantissaBits=5, zeroExponent=2) + ///
smallest nonzero value = 0.033203125 + ///
largest value = 1984.0 + ///
epsilon = 0.03125 + ///
+ public static sbyte FloatToByte52(float f) + { + int bits = System.BitConverter.ToInt32(System.BitConverter.GetBytes(f), 0); + int smallfloat = bits >> (24 - 5); + if (smallfloat < (63 - 2) << 5) + { + return (bits <= 0)?(sbyte) 0:(sbyte) 1; + } + if (smallfloat >= ((63 - 2) << 5) + 0x100) + { + return - 1; + } + return (sbyte) (smallfloat - ((63 - 2) << 5)); + } + + /// byteToFloat(b, mantissaBits=5, zeroExponent=2) + public static float Byte52ToFloat(byte b) + { + // on Java1.5 & 1.6 JVMs, prebuilding a decoding array and doing a lookup + // is only a little bit faster (anywhere from 0% to 7%) + if (b == 0) + return 0.0f; + int bits = (b & 0xff) << (24 - 5); + bits += ((63 - 2) << 24); + return BitConverter.ToSingle(BitConverter.GetBytes(bits), 0); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/SortedVIntList.cs b/external/Lucene.Net.Light/src/core/Util/SortedVIntList.cs new file mode 100644 index 0000000000..5e8e8d41f3 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/SortedVIntList.cs @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using Lucene.Net.Support; +using DocIdSet = Lucene.Net.Search.DocIdSet; +using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + +namespace Lucene.Net.Util +{ + + /// Stores and iterate on sorted integers in compressed form in RAM.
+ /// The code for compressing the differences between ascending integers was + /// borrowed from and + /// .

NOTE: this class assumes the stored integers are doc Ids (hence why it + /// extends DocIdSet). Therefore it assumes + /// DocIdSetIterator.NO_MORE_DOCS can be used as a sentinel. If you intend to use + /// this value, then make sure it's not used during search flow. + ///
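As an illustrative sketch (the ids and names are arbitrary examples), the list is built from already-sorted doc ids via the constructors added below and walked through Iterator():

    var list = new SortedVIntList(3, 5, 7, 1000);   // params int[] constructor; input must be sorted and non-negative
    var it = list.Iterator();
    int doc;
    while ((doc = it.NextDoc()) != Lucene.Net.Search.DocIdSetIterator.NO_MORE_DOCS)
    {
        // yields 3, 5, 7, 1000; only the deltas between ids are stored, as variable-length bytes
    }
    int count = list.Size;        // 4
    int bytes = list.ByteSize;    // length of the backing byte array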

+ public class SortedVIntList:DocIdSet + { + private class AnonymousClassDocIdSetIterator:DocIdSetIterator + { + public AnonymousClassDocIdSetIterator(SortedVIntList enclosingInstance) + { + InitBlock(enclosingInstance); + } + private void InitBlock(SortedVIntList enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SortedVIntList enclosingInstance; + public SortedVIntList Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + internal int bytePos = 0; + internal int lastInt = 0; + internal int doc = - 1; + + private void Advance() + { + // See Lucene.Net.Store.IndexInput.readVInt() + sbyte b = Enclosing_Instance.bytes[bytePos++]; + lastInt += (b & Lucene.Net.Util.SortedVIntList.VB1); + for (int s = Lucene.Net.Util.SortedVIntList.BIT_SHIFT; (b & ~ Lucene.Net.Util.SortedVIntList.VB1) != 0; s += Lucene.Net.Util.SortedVIntList.BIT_SHIFT) + { + b = Enclosing_Instance.bytes[bytePos++]; + lastInt += ((b & Lucene.Net.Util.SortedVIntList.VB1) << s); + } + } + + public override int DocID() + { + return doc; + } + + public override int NextDoc() + { + if (bytePos >= Enclosing_Instance.lastBytePos) + { + doc = NO_MORE_DOCS; + } + else + { + Advance(); + doc = lastInt; + } + return doc; + } + + public override int Advance(int target) + { + while (bytePos < Enclosing_Instance.lastBytePos) + { + Advance(); + if (lastInt >= target) + { + return doc = lastInt; + } + } + return doc = NO_MORE_DOCS; + } + } + /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, + /// a SortedVIntList representing the index numbers of the set bits + /// will be smaller than that BitSet. + /// + internal const int BITS2VINTLIST_SIZE = 8; + + private int size; + private sbyte[] bytes; + private int lastBytePos; + + /// Create a SortedVIntList from all elements of an array of integers. + /// + /// + /// A sorted array of non negative integers. + /// + public SortedVIntList(params int[] sortedInts):this(sortedInts, sortedInts.Length) + { + } + + /// Create a SortedVIntList from an array of integers. + /// An array of sorted non negative integers. + /// + /// The number of integers to be used from the array. + /// + public SortedVIntList(int[] sortedInts, int inputSize) + { + SortedVIntListBuilder builder = new SortedVIntListBuilder(this); + for (int i = 0; i < inputSize; i++) + { + builder.AddInt(sortedInts[i]); + } + builder.Done(); + } + + /// Create a SortedVIntList from a BitSet. + /// A bit set representing a set of integers. + /// + public SortedVIntList(System.Collections.BitArray bits) + { + SortedVIntListBuilder builder = new SortedVIntListBuilder(this); + int nextInt = BitSetSupport.NextSetBit(bits, 0); + while (nextInt != - 1) + { + builder.AddInt(nextInt); + nextInt = BitSetSupport.NextSetBit(bits, nextInt + 1); + } + builder.Done(); + } + + /// Create a SortedVIntList from an OpenBitSet. + /// A bit set representing a set of integers. + /// + public SortedVIntList(OpenBitSet bits) + { + SortedVIntListBuilder builder = new SortedVIntListBuilder(this); + int nextInt = bits.NextSetBit(0); + while (nextInt != - 1) + { + builder.AddInt(nextInt); + nextInt = bits.NextSetBit(nextInt + 1); + } + builder.Done(); + } + + /// Create a SortedVIntList. + /// An iterator providing document numbers as a set of integers. + /// This DocIdSetIterator is iterated completely when this constructor + /// is called and it must provide the integers in non + /// decreasing order. 
+ /// + public SortedVIntList(DocIdSetIterator docIdSetIterator) + { + SortedVIntListBuilder builder = new SortedVIntListBuilder(this); + int doc; + while ((doc = docIdSetIterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + { + builder.AddInt(doc); + } + builder.Done(); + } + + + private class SortedVIntListBuilder + { + private void InitBlock(SortedVIntList enclosingInstance) + { + this.enclosingInstance = enclosingInstance; + } + private SortedVIntList enclosingInstance; + public SortedVIntList Enclosing_Instance + { + get + { + return enclosingInstance; + } + + } + private int lastInt = 0; + + internal SortedVIntListBuilder(SortedVIntList enclosingInstance) + { + InitBlock(enclosingInstance); + Enclosing_Instance.InitBytes(); + lastInt = 0; + } + + internal virtual void AddInt(int nextInt) + { + int diff = nextInt - lastInt; + if (diff < 0) + { + throw new System.ArgumentException("Input not sorted or first element negative."); + } + + if ((Enclosing_Instance.lastBytePos + Enclosing_Instance.MAX_BYTES_PER_INT) > Enclosing_Instance.bytes.Length) + { + // biggest possible int does not fit + Enclosing_Instance.ResizeBytes((Enclosing_Instance.bytes.Length * 2) + Enclosing_Instance.MAX_BYTES_PER_INT); + } + + // See Lucene.Net.Store.IndexOutput.writeVInt() + while ((diff & ~ Lucene.Net.Util.SortedVIntList.VB1) != 0) + { + // The high bit of the next byte needs to be set. + Enclosing_Instance.bytes[Enclosing_Instance.lastBytePos++] = (sbyte) ((diff & Lucene.Net.Util.SortedVIntList.VB1) | ~ Lucene.Net.Util.SortedVIntList.VB1); + diff = Number.URShift(diff, Lucene.Net.Util.SortedVIntList.BIT_SHIFT); + } + Enclosing_Instance.bytes[Enclosing_Instance.lastBytePos++] = (sbyte) diff; // Last byte, high bit not set. + Enclosing_Instance.size++; + lastInt = nextInt; + } + + internal virtual void Done() + { + Enclosing_Instance.ResizeBytes(Enclosing_Instance.lastBytePos); + } + } + + + private void InitBytes() + { + size = 0; + bytes = new sbyte[128]; // initial byte size + lastBytePos = 0; + } + + private void ResizeBytes(int newSize) + { + if (newSize != bytes.Length) + { + sbyte[] newBytes = new sbyte[newSize]; + Array.Copy(bytes, 0, newBytes, 0, lastBytePos); + bytes = newBytes; + } + } + + private const int VB1 = 0x7F; + private const int BIT_SHIFT = 7; + private int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1; + + /// The total number of sorted integers. + public virtual int Size + { + get { return size; } + } + + /// The size of the byte array storing the compressed sorted integers. + public virtual int ByteSize + { + get { return bytes.Length; } + } + + /// This DocIdSet implementation is cacheable. + public override bool IsCacheable + { + get { return true; } + } + + /// An iterator over the sorted integers. + /// + public override DocIdSetIterator Iterator() + { + return new AnonymousClassDocIdSetIterator(this); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/SorterTemplate.cs b/external/Lucene.Net.Light/src/core/Util/SorterTemplate.cs new file mode 100644 index 0000000000..a44f229d79 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/SorterTemplate.cs @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Borrowed from Cglib. Allows custom swap so that two arrays can be sorted + /// at the same time. + /// + public abstract class SorterTemplate + { + private const int MERGESORT_THRESHOLD = 12; + private const int QUICKSORT_THRESHOLD = 7; + + abstract protected internal void Swap(int i, int j); + abstract protected internal int Compare(int i, int j); + + public virtual void QuickSort(int lo, int hi) + { + QuickSortHelper(lo, hi); + InsertionSort(lo, hi); + } + + private void QuickSortHelper(int lo, int hi) + { + for (; ; ) + { + int diff = hi - lo; + if (diff <= QUICKSORT_THRESHOLD) + { + break; + } + int i = (hi + lo) / 2; + if (Compare(lo, i) > 0) + { + Swap(lo, i); + } + if (Compare(lo, hi) > 0) + { + Swap(lo, hi); + } + if (Compare(i, hi) > 0) + { + Swap(i, hi); + } + int j = hi - 1; + Swap(i, j); + i = lo; + int v = j; + for (; ; ) + { + while (Compare(++i, v) < 0) + { + /* nothing */ ; + } + while (Compare(--j, v) > 0) + { + /* nothing */ ; + } + if (j < i) + { + break; + } + Swap(i, j); + } + Swap(i, hi - 1); + if (j - lo <= hi - i + 1) + { + QuickSortHelper(lo, j); + lo = i + 1; + } + else + { + QuickSortHelper(i + 1, hi); + hi = j; + } + } + } + + private void InsertionSort(int lo, int hi) + { + for (int i = lo + 1; i <= hi; i++) + { + for (int j = i; j > lo; j--) + { + if (Compare(j - 1, j) > 0) + { + Swap(j - 1, j); + } + else + { + break; + } + } + } + } + + protected internal virtual void MergeSort(int lo, int hi) + { + int diff = hi - lo; + if (diff <= MERGESORT_THRESHOLD) + { + InsertionSort(lo, hi); + return ; + } + int mid = lo + diff / 2; + MergeSort(lo, mid); + MergeSort(mid, hi); + Merge(lo, mid, hi, mid - lo, hi - mid); + } + + private void Merge(int lo, int pivot, int hi, int len1, int len2) + { + if (len1 == 0 || len2 == 0) + { + return ; + } + if (len1 + len2 == 2) + { + if (Compare(pivot, lo) < 0) + { + Swap(pivot, lo); + } + return ; + } + int first_cut, second_cut; + int len11, len22; + if (len1 > len2) + { + len11 = len1 / 2; + first_cut = lo + len11; + second_cut = Lower(pivot, hi, first_cut); + len22 = second_cut - pivot; + } + else + { + len22 = len2 / 2; + second_cut = pivot + len22; + first_cut = Upper(lo, pivot, second_cut); + len11 = first_cut - lo; + } + Rotate(first_cut, pivot, second_cut); + int new_mid = first_cut + len22; + Merge(lo, first_cut, new_mid, len11, len22); + Merge(new_mid, second_cut, hi, len1 - len11, len2 - len22); + } + + private void Rotate(int lo, int mid, int hi) + { + int lot = lo; + int hit = mid - 1; + while (lot < hit) + { + Swap(lot++, hit--); + } + lot = mid; hit = hi - 1; + while (lot < hit) + { + Swap(lot++, hit--); + } + lot = lo; hit = hi - 1; + while (lot < hit) + { + Swap(lot++, hit--); + } + } + + private int Lower(int lo, int hi, int val) + { + int len = hi - lo; + while (len > 0) + { + int half = len / 2; + int mid = lo + half; + if (Compare(mid, val) < 0) + { + lo = mid + 1; + len = 
len - half - 1; + } + else + { + len = half; + } + } + return lo; + } + + private int Upper(int lo, int hi, int val) + { + int len = hi - lo; + while (len > 0) + { + int half = len / 2; + int mid = lo + half; + if (Compare(val, mid) < 0) + { + len = half; + } + else + { + lo = mid + 1; + len = len - half - 1; + } + } + return lo; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/StringHelper.cs b/external/Lucene.Net.Light/src/core/Util/StringHelper.cs new file mode 100644 index 0000000000..3851087087 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/StringHelper.cs @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + + /// Methods for manipulating strings. + public abstract class StringHelper + { + /// Expert: + /// The StringInterner implementation used by Lucene. + /// This shouldn't be changed to an incompatible implementation after other Lucene APIs have been used. + /// + public static StringInterner interner = new SimpleStringInterner(1024, 8); + + /// Return the same string object for all equal strings + public static System.String Intern(System.String s) + { + return interner.Intern(s); + } + + /// Compares two byte[] arrays, element by element, and returns the + /// number of elements common to both arrays. + /// + /// + /// The first byte[] to compare + /// + /// + /// The second byte[] to compare + /// + /// + /// The number of common elements. + /// + public static int BytesDifference(byte[] bytes1, int len1, byte[] bytes2, int len2) + { + int len = len1 < len2?len1:len2; + for (int i = 0; i < len; i++) + if (bytes1[i] != bytes2[i]) + return i; + return len; + } + + /// Compares two strings, character by character, and returns the + /// first position where the two strings differ from one another. + /// + /// + /// The first string to compare + /// + /// The second string to compare + /// + /// The first position where the two strings differ. + /// + public static int StringDifference(System.String s1, System.String s2) + { + int len1 = s1.Length; + int len2 = s2.Length; + int len = len1 < len2?len1:len2; + for (int i = 0; i < len; i++) + { + if (s1[i] != s2[i]) + { + return i; + } + } + return len; + } + + private StringHelper() + { + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/StringInterner.cs b/external/Lucene.Net.Light/src/core/Util/StringInterner.cs new file mode 100644 index 0000000000..b9efe5a228 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/StringInterner.cs @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Subclasses of StringInterner are required to + /// return the same single String object for all equal strings. + /// Depending on the implementation, this may not be + /// the same object returned as String.intern(). + /// + /// This StringInterner base class simply delegates to String.intern(). + /// + public class StringInterner + { + /// Returns a single object instance for each equal string. + public virtual System.String Intern(System.String s) + { + return String.Intern(s); + } + + /// Returns a single object instance for each equal string. + public virtual System.String Intern(char[] arr, int offset, int len) + { + return Intern(new System.String(arr, offset, len)); + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/ToStringUtils.cs b/external/Lucene.Net.Light/src/core/Util/ToStringUtils.cs new file mode 100644 index 0000000000..c1ba66567e --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/ToStringUtils.cs @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + /// Helper methods to ease implementing . 
+ public class ToStringUtils + { + /// for printing boost only if not 1.0 + public static System.String Boost(float boost) + { + if (boost != 1.0f) + { + float boostAsLong = (long) boost; + if (boostAsLong == boost) + return "^" + boost.ToString(".0").Replace(System.Globalization.CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, "."); + return "^" + boost.ToString().Replace(System.Globalization.CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator, "."); + } + else + return ""; + } + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/UnicodeUtil.cs b/external/Lucene.Net.Light/src/core/Util/UnicodeUtil.cs new file mode 100644 index 0000000000..9a3499229b --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/UnicodeUtil.cs @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + + + /* + * Some of this code came from the excellent Unicode + * conversion examples from: + * + * http://www.unicode.org/Public/PROGRAMS/CVTUTF + * + * Full Copyright for that code follows:*/ + + /* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + + /// Class to encode java's UTF16 char[] into UTF8 byte[] + /// without always allocating a new byte[] as + /// String.getBytes("UTF-8") does. + /// + ///

WARNING: This API is new and experimental and + /// may suddenly change.
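A brief illustrative round trip through the reusable UTF8Result/UTF16Result buffers defined below (the sample text is arbitrary):

    char[] text = "café".ToCharArray();
    var utf8 = new UnicodeUtil.UTF8Result();
    UnicodeUtil.UTF16toUTF8(text, 0, text.Length, utf8);         // utf8.result / utf8.length now hold 5 UTF-8 bytes
    var utf16 = new UnicodeUtil.UTF16Result();
    UnicodeUtil.UTF8toUTF16(utf8.result, 0, utf8.length, utf16); // decodes back to the original chars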

+ ///

+ + public static class UnicodeUtil + { + + public const int UNI_SUR_HIGH_START = 0xD800; + public const int UNI_SUR_HIGH_END = 0xDBFF; + public const int UNI_SUR_LOW_START = 0xDC00; + public const int UNI_SUR_LOW_END = 0xDFFF; + public const int UNI_REPLACEMENT_CHAR = 0xFFFD; + + private const long UNI_MAX_BMP = 0x0000FFFF; + + private const int HALF_BASE = 0x0010000; + private const long HALF_SHIFT = 10; + private const long HALF_MASK = 0x3FFL; + + public sealed class UTF8Result + { + public byte[] result = new byte[10]; + public int length; + + public void SetLength(int newLength) + { + if (result.Length < newLength) + { + byte[] newArray = new byte[(int) (1.5 * newLength)]; + Array.Copy(result, 0, newArray, 0, length); + result = newArray; + } + length = newLength; + } + } + + public sealed class UTF16Result + { + public char[] result = new char[10]; + public int[] offsets = new int[10]; + public int length; + + public void SetLength(int newLength) + { + if (result.Length < newLength) + { + char[] newArray = new char[(int) (1.5 * newLength)]; + Array.Copy(result, 0, newArray, 0, length); + result = newArray; + } + length = newLength; + } + + public void CopyText(UTF16Result other) + { + SetLength(other.length); + Array.Copy(other.result, 0, result, 0, length); + } + } + + /// Encode characters from a char[] source, starting at + /// offset and stopping when the character 0xffff is seen. + /// Returns the number of bytes written to bytesOut. + /// + public static void UTF16toUTF8(char[] source, int offset, UTF8Result result) + { + + int upto = 0; + int i = offset; + byte[] out_Renamed = result.result; + + while (true) + { + + int code = (int) source[i++]; + + if (upto + 4 > out_Renamed.Length) + { + byte[] newOut = new byte[2 * out_Renamed.Length]; + System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4); + Array.Copy(out_Renamed, 0, newOut, 0, upto); + result.result = out_Renamed = newOut; + } + if (code < 0x80) + out_Renamed[upto++] = (byte) code; + else if (code < 0x800) + { + out_Renamed[upto++] = (byte) (0xC0 | (code >> 6)); + out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F)); + } + else if (code < 0xD800 || code > 0xDFFF) + { + if (code == 0xffff) + // END + break; + out_Renamed[upto++] = (byte) (0xE0 | (code >> 12)); + out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F)); + } + else + { + // surrogate pair + // confirm valid high surrogate + if (code < 0xDC00 && source[i] != 0xffff) + { + int utf32 = (int) source[i]; + // confirm valid low surrogate and write pair + if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) + { + utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); + i++; + out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18)); + out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F)); + continue; + } + } + // replace unpaired surrogate or out-of-order low surrogate + // with substitution character + out_Renamed[upto++] = (byte) (0xEF); + out_Renamed[upto++] = (byte) (0xBF); + out_Renamed[upto++] = (byte) (0xBD); + } + } + //assert matches(source, offset, i-offset-1, out, upto); + result.length = upto; + } + + /// Encode characters from a char[] source, starting at + /// offset for length chars. Returns the number of bytes + /// written to bytesOut. 
+ /// + public static void UTF16toUTF8(char[] source, int offset, int length, UTF8Result result) + { + + int upto = 0; + int i = offset; + int end = offset + length; + byte[] out_Renamed = result.result; + + while (i < end) + { + + int code = (int) source[i++]; + + if (upto + 4 > out_Renamed.Length) + { + byte[] newOut = new byte[2 * out_Renamed.Length]; + System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4); + Array.Copy(out_Renamed, 0, newOut, 0, upto); + result.result = out_Renamed = newOut; + } + if (code < 0x80) + out_Renamed[upto++] = (byte) code; + else if (code < 0x800) + { + out_Renamed[upto++] = (byte) (0xC0 | (code >> 6)); + out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F)); + } + else if (code < 0xD800 || code > 0xDFFF) + { + out_Renamed[upto++] = (byte) (0xE0 | (code >> 12)); + out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F)); + } + else + { + // surrogate pair + // confirm valid high surrogate + if (code < 0xDC00 && i < end && source[i] != 0xffff) + { + int utf32 = (int) source[i]; + // confirm valid low surrogate and write pair + if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) + { + utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); + i++; + out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18)); + out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F)); + continue; + } + } + // replace unpaired surrogate or out-of-order low surrogate + // with substitution character + out_Renamed[upto++] = (byte) (0xEF); + out_Renamed[upto++] = (byte) (0xBF); + out_Renamed[upto++] = (byte) (0xBD); + } + } + //assert matches(source, offset, length, out, upto); + result.length = upto; + } + + /// Encode characters from this String, starting at offset + /// for length characters. Returns the number of bytes + /// written to bytesOut. 
+ /// + public static void UTF16toUTF8(System.String s, int offset, int length, UTF8Result result) + { + int end = offset + length; + + byte[] out_Renamed = result.result; + + int upto = 0; + for (int i = offset; i < end; i++) + { + int code = (int) s[i]; + + if (upto + 4 > out_Renamed.Length) + { + byte[] newOut = new byte[2 * out_Renamed.Length]; + System.Diagnostics.Debug.Assert(newOut.Length >= upto + 4); + Array.Copy(out_Renamed, 0, newOut, 0, upto); + result.result = out_Renamed = newOut; + } + if (code < 0x80) + out_Renamed[upto++] = (byte) code; + else if (code < 0x800) + { + out_Renamed[upto++] = (byte) (0xC0 | (code >> 6)); + out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F)); + } + else if (code < 0xD800 || code > 0xDFFF) + { + out_Renamed[upto++] = (byte) (0xE0 | (code >> 12)); + out_Renamed[upto++] = (byte) (0x80 | ((code >> 6) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | (code & 0x3F)); + } + else + { + // surrogate pair + // confirm valid high surrogate + if (code < 0xDC00 && (i < end - 1)) + { + int utf32 = (int) s[i + 1]; + // confirm valid low surrogate and write pair + if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) + { + utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); + i++; + out_Renamed[upto++] = (byte) (0xF0 | (utf32 >> 18)); + out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 12) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | ((utf32 >> 6) & 0x3F)); + out_Renamed[upto++] = (byte) (0x80 | (utf32 & 0x3F)); + continue; + } + } + // replace unpaired surrogate or out-of-order low surrogate + // with substitution character + out_Renamed[upto++] = (byte) (0xEF); + out_Renamed[upto++] = (byte) (0xBF); + out_Renamed[upto++] = (byte) (0xBD); + } + } + //assert matches(s, offset, length, out, upto); + result.length = upto; + } + + /// Convert UTF8 bytes into UTF16 characters. If offset + /// is non-zero, conversion starts at that starting point + /// in utf8, re-using the results from the previous call + /// up until offset. 
+ /// + public static void UTF8toUTF16(byte[] utf8, int offset, int length, UTF16Result result) + { + + int end = offset + length; + char[] out_Renamed = result.result; + if (result.offsets.Length <= end) + { + int[] newOffsets = new int[2 * end]; + Array.Copy(result.offsets, 0, newOffsets, 0, result.offsets.Length); + result.offsets = newOffsets; + } + int[] offsets = result.offsets; + + // If incremental decoding fell in the middle of a + // single unicode character, rollback to its start: + int upto = offset; + while (offsets[upto] == - 1) + upto--; + + int outUpto = offsets[upto]; + + // Pre-allocate for worst case 1-for-1 + if (outUpto + length >= out_Renamed.Length) + { + char[] newOut = new char[2 * (outUpto + length)]; + Array.Copy(out_Renamed, 0, newOut, 0, outUpto); + result.result = out_Renamed = newOut; + } + + while (upto < end) + { + + int b = utf8[upto] & 0xff; + int ch; + + offsets[upto++] = outUpto; + + if (b < 0xc0) + { + System.Diagnostics.Debug.Assert(b < 0x80); + ch = b; + } + else if (b < 0xe0) + { + ch = ((b & 0x1f) << 6) + (utf8[upto] & 0x3f); + offsets[upto++] = - 1; + } + else if (b < 0xf0) + { + ch = ((b & 0xf) << 12) + ((utf8[upto] & 0x3f) << 6) + (utf8[upto + 1] & 0x3f); + offsets[upto++] = - 1; + offsets[upto++] = - 1; + } + else + { + System.Diagnostics.Debug.Assert(b < 0xf8); + ch = ((b & 0x7) << 18) + ((utf8[upto] & 0x3f) << 12) + ((utf8[upto + 1] & 0x3f) << 6) + (utf8[upto + 2] & 0x3f); + offsets[upto++] = - 1; + offsets[upto++] = - 1; + offsets[upto++] = - 1; + } + + if (ch <= UNI_MAX_BMP) + { + // target is a character <= 0xFFFF + out_Renamed[outUpto++] = (char) ch; + } + else + { + // target is a character in range 0xFFFF - 0x10FFFF + int chHalf = ch - HALF_BASE; + out_Renamed[outUpto++] = (char) ((chHalf >> (int) HALF_SHIFT) + UNI_SUR_HIGH_START); + out_Renamed[outUpto++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START); + } + } + + offsets[upto] = outUpto; + result.length = outUpto; + } + + // Only called from assert + /* + private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) { + try { + String s1 = new String(source, offset, length); + String s2 = new String(result, 0, upto, "UTF-8"); + if (!s1.equals(s2)) { + //System.out.println("DIFF: s1 len=" + s1.length()); + //for(int i=0;i= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + if (i < size-1) { + i++; + char nextCH = s.charAt(i); + if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) { + // Valid surrogate pair + } else + // Unmatched hight surrogate + return false; + } else + // Unmatched hight surrogate + return false; + } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + // Unmatched low surrogate + return false; + } + + return true; + } + + public static final boolean validUTF16String(char[] s, int size) { + for(int i=0;i= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + if (i < size-1) { + i++; + char nextCH = s[i]; + if (nextCH >= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) { + // Valid surrogate pair + } else + return false; + } else + return false; + } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + // Unmatched low surrogate + return false; + } + + return true; + } + */ + } +} \ No newline at end of file diff --git a/external/Lucene.Net.Light/src/core/Util/Version.cs b/external/Lucene.Net.Light/src/core/Util/Version.cs new file mode 100644 index 0000000000..abc8f20b24 --- /dev/null +++ b/external/Lucene.Net.Light/src/core/Util/Version.cs @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; + +namespace Lucene.Net.Util +{ + /// Used by certain classes to match version compatibility + /// across releases of Lucene. + ///

+ /// WARNING: When changing the version parameter + /// that you supply to components in Lucene, do not simply + /// change the version at search-time, but instead also adjust + /// your indexing code to match, and re-index. + ///

+ public enum Version + { + /// Match settings and bugs in Lucene's 2.0 release. + LUCENE_20, + + /// Match settings and bugs in Lucene's 2.1 release. + LUCENE_21, + + /// Match settings and bugs in Lucene's 2.2 release. + LUCENE_22, + + /// Match settings and bugs in Lucene's 2.3 release. + LUCENE_23, + + /// Match settings and bugs in Lucene's 2.4 release. + LUCENE_24, + + /// Match settings and bugs in Lucene's 2.9 release. + LUCENE_29, + + /// + /// Match settings and bugs in Lucene's 3.0 release. + /// + /// Use this to get the latest and greatest settings, bug fixes, + /// etc., for Lucene. + /// + /// + LUCENE_30, + + // NOTE: Add new constants for later versions **here** to respect order! + + /// + ///

WARNING: if you use this setting and then + /// upgrade to a newer release of Lucene, sizable changes + /// may happen. If precise backward compatibility is important, + /// you should instead explicitly specify an actual + /// version. + /// If you use this constant, you may need to + /// re-index all of your documents when upgrading + /// Lucene, as the way text is indexed may have changed. + /// Additionally, you may need to re-test your entire + /// application to ensure it behaves as expected, as + /// some defaults may have changed and may break functionality + /// in your application. + ///

+ [Obsolete("Use an actual version instead.")] + LUCENE_CURRENT, + } + + public static class VersionEnumExtensions + { + public static bool OnOrAfter(this Version first, Version other) + { + return first.CompareTo(other) >= 0; + } + } +} \ No newline at end of file diff --git a/mcs/build/common/Consts.cs b/mcs/build/common/Consts.cs index ee2c75bd17..30ea3e8a60 100644 --- a/mcs/build/common/Consts.cs +++ b/mcs/build/common/Consts.cs @@ -34,7 +34,7 @@ static class Consts // Use these assembly version constants to make code more maintainable. // - public const string MonoVersion = "4.0.4.0"; + public const string MonoVersion = "4.0.5.0"; public const string MonoCompany = "Mono development team"; public const string MonoProduct = "Mono Common Language Infrastructure"; public const string MonoCopyright = "(c) Various Mono authors"; diff --git a/mcs/class/lib/monolite/Mono.Security.dll.REMOVED.git-id b/mcs/class/lib/monolite/Mono.Security.dll.REMOVED.git-id index 991a517f58..9dab8fe392 100644 --- a/mcs/class/lib/monolite/Mono.Security.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/Mono.Security.dll.REMOVED.git-id @@ -1 +1 @@ -68b84e24a06bbb6bb78ef224608dc4eba15d5030 \ No newline at end of file +877f4d15de318b35d7977b3ec736321e80fc8340 \ No newline at end of file diff --git a/mcs/class/lib/monolite/System.Configuration.dll.REMOVED.git-id b/mcs/class/lib/monolite/System.Configuration.dll.REMOVED.git-id index e4198b66e9..f949802d23 100644 --- a/mcs/class/lib/monolite/System.Configuration.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/System.Configuration.dll.REMOVED.git-id @@ -1 +1 @@ -17f4de6638d19cbc68f60a4d532798d87ab7302b \ No newline at end of file +ce9eb809ba177cb9c5f9387d1e2d93f17a583be9 \ No newline at end of file diff --git a/mcs/class/lib/monolite/System.Core.dll.REMOVED.git-id b/mcs/class/lib/monolite/System.Core.dll.REMOVED.git-id index a313e39b2c..b34e41ce9d 100644 --- a/mcs/class/lib/monolite/System.Core.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/System.Core.dll.REMOVED.git-id @@ -1 +1 @@ -e29dbe6efa356793ba4738a7a2d9858233c5d502 \ No newline at end of file +3f0924534ad9ed3c8f7a21364965d7c59a734f1f \ No newline at end of file diff --git a/mcs/class/lib/monolite/System.Security.dll.REMOVED.git-id b/mcs/class/lib/monolite/System.Security.dll.REMOVED.git-id index 6b95943e32..90395249bc 100644 --- a/mcs/class/lib/monolite/System.Security.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/System.Security.dll.REMOVED.git-id @@ -1 +1 @@ -2a993e8625896ec805afccd0668cb8f016e84399 \ No newline at end of file +d5cb6b4dd17539ae3babb95e036964746d5bb92e \ No newline at end of file diff --git a/mcs/class/lib/monolite/System.Xml.dll.REMOVED.git-id b/mcs/class/lib/monolite/System.Xml.dll.REMOVED.git-id index 2e68e346d8..5ff1a2ee53 100644 --- a/mcs/class/lib/monolite/System.Xml.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/System.Xml.dll.REMOVED.git-id @@ -1 +1 @@ -0bf4ff5424f83891362cc450b801e8168bf5b777 \ No newline at end of file +77bdb4eb66375f2b1fb48bbba356b9fe1dfcf78b \ No newline at end of file diff --git a/mcs/class/lib/monolite/System.dll.REMOVED.git-id b/mcs/class/lib/monolite/System.dll.REMOVED.git-id index a147cdf2e5..05a2fbd0a2 100644 --- a/mcs/class/lib/monolite/System.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/System.dll.REMOVED.git-id @@ -1 +1 @@ -58b35960f78d990b0abdf6138caee16ddcde82c8 \ No newline at end of file +a31ea26acca64e4fefb9e0a7ceec3f546bd99ed0 \ No newline at end of file diff --git a/mcs/class/lib/monolite/basic.exe.REMOVED.git-id 
b/mcs/class/lib/monolite/basic.exe.REMOVED.git-id index c0db6ac094..2c47b81f0c 100644 --- a/mcs/class/lib/monolite/basic.exe.REMOVED.git-id +++ b/mcs/class/lib/monolite/basic.exe.REMOVED.git-id @@ -1 +1 @@ -32112b9a947cfe490e82c9e32a4201dc758359d9 \ No newline at end of file +d6061e61d5ce653d8bb56a7ad18b7918ab561e40 \ No newline at end of file diff --git a/mcs/class/lib/monolite/mscorlib.dll.REMOVED.git-id b/mcs/class/lib/monolite/mscorlib.dll.REMOVED.git-id index 381d7b4aa7..b276357caa 100644 --- a/mcs/class/lib/monolite/mscorlib.dll.REMOVED.git-id +++ b/mcs/class/lib/monolite/mscorlib.dll.REMOVED.git-id @@ -1 +1 @@ -bf09282b13961dddc7fd50d446f6246c14bff292 \ No newline at end of file +e32acd7ca1cb27d109eaf96067158dacb0ed8e3a \ No newline at end of file diff --git a/mono-core.spec b/mono-core.spec index a8b606954e..d987d8c128 100644 --- a/mono-core.spec +++ b/mono-core.spec @@ -15,7 +15,7 @@ License: LGPL v2.1 only Group: Development/Languages/Mono Summary: A .NET Runtime Environment Url: http://www.mono-project.com -Version: 4.0.4 +Version: 4.0.5 Release: 0 Source0: mono-%{version}.tar.bz2 BuildRequires: bison diff --git a/mono/io-layer/processes.c b/mono/io-layer/processes.c index 439e4e5f32..509f350a37 100644 --- a/mono/io-layer/processes.c +++ b/mono/io-layer/processes.c @@ -811,11 +811,11 @@ gboolean CreateProcess (const gunichar2 *appname, const gunichar2 *cmdline, if (newapp != NULL) { if (appname != NULL) { - newcmd = utf16_concat (newapp, utf16_space, + newcmd = utf16_concat (utf16_quote, newapp, utf16_quote, utf16_space, appname, utf16_space, cmdline, NULL); } else { - newcmd = utf16_concat (newapp, utf16_space, + newcmd = utf16_concat (utf16_quote, newapp, utf16_quote, utf16_space, cmdline, NULL); } diff --git a/mono/metadata/loader.c b/mono/metadata/loader.c index 33cd384f43..0825fc48ca 100644 --- a/mono/metadata/loader.c +++ b/mono/metadata/loader.c @@ -45,6 +45,7 @@ #include #include #include +#include MonoDefaults mono_defaults; @@ -1420,7 +1421,7 @@ mono_lookup_pinvoke_call (MonoMethod *method, const char **exc_class, const char const char *new_scope; char *error_msg; char *full_name, *file_name, *found_name = NULL; - int i; + int i,j; MonoDl *module = NULL; gboolean cached = FALSE; @@ -1568,23 +1569,85 @@ mono_lookup_pinvoke_call (MonoMethod *method, const char **exc_class, const char } if (!module && !is_absolute) { - void *iter = NULL; - char *mdirname = g_path_get_dirname (image->name); - while ((full_name = mono_dl_build_path (mdirname, file_name, &iter))) { - module = cached_module_load (full_name, MONO_DL_LAZY, &error_msg); - if (!module) { - mono_trace (G_LOG_LEVEL_INFO, MONO_TRACE_DLLIMPORT, - "DllImport error loading library '%s': '%s'.", - full_name, error_msg); - g_free (error_msg); - } else { - found_name = g_strdup (full_name); + void *iter; + char *mdirname; + + for (j = 0; j < 3; ++j) { + iter = NULL; + mdirname = NULL; + switch (j) { + case 0: + mdirname = g_path_get_dirname (image->name); + break; + case 1: /* @executable_path@/../lib */ + { + char buf [4096]; + int binl; + binl = mono_dl_get_executable_path (buf, sizeof (buf)); + if (binl != -1) { + char *base, *newbase; + char *resolvedname; + buf [binl] = 0; + resolvedname = mono_path_resolve_symlinks (buf); + + base = g_path_get_dirname (resolvedname); + newbase = g_path_get_dirname(base); + mdirname = g_strdup_printf ("%s/lib", newbase); + + g_free (resolvedname); + g_free (base); + g_free (newbase); + } + break; + } +#ifdef __MACH__ + case 2: /* @executable_path@/../Libraries */ + { + char buf 
[4096]; + int binl; + binl = mono_dl_get_executable_path (buf, sizeof (buf)); + if (binl != -1) { + char *base, *newbase; + char *resolvedname; + buf [binl] = 0; + resolvedname = mono_path_resolve_symlinks (buf); + + base = g_path_get_dirname (resolvedname); + newbase = g_path_get_dirname(base); + mdirname = g_strdup_printf ("%s/Libraries", newbase); + + g_free (resolvedname); + g_free (base); + g_free (newbase); + } + break; + } +#endif } - g_free (full_name); + + if (!mdirname) + continue; + + while ((full_name = mono_dl_build_path (mdirname, file_name, &iter))) { + module = cached_module_load (full_name, MONO_DL_LAZY, &error_msg); + if (!module) { + mono_trace (G_LOG_LEVEL_INFO, MONO_TRACE_DLLIMPORT, + "DllImport error loading library '%s': '%s'.", + full_name, error_msg); + g_free (error_msg); + } else { + found_name = g_strdup (full_name); + } + g_free (full_name); + if (module) + break; + + } + g_free (mdirname); if (module) break; } - g_free (mdirname); + } if (!module) { diff --git a/mono/mini/Makefile.am b/mono/mini/Makefile.am index a47fa14ad8..f9134d60b4 100644 --- a/mono/mini/Makefile.am +++ b/mono/mini/Makefile.am @@ -759,7 +759,7 @@ EXTRA_DIST = TestDriver.cs ldscript ldscript.mono \ Makefile.am.in version.h: Makefile - echo "#define FULL_VERSION \"Stable 4.0.4.1/5ab4c0d\"" > version.h + echo "#define FULL_VERSION \"Stable 4.0.5.1/1d8d582\"" > version.h # Utility target for patching libtool to speed up linking patch-libtool: diff --git a/mono/mini/Makefile.am.in b/mono/mini/Makefile.am.in index a47fa14ad8..f9134d60b4 100755 --- a/mono/mini/Makefile.am.in +++ b/mono/mini/Makefile.am.in @@ -759,7 +759,7 @@ EXTRA_DIST = TestDriver.cs ldscript ldscript.mono \ Makefile.am.in version.h: Makefile - echo "#define FULL_VERSION \"Stable 4.0.4.1/5ab4c0d\"" > version.h + echo "#define FULL_VERSION \"Stable 4.0.5.1/1d8d582\"" > version.h # Utility target for patching libtool to speed up linking patch-libtool: diff --git a/mono/mini/Makefile.in.REMOVED.git-id b/mono/mini/Makefile.in.REMOVED.git-id index 9b33a1aa82..c68b943b1d 100644 --- a/mono/mini/Makefile.in.REMOVED.git-id +++ b/mono/mini/Makefile.in.REMOVED.git-id @@ -1 +1 @@ -8959e291e0e5f26397d9b96d257b52820f9acee0 \ No newline at end of file +5804298b3a62c2db5b469e57d8236ad19d9cb527 \ No newline at end of file diff --git a/mono/mini/version.h b/mono/mini/version.h index 86d6529afd..f2001e77c0 100644 --- a/mono/mini/version.h +++ b/mono/mini/version.h @@ -1 +1 @@ -#define FULL_VERSION "Stable 4.0.4.1/5ab4c0d" +#define FULL_VERSION "Stable 4.0.5.1/1d8d582" diff --git a/po/mcs/de.gmo b/po/mcs/de.gmo index 2787ca156e..fe285c3dba 100644 Binary files a/po/mcs/de.gmo and b/po/mcs/de.gmo differ diff --git a/po/mcs/de.po.REMOVED.git-id b/po/mcs/de.po.REMOVED.git-id index 03b8181749..3211242564 100644 --- a/po/mcs/de.po.REMOVED.git-id +++ b/po/mcs/de.po.REMOVED.git-id @@ -1 +1 @@ -e87480f17a6c70946d156af39e4c2b189a80a620 \ No newline at end of file +af6d42f89b3e6ec4f2104fbe3d12396451db937c \ No newline at end of file diff --git a/po/mcs/es.gmo b/po/mcs/es.gmo index da0893863c..e6f337e323 100644 Binary files a/po/mcs/es.gmo and b/po/mcs/es.gmo differ diff --git a/po/mcs/es.po.REMOVED.git-id b/po/mcs/es.po.REMOVED.git-id index c74b0f8d09..44345d585a 100644 --- a/po/mcs/es.po.REMOVED.git-id +++ b/po/mcs/es.po.REMOVED.git-id @@ -1 +1 @@ -2d59fc55951ca122f9721573dc99b29f419e68f4 \ No newline at end of file +20aa5e33333c7cf6a0ada4cf3fce6afb026c09ea \ No newline at end of file diff --git a/po/mcs/ja.gmo b/po/mcs/ja.gmo index 3e1e4e5cac..4590e1ef35 
100644 Binary files a/po/mcs/ja.gmo and b/po/mcs/ja.gmo differ diff --git a/po/mcs/ja.po.REMOVED.git-id b/po/mcs/ja.po.REMOVED.git-id index 9e2ebf40e7..e8bfc67da6 100644 --- a/po/mcs/ja.po.REMOVED.git-id +++ b/po/mcs/ja.po.REMOVED.git-id @@ -1 +1 @@ -da5278e6aae3a989cf1ff0239c17fa754ba52af5 \ No newline at end of file +45c1ad216f77aaed90b3f6683a2d3269ed62abeb \ No newline at end of file diff --git a/po/mcs/mcs.pot b/po/mcs/mcs.pot index 87304ae232..9cd32fca17 100644 --- a/po/mcs/mcs.pot +++ b/po/mcs/mcs.pot @@ -6,9 +6,9 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: mono 4.0.4\n" +"Project-Id-Version: mono 4.0.5\n" "Report-Msgid-Bugs-To: http://www.mono-project.com/Bugs\n" -"POT-Creation-Date: 2015-08-25 18:21-0400\n" +"POT-Creation-Date: 2015-11-10 09:14-0500\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" diff --git a/po/mcs/pt_BR.gmo b/po/mcs/pt_BR.gmo index 8d0a1e3800..b09254f78d 100644 Binary files a/po/mcs/pt_BR.gmo and b/po/mcs/pt_BR.gmo differ diff --git a/po/mcs/pt_BR.po.REMOVED.git-id b/po/mcs/pt_BR.po.REMOVED.git-id index 59371dc70b..f9e24d36b1 100644 --- a/po/mcs/pt_BR.po.REMOVED.git-id +++ b/po/mcs/pt_BR.po.REMOVED.git-id @@ -1 +1 @@ -cd2f738943dff96c92171c37e6c30716428ffd26 \ No newline at end of file +dc1a4233de7851f2c74dad05ddf51f167bd5a1f5 \ No newline at end of file
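For reference, the surrogate-pair arithmetic that the UnicodeUtil routines above rely on (the ((code - 0xD7C0) << 10) + (utf32 & 0x3FF) step on encode, and HALF_BASE/HALF_SHIFT/HALF_MASK on decode) is shown in isolation below. This is a minimal standalone C# sketch, not part of the patch itself; the class and method names are invented for illustration.

using System;

// Sketch of the surrogate-pair math used by UnicodeUtil.UTF16toUTF8 and
// UnicodeUtil.UTF8toUTF16; all names here are illustrative only.
static class SurrogateMathSketch
{
    const int UNI_SUR_HIGH_START = 0xD800;
    const int UNI_SUR_LOW_START = 0xDC00;
    const int HALF_BASE = 0x10000;
    const int HALF_SHIFT = 10;
    const int HALF_MASK = 0x3FF;

    // ((high - 0xD7C0) << 10) + (low & 0x3FF) is the same value as the
    // textbook formula 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00).
    static int CombineSurrogates(char high, char low)
    {
        return ((high - 0xD7C0) << 10) + (low & HALF_MASK);
    }

    // The inverse direction, matching the ch > 0xFFFF branch of UTF8toUTF16.
    static (char High, char Low) SplitCodePoint(int codePoint)
    {
        int chHalf = codePoint - HALF_BASE;
        return ((char) ((chHalf >> HALF_SHIFT) + UNI_SUR_HIGH_START),
                (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START));
    }

    static void Main()
    {
        // U+1F600 is the UTF-16 pair D83D DE00.
        int cp = CombineSurrogates('\uD83D', '\uDE00');
        Console.WriteLine("U+{0:X}", cp);                       // U+1F600
        var (hi, lo) = SplitCodePoint(cp);
        Console.WriteLine("{0:X4} {1:X4}", (int) hi, (int) lo); // D83D DE00
    }
}

A surrogate that cannot be paired never reaches this arithmetic: the encode loops above fall through to writing 0xEF 0xBF 0xBD, the UTF-8 form of the U+FFFD replacement character.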
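Similarly, the OnOrAfter extension added alongside the Version enum is presumably how the bundled Lucene.Net sources gate behavior on the requested compatibility version. A short usage sketch follows, assuming the Version enum and VersionEnumExtensions from the patch are available; the analyzer class and its field are hypothetical.

using Lucene.Net.Util;

// Hypothetical component illustrating Version.OnOrAfter: enum members are
// declared in release order, so CompareTo(other) >= 0 answers whether the
// requested release already includes a given behavior change.
class ExampleAnalyzer
{
    public readonly bool ReplaceInvalidAcronym;

    public ExampleAnalyzer(Version matchVersion)
    {
        // Enable the newer behavior only when matching Lucene 2.4 or later.
        ReplaceInvalidAcronym = matchVersion.OnOrAfter(Version.LUCENE_24);
    }
}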