Imported Upstream version 4.0.5.1

Former-commit-id: d28a93322098597c784fac59bbd2f5ec23edc4ab
Jo Shields
2015-11-10 14:44:38 +00:00
parent 363056e66e
commit 183bba2c9a
454 changed files with 91847 additions and 39 deletions

external/Lucene.Net.Light/README.md (vendored normal file, 6 additions)

@@ -0,0 +1,6 @@
This is a subset of Lucene.Net as used by Mono's Monodoc tool.
This module is a checkout of:
git://github.com/mono/lucene.net.git
88fb67b07621dfed054d8d75fd50672fb26349df


@@ -0,0 +1 @@
61338700516ffa26e2a36fef4a0843a5fbf01c62


@@ -0,0 +1,171 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Documents;
using Lucene.Net.Store;
using Lucene.Net.Util;
namespace Lucene.Net.Analysis
{
/// <summary>An Analyzer builds TokenStreams, which analyze text. It thus represents a
/// policy for extracting index terms from text.
/// <p/>
/// Typical implementations first build a Tokenizer, which breaks the stream of
/// characters from the Reader into raw Tokens. One or more TokenFilters may
/// then be applied to the output of the Tokenizer.
/// </summary>
public abstract class Analyzer : IDisposable
{
/// <summary>Creates a TokenStream which tokenizes all the text in the provided
/// Reader. Must be able to handle null field name for
/// backward compatibility.
/// </summary>
public abstract TokenStream TokenStream(String fieldName, System.IO.TextReader reader);
/// <summary>Creates a TokenStream that is allowed to be re-used
/// from the previous time that the same thread called
/// this method. Callers that do not need to use more
/// than one TokenStream at the same time from this
/// analyzer should use this method for better
/// performance.
/// </summary>
public virtual TokenStream ReusableTokenStream(String fieldName, System.IO.TextReader reader)
{
return TokenStream(fieldName, reader);
}
private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
private bool isDisposed;
/// <summary>Used by Analyzers that implement reusableTokenStream
/// to retrieve previously saved TokenStreams for re-use
/// by the same thread.
/// </summary>
protected internal virtual object PreviousTokenStream
{
get
{
if (tokenStreams == null)
{
throw new AlreadyClosedException("this Analyzer is closed");
}
return tokenStreams.Get();
}
set
{
if (tokenStreams == null)
{
throw new AlreadyClosedException("this Analyzer is closed");
}
tokenStreams.Set(value);
}
}
[Obsolete()]
protected internal bool overridesTokenStreamMethod = false;
/// <deprecated> This is only present to preserve
/// back-compat of classes that subclass a core analyzer
/// and override tokenStream but not reusableTokenStream
/// </deprecated>
/// <summary>
/// Java uses Class&lt;? extends Analyzer&gt; to constrain <typeparamref name="TClass"/> to
/// only types that inherit from Analyzer. C# does not have a generic type class,
/// i.e. Type&lt;T&gt;. The method signature stays the same, and an exception may
/// still be thrown if the method doesn't exist.
/// </summary>
[Obsolete("This is only present to preserve back-compat of classes that subclass a core analyzer and override tokenStream but not reusableTokenStream ")]
protected internal virtual void SetOverridesTokenStreamMethod<TClass>()
where TClass : Analyzer
{
try
{
System.Reflection.MethodInfo m = this.GetType().GetMethod("TokenStream", new[] { typeof(string), typeof(System.IO.TextReader) });
overridesTokenStreamMethod = m.DeclaringType != typeof(TClass);
}
catch (MethodAccessException)
{
// can't happen, as TClass is constrained to be a subclass of Analyzer
overridesTokenStreamMethod = false;
}
}
/// <summary> Invoked before indexing a Fieldable instance if
/// terms have already been added to that field. This allows custom
/// analyzers to place an automatic position increment gap between
/// Fieldable instances using the same field name. The default
/// position increment gap is 0. With a 0 position increment gap and
/// the typical default token position increment of 1, all terms in a field,
/// including across Fieldable instances, are in successive positions, allowing
/// exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
///
/// </summary>
/// <param name="fieldName">Fieldable name being indexed.
/// </param>
/// <returns> position increment gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
/// </returns>
public virtual int GetPositionIncrementGap(String fieldName)
{
return 0;
}
/// <summary> Just like <see cref="GetPositionIncrementGap" />, except for
/// Token offsets instead. By default this returns 1 for
/// tokenized fields, as if the fields were joined
/// with an extra space character, and 0 for un-tokenized
/// fields. This method is only called if the field
/// produced at least one token for indexing.
///
/// </summary>
/// <param name="field">the field just indexed
/// </param>
/// <returns> offset gap, added to the next token emitted from <see cref="TokenStream(String,System.IO.TextReader)" />
/// </returns>
public virtual int GetOffsetGap(IFieldable field)
{
return field.IsTokenized ? 1 : 0;
}
/// <summary>Frees persistent resources used by this Analyzer </summary>
public void Close()
{
Dispose();
}
public virtual void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (isDisposed) return;
if (disposing)
{
if (tokenStreams != null)
{
tokenStreams.Close();
tokenStreams = null;
}
}
isDisposed = true;
}
}
}
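The summary above describes the usual composition pattern: a Tokenizer feeding one or more TokenFilters. As a minimal sketch only (not part of the imported sources; SimpleLetterAnalyzer is a hypothetical name), an Analyzer built from classes in this module could look like:

using System;
using Lucene.Net.Analysis;

// Hypothetical example: tokenize on letter runs, then lowercase each token.
public sealed class SimpleLetterAnalyzer : Analyzer
{
    public override TokenStream TokenStream(String fieldName, System.IO.TextReader reader)
    {
        return new LowerCaseFilter(new LetterTokenizer(reader));
    }
}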


@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Support;
using Lucene.Net.Util;
namespace Lucene.Net.Analysis
{
/// <summary>
/// Base utility class for implementing a <see cref="CharFilter" />.
/// You subclass this, and then record mappings by calling
/// <see cref="AddOffCorrectMap" />, and then invoke <see cref="Correct" />
/// to correct an offset.
/// </summary>
public abstract class BaseCharFilter : CharFilter
{
private int[] offsets;
private int[] diffs;
private int size = 0;
protected BaseCharFilter(CharStream @in) : base(@in)
{
}
/// <summary>Retrieve the corrected offset.</summary>
protected internal override int Correct(int currentOff)
{
if (offsets == null || currentOff < offsets[0])
{
return currentOff;
}
int hi = size - 1;
if (currentOff >= offsets[hi])
return currentOff + diffs[hi];
int lo = 0;
int mid = -1;
while (hi >= lo)
{
mid = Number.URShift(lo + hi, 1);
if (currentOff < offsets[mid])
hi = mid - 1;
else if (currentOff > offsets[mid])
lo = mid + 1;
else
return currentOff + diffs[mid];
}
if (currentOff < offsets[mid])
return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
return currentOff + diffs[mid];
}
protected int LastCumulativeDiff
{
get
{
return offsets == null ? 0 : diffs[size - 1];
}
}
[Obsolete("Use LastCumulativeDiff property instead")]
protected int GetLastCumulativeDiff()
{
return LastCumulativeDiff;
}
protected void AddOffCorrectMap(int off, int cumulativeDiff)
{
if (offsets == null)
{
offsets = new int[64];
diffs = new int[64];
}
else if (size == offsets.Length)
{
offsets = ArrayUtil.Grow(offsets);
diffs = ArrayUtil.Grow(diffs);
}
offsets[size] = off;
diffs[size++] = cumulativeDiff;
}
}
}
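As an illustrative sketch only (not part of the imported sources; StripApostropheCharFilter is hypothetical), a subclass can record one cumulative-diff entry per deleted character so that Correct() maps output offsets back to input offsets:

using Lucene.Net.Analysis;

// Hypothetical example: delete apostrophes, keeping offsets correctable.
public class StripApostropheCharFilter : BaseCharFilter
{
    private int outOff; // offset on the output (filtered) side

    public StripApostropheCharFilter(CharStream @in) : base(@in) { }

    public override int Read()
    {
        int c = input.Read();
        while (c == '\'')
        {
            // One more input char was dropped before output offset outOff.
            AddOffCorrectMap(outOff, LastCumulativeDiff + 1);
            c = input.Read();
        }
        if (c != -1) outOff++;
        return c;
    }

    public override int Read(char[] cbuf, int off, int len)
    {
        // Route the block read through Read() so the filtering applies.
        int n = 0;
        for (int i = off; i < off + len; i++)
        {
            int c = Read();
            if (c == -1) break;
            cbuf[i] = (char)c;
            n++;
        }
        return n == 0 ? -1 : n; // same EOF convention as MappingCharFilter
    }
}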


@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace Lucene.Net.Analysis
{
/// <summary> This class can be used if the token attributes of a TokenStream
/// are intended to be consumed more than once. It caches
/// all token attribute states locally in a List.
///
/// <p/>CachingTokenFilter implements the optional method
/// <see cref="TokenStream.Reset()" />, which repositions the
/// stream to the first Token.
/// </summary>
public sealed class CachingTokenFilter : TokenFilter
{
private System.Collections.Generic.LinkedList<State> cache = null;
private System.Collections.Generic.IEnumerator<State> iterator = null;
private State finalState;
public CachingTokenFilter(TokenStream input):base(input)
{
}
public override bool IncrementToken()
{
if (cache == null)
{
// fill cache lazily
cache = new System.Collections.Generic.LinkedList<State>();
FillCache();
iterator = cache.GetEnumerator();
}
if (!iterator.MoveNext())
{
// the cache is exhausted, return false
return false;
}
// Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
RestoreState(iterator.Current);
return true;
}
public override void End()
{
if (finalState != null)
{
RestoreState(finalState);
}
}
public override void Reset()
{
if (cache != null)
{
iterator = cache.GetEnumerator();
}
}
private void FillCache()
{
while (input.IncrementToken())
{
cache.AddLast(CaptureState());
}
// capture final state
input.End();
finalState = CaptureState();
}
}
}
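As an illustration only (not part of the imported sources; CachingTokenFilterDemo is hypothetical), consuming the same token sequence twice:

using Lucene.Net.Analysis;

static class CachingTokenFilterDemo
{
    // Counts the tokens of `text` twice; the second pass replays the cache.
    public static int CountTwice(Analyzer analyzer, string text)
    {
        TokenStream ts = analyzer.TokenStream("body", new System.IO.StringReader(text));
        var cached = new CachingTokenFilter(ts);
        int n = 0;
        while (cached.IncrementToken()) n++; // first pass fills the cache
        cached.Reset();                      // repositions to the first token
        while (cached.IncrementToken()) n++; // second pass served from the cache
        return n;
    }
}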

File diff suppressed because it is too large.


@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace Lucene.Net.Analysis
{
/// <summary> Subclasses of CharFilter can be chained to filter a CharStream.
/// They can be used as <see cref="System.IO.TextReader" /> with additional offset
/// correction. <see cref="Tokenizer" />s will automatically use <see cref="CorrectOffset" />
/// if a CharFilter/CharStream subclass is used.
///
/// </summary>
/// <version> $Id$
///
/// </version>
public abstract class CharFilter : CharStream
{
private long currentPosition = -1;
private bool isDisposed;
protected internal CharStream input;
protected internal CharFilter(CharStream in_Renamed) : base(in_Renamed)
{
input = in_Renamed;
}
/// <summary>Subclasses may override this to correct the current offset.</summary>
/// <param name="currentOff">current offset</param>
/// <returns>corrected offset</returns>
protected internal virtual int Correct(int currentOff)
{
return currentOff;
}
/// <summary> Chains the corrected offset through the input
/// CharFilter.
/// </summary>
public override int CorrectOffset(int currentOff)
{
return input.CorrectOffset(Correct(currentOff));
}
protected override void Dispose(bool disposing)
{
if (isDisposed) return;
if (disposing)
{
if (input != null)
{
input.Close();
}
}
input = null;
isDisposed = true;
base.Dispose(disposing);
}
public override int Read(System.Char[] cbuf, int off, int len)
{
return input.Read(cbuf, off, len);
}
public bool MarkSupported()
{
return input.BaseStream.CanSeek;
}
public void Mark(int readAheadLimit)
{
currentPosition = input.BaseStream.Position;
input.BaseStream.Position = readAheadLimit;
}
public void Reset()
{
input.BaseStream.Position = currentPosition;
}
}
}


@@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace Lucene.Net.Analysis
{
/// <summary> CharReader is a Reader wrapper. It reads chars from
/// Reader and outputs a <see cref="CharStream" />, defining an
/// identity <see cref="CorrectOffset" /> method that
/// simply returns the provided offset.
/// </summary>
public sealed class CharReader:CharStream
{
private long currentPosition = -1;
private bool isDisposed;
internal System.IO.StreamReader input;
public static CharStream Get(System.IO.TextReader input)
{
var charStream = input as CharStream;
if (charStream != null)
return charStream;
// {{Aroush-2.9}} isn't there a better (faster) way to do this?
var theString = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(input.ReadToEnd()));
return new CharReader(new System.IO.StreamReader(theString));
//return input is CharStream?(CharStream) input:new CharReader(input);
}
private CharReader(System.IO.StreamReader in_Renamed) : base(in_Renamed)
{
input = in_Renamed;
}
public override int CorrectOffset(int currentOff)
{
return currentOff;
}
protected override void Dispose(bool disposing)
{
if (isDisposed) return;
if (disposing)
{
if (input != null)
{
input.Close();
}
}
input = null;
isDisposed = true;
base.Dispose(disposing);
}
public override int Read(System.Char[] cbuf, int off, int len)
{
return input.Read(cbuf, off, len);
}
public bool MarkSupported()
{
return input.BaseStream.CanSeek;
}
public void Mark(int readAheadLimit)
{
currentPosition = input.BaseStream.Position;
input.BaseStream.Position = readAheadLimit;
}
public void Reset()
{
input.BaseStream.Position = currentPosition;
}
}
}


@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace Lucene.Net.Analysis
{
/// <summary> CharStream adds <see cref="CorrectOffset" />
/// functionality over <see cref="System.IO.TextReader" />. All Tokenizers accept a
/// CharStream instead of <see cref="System.IO.TextReader" /> as input, which enables
/// arbitrary character based filtering before tokenization.
/// The <see cref="CorrectOffset" /> method fixes offsets to account for
/// removal or insertion of characters, so that the offsets
/// reported in the tokens match the character offsets of the
/// original Reader.
/// </summary>
public abstract class CharStream : System.IO.StreamReader
{
protected CharStream(System.IO.StreamReader reader) : base(reader.BaseStream)
{
}
/// <summary> Called by CharFilter(s) and Tokenizer to correct token offset.
///
/// </summary>
/// <param name="currentOff">offset as seen in the output
/// </param>
/// <returns> corrected offset based on the input
/// </returns>
public abstract int CorrectOffset(int currentOff);
}
}


@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Lucene.Net.Analysis.Tokenattributes;
using AttributeSource = Lucene.Net.Util.AttributeSource;
namespace Lucene.Net.Analysis
{
/// <summary>An abstract base class for simple, character-oriented tokenizers.</summary>
public abstract class CharTokenizer:Tokenizer
{
protected CharTokenizer(System.IO.TextReader input):base(input)
{
offsetAtt = AddAttribute<IOffsetAttribute>();
termAtt = AddAttribute<ITermAttribute>();
}
protected CharTokenizer(AttributeSource source, System.IO.TextReader input):base(source, input)
{
offsetAtt = AddAttribute<IOffsetAttribute>();
termAtt = AddAttribute<ITermAttribute>();
}
protected CharTokenizer(AttributeFactory factory, System.IO.TextReader input):base(factory, input)
{
offsetAtt = AddAttribute<IOffsetAttribute>();
termAtt = AddAttribute<ITermAttribute>();
}
private int offset = 0, bufferIndex = 0, dataLen = 0;
private const int MAX_WORD_LEN = 255;
private const int IO_BUFFER_SIZE = 4096;
private readonly char[] ioBuffer = new char[IO_BUFFER_SIZE];
private readonly ITermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
/// <summary>Returns true iff a character should be included in a token. This
/// tokenizer generates as tokens adjacent sequences of characters which
/// satisfy this predicate. Characters for which this is false are used to
/// define token boundaries and are not included in tokens.
/// </summary>
protected internal abstract bool IsTokenChar(char c);
/// <summary>Called on each token character to normalize it before it is added to the
/// token. The default implementation does nothing. Subclasses may use this
/// to, e.g., lowercase tokens.
/// </summary>
protected internal virtual char Normalize(char c)
{
return c;
}
public override bool IncrementToken()
{
ClearAttributes();
int length = 0;
int start = bufferIndex;
char[] buffer = termAtt.TermBuffer();
while (true)
{
if (bufferIndex >= dataLen)
{
offset += dataLen;
dataLen = input.Read(ioBuffer, 0, ioBuffer.Length);
if (dataLen <= 0)
{
dataLen = 0; // so next offset += dataLen won't decrement offset
if (length > 0)
break;
return false;
}
bufferIndex = 0;
}
char c = ioBuffer[bufferIndex++];
if (IsTokenChar(c))
{
// if it's a token char
if (length == 0)
// start of token
start = offset + bufferIndex - 1;
else if (length == buffer.Length)
buffer = termAtt.ResizeTermBuffer(1 + length);
buffer[length++] = Normalize(c); // buffer it, normalized
if (length == MAX_WORD_LEN)
// buffer overflow!
break;
}
else if (length > 0)
// at non-Letter w/ chars
break; // return 'em
}
termAtt.SetTermLength(length);
offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + length));
return true;
}
public override void End()
{
// set final offset
int finalOffset = CorrectOffset(offset);
offsetAtt.SetOffset(finalOffset, finalOffset);
}
public override void Reset(System.IO.TextReader input)
{
base.Reset(input);
bufferIndex = 0;
offset = 0;
dataLen = 0;
}
}
}
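As an illustration only (not part of the imported sources; AlphanumericTokenizer is hypothetical), a CharTokenizer that keeps runs of letters and digits and lowercases them via Normalize():

using Lucene.Net.Analysis;

public sealed class AlphanumericTokenizer : CharTokenizer
{
    public AlphanumericTokenizer(System.IO.TextReader input) : base(input) { }

    // Letters and digits belong to tokens; everything else is a boundary.
    protected internal override bool IsTokenChar(char c)
    {
        return System.Char.IsLetterOrDigit(c);
    }

    // Applied to every character as it is buffered into the token.
    protected internal override char Normalize(char c)
    {
        return System.Char.ToLower(c);
    }
}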


@@ -0,0 +1,344 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Analysis.Tokenattributes;
namespace Lucene.Net.Analysis
{
/// <summary> A filter that replaces accented characters in the ISO Latin 1 character set
/// (ISO-8859-1) by their unaccented equivalent. The case will not be altered.
/// <p/>
/// For instance, '&#192;' will be replaced by 'a'.
/// <p/>
///
/// </summary>
/// <deprecated> If you build a new index, use <see cref="ASCIIFoldingFilter"/>
/// which covers a superset of Latin 1.
/// This class is included for use with existing indexes and will be removed
/// in a future release (possible Lucene 4.0)
/// </deprecated>
[Obsolete("If you build a new index, use ASCIIFoldingFilter which covers a superset of Latin 1. This class is included for use with existing indexes and will be removed in a future release (possible Lucene 4.0).")]
public class ISOLatin1AccentFilter : TokenFilter
{
public ISOLatin1AccentFilter(TokenStream input):base(input)
{
termAtt = AddAttribute<ITermAttribute>();
}
private char[] output = new char[256];
private int outputPos;
private readonly ITermAttribute termAtt;
public override bool IncrementToken()
{
if (input.IncrementToken())
{
char[] buffer = termAtt.TermBuffer();
int length = termAtt.TermLength();
// If no characters actually require rewriting then we
// just return token as-is:
for (int i = 0; i < length; i++)
{
char c = buffer[i];
if (c >= '\u00c0' && c <= '\uFB06')
{
RemoveAccents(buffer, length);
termAtt.SetTermBuffer(output, 0, outputPos);
break;
}
}
return true;
}
return false;
}
/// <summary> To replace accented characters in a String by unaccented equivalents.</summary>
public void RemoveAccents(char[] input, int length)
{
// Worst-case length required:
int maxSizeNeeded = 2 * length;
int size = output.Length;
while (size < maxSizeNeeded)
size *= 2;
if (size != output.Length)
output = new char[size];
outputPos = 0;
int pos = 0;
for (int i = 0; i < length; i++, pos++)
{
char c = input[pos];
// Quick test: if it's not in range then just keep
// current character
if (c < '\u00c0' || c > '\uFB06')
output[outputPos++] = c;
else
{
switch (c)
{
case '\u00C0': // À
case '\u00C1': // Á
case '\u00C2': // Â
case '\u00C3': // Ã
case '\u00C4': // Ä
case '\u00C5': // Å
output[outputPos++] = 'A';
break;
case '\u00C6': // Æ
output[outputPos++] = 'A';
output[outputPos++] = 'E';
break;
case '\u00C7': // Ç
output[outputPos++] = 'C';
break;
case '\u00C8': // È
case '\u00C9': // É
case '\u00CA': // Ê
case '\u00CB': // Ë
output[outputPos++] = 'E';
break;
case '\u00CC': // Ì
case '\u00CD': // Í
case '\u00CE': // Î
case '\u00CF': // Ï
output[outputPos++] = 'I';
break;
case '\u0132': // Ĳ
output[outputPos++] = 'I';
output[outputPos++] = 'J';
break;
case '\u00D0': // Ð
output[outputPos++] = 'D';
break;
case '\u00D1': // Ñ
output[outputPos++] = 'N';
break;
case '\u00D2': // Ò
case '\u00D3': // Ó
case '\u00D4': // Ô
case '\u00D5': // Õ
case '\u00D6': // Ö
case '\u00D8': // Ø
output[outputPos++] = 'O';
break;
case '\u0152': // Œ
output[outputPos++] = 'O';
output[outputPos++] = 'E';
break;
case '\u00DE': // Þ
output[outputPos++] = 'T';
output[outputPos++] = 'H';
break;
case '\u00D9': // Ù
case '\u00DA': // Ú
case '\u00DB': // Û
case '\u00DC': // Ü
output[outputPos++] = 'U';
break;
case '\u00DD': // Ý
case '\u0178': // Ÿ
output[outputPos++] = 'Y';
break;
case '\u00E0': // à
case '\u00E1': // á
case '\u00E2': // â
case '\u00E3': // ã
case '\u00E4': // ä
case '\u00E5': // å
output[outputPos++] = 'a';
break;
case '\u00E6': // æ
output[outputPos++] = 'a';
output[outputPos++] = 'e';
break;
case '\u00E7': // ç
output[outputPos++] = 'c';
break;
case '\u00E8': // è
case '\u00E9': // é
case '\u00EA': // ê
case '\u00EB': // ë
output[outputPos++] = 'e';
break;
case '\u00EC': // ì
case '\u00ED': // í
case '\u00EE': // î
case '\u00EF': // ï
output[outputPos++] = 'i';
break;
case '\u0133': // ĳ
output[outputPos++] = 'i';
output[outputPos++] = 'j';
break;
case '\u00F0': // ð
output[outputPos++] = 'd';
break;
case '\u00F1': // ñ
output[outputPos++] = 'n';
break;
case '\u00F2': // ò
case '\u00F3': // ó
case '\u00F4': // ô
case '\u00F5': // õ
case '\u00F6': // ö
case '\u00F8': // ø
output[outputPos++] = 'o';
break;
case '\u0153': // œ
output[outputPos++] = 'o';
output[outputPos++] = 'e';
break;
case '\u00DF': // ß
output[outputPos++] = 's';
output[outputPos++] = 's';
break;
case '\u00FE': // þ
output[outputPos++] = 't';
output[outputPos++] = 'h';
break;
case '\u00F9': // ù
case '\u00FA': // ú
case '\u00FB': // û
case '\u00FC': // ü
output[outputPos++] = 'u';
break;
case '\u00FD': // ý
case '\u00FF': // ÿ
output[outputPos++] = 'y';
break;
case '\uFB00': // ﬀ
output[outputPos++] = 'f';
output[outputPos++] = 'f';
break;
case '\uFB01': // ﬁ
output[outputPos++] = 'f';
output[outputPos++] = 'i';
break;
case '\uFB02': // ﬂ
output[outputPos++] = 'f';
output[outputPos++] = 'l';
break;
// The following two are commented out because they can break maxSizeNeeded (and doing *3 could be expensive)
// case '\uFB03': // ﬃ
// output[outputPos++] = 'f';
// output[outputPos++] = 'f';
// output[outputPos++] = 'i';
// break;
// case '\uFB04': // ﬄ
// output[outputPos++] = 'f';
// output[outputPos++] = 'f';
// output[outputPos++] = 'l';
// break;
case '\uFB05': // ﬅ
output[outputPos++] = 'f';
output[outputPos++] = 't';
break;
case '\uFB06': // ﬆ
output[outputPos++] = 's';
output[outputPos++] = 't';
break;
default:
output[outputPos++] = c;
break;
}
}
}
}
}
}
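As an illustration only (not part of the imported sources; AccentFilterDemo is hypothetical), feeding accented text through the filter folds "déjà" to "deja":

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

static class AccentFilterDemo
{
    public static void Main()
    {
        TokenStream ts = new ISOLatin1AccentFilter(
            new LetterTokenizer(new System.IO.StringReader("déjà vu")));
        var term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
            System.Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        // prints "deja" then "vu"
    }
}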


@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
namespace Lucene.Net.Analysis
{
/// <summary> "Tokenizes" the entire stream as a single token. This is useful
/// for data like zip codes, ids, and some product names.
/// </summary>
public class KeywordAnalyzer:Analyzer
{
public KeywordAnalyzer()
{
SetOverridesTokenStreamMethod<KeywordAnalyzer>();
}
public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
{
return new KeywordTokenizer(reader);
}
public override TokenStream ReusableTokenStream(System.String fieldName, System.IO.TextReader reader)
{
if (overridesTokenStreamMethod)
{
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return TokenStream(fieldName, reader);
}
var tokenizer = (Tokenizer) PreviousTokenStream;
if (tokenizer == null)
{
tokenizer = new KeywordTokenizer(reader);
PreviousTokenStream = tokenizer;
}
else
tokenizer.Reset(reader);
return tokenizer;
}
}
}
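As an illustration only (not part of the imported sources; KeywordAnalyzerDemo is hypothetical), the whole input surfaces as exactly one token, which suits IDs and product codes:

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;

static class KeywordAnalyzerDemo
{
    public static void Main()
    {
        TokenStream ts = new KeywordAnalyzer()
            .TokenStream("sku", new System.IO.StringReader("AB-1234/X"));
        var term = ts.AddAttribute<ITermAttribute>();
        while (ts.IncrementToken())
            System.Console.WriteLine(new string(term.TermBuffer(), 0, term.TermLength()));
        // prints the single token "AB-1234/X"
    }
}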


@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Lucene.Net.Analysis.Tokenattributes;
using AttributeSource = Lucene.Net.Util.AttributeSource;
namespace Lucene.Net.Analysis
{
/// <summary> Emits the entire input as a single token.</summary>
public sealed class KeywordTokenizer:Tokenizer
{
private const int DEFAULT_BUFFER_SIZE = 256;
private bool done;
private int finalOffset;
private ITermAttribute termAtt;
private IOffsetAttribute offsetAtt;
public KeywordTokenizer(System.IO.TextReader input):this(input, DEFAULT_BUFFER_SIZE)
{
}
public KeywordTokenizer(System.IO.TextReader input, int bufferSize):base(input)
{
Init(bufferSize);
}
public KeywordTokenizer(AttributeSource source, System.IO.TextReader input, int bufferSize):base(source, input)
{
Init(bufferSize);
}
public KeywordTokenizer(AttributeFactory factory, System.IO.TextReader input, int bufferSize):base(factory, input)
{
Init(bufferSize);
}
private void Init(int bufferSize)
{
this.done = false;
termAtt = AddAttribute<ITermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
termAtt.ResizeTermBuffer(bufferSize);
}
public override bool IncrementToken()
{
if (!done)
{
ClearAttributes();
done = true;
int upto = 0;
char[] buffer = termAtt.TermBuffer();
while (true)
{
int length = input.Read(buffer, upto, buffer.Length - upto);
if (length == 0)
break;
upto += length;
if (upto == buffer.Length)
buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
}
termAtt.SetTermLength(upto);
finalOffset = CorrectOffset(upto);
offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
return true;
}
return false;
}
public override void End()
{
// set final offset
offsetAtt.SetOffset(finalOffset, finalOffset);
}
public override void Reset(System.IO.TextReader input)
{
base.Reset(input);
this.done = false;
}
}
}


@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Lucene.Net.Analysis.Tokenattributes;
namespace Lucene.Net.Analysis
{
/// <summary>Removes words that are too long or too short from the stream.</summary>
public sealed class LengthFilter:TokenFilter
{
internal int min;
internal int max;
private readonly ITermAttribute termAtt;
/// <summary> Build a filter that removes words that are too long or too
/// short from the text.
/// </summary>
public LengthFilter(TokenStream in_Renamed, int min, int max)
: base(in_Renamed)
{
this.min = min;
this.max = max;
termAtt = AddAttribute<ITermAttribute>();
}
/// <summary> Returns the next input Token whose term is of the right length.</summary>
public override bool IncrementToken()
{
// return the first token whose length is within bounds
while (input.IncrementToken())
{
var len = termAtt.TermLength();
if (len >= min && len <= max)
{
return true;
}
// note: else we ignore it but should we index each part of it?
}
// reached EOS -- return false
return false;
}
}
}
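As an illustration only (not part of the imported sources; LengthFilterDemo is hypothetical), a chain that drops very short and very long tokens:

using Lucene.Net.Analysis;

static class LengthFilterDemo
{
    // Lowercased letter runs, keeping only tokens of length 3 to 20.
    public static TokenStream Chain(System.IO.TextReader reader)
    {
        return new LengthFilter(new LowerCaseTokenizer(reader), 3, 20);
    }
}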


@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AttributeSource = Lucene.Net.Util.AttributeSource;
namespace Lucene.Net.Analysis
{
/// <summary>A LetterTokenizer is a tokenizer that divides text at non-letters. That's
/// to say, it defines tokens as maximal strings of adjacent letters, as defined
/// by the <see cref="char.IsLetter(char)" /> predicate.
/// Note: this does a decent job for most European languages, but does a terrible
/// job for some Asian languages, where words are not separated by spaces.
/// </summary>
public class LetterTokenizer:CharTokenizer
{
/// <summary>Construct a new LetterTokenizer. </summary>
public LetterTokenizer(System.IO.TextReader @in):base(@in)
{
}
/// <summary>Construct a new LetterTokenizer using a given <see cref="AttributeSource" />. </summary>
public LetterTokenizer(AttributeSource source, System.IO.TextReader @in)
: base(source, @in)
{
}
/// <summary>Construct a new LetterTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
public LetterTokenizer(AttributeFactory factory, System.IO.TextReader @in)
: base(factory, @in)
{
}
/// <summary>Collects only characters which satisfy
/// <see cref="char.IsLetter(char)" />.
/// </summary>
protected internal override bool IsTokenChar(char c)
{
return System.Char.IsLetter(c);
}
}
}


@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Lucene.Net.Analysis.Tokenattributes;
namespace Lucene.Net.Analysis
{
/// <summary>Normalizes token text to lower case.</summary>
public sealed class LowerCaseFilter:TokenFilter
{
public LowerCaseFilter(TokenStream @in)
: base(@in)
{
termAtt = AddAttribute<ITermAttribute>();
}
private readonly ITermAttribute termAtt;
public override bool IncrementToken()
{
if (input.IncrementToken())
{
char[] buffer = termAtt.TermBuffer();
int length = termAtt.TermLength();
for (int i = 0; i < length; i++)
buffer[i] = System.Char.ToLower(buffer[i]);
return true;
}
return false;
}
}
}


@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AttributeSource = Lucene.Net.Util.AttributeSource;
namespace Lucene.Net.Analysis
{
/// <summary> LowerCaseTokenizer performs the function of LetterTokenizer
/// and LowerCaseFilter together. It divides text at non-letters and converts
/// the letters to lower case. While it is functionally equivalent to the combination
/// of LetterTokenizer and LowerCaseFilter, there is a performance advantage
/// to doing the two tasks at once, hence this (redundant) implementation.
/// <p/>
/// Note: this does a decent job for most European languages, but does a terrible
/// job for some Asian languages, where words are not separated by spaces.
/// </summary>
public sealed class LowerCaseTokenizer:LetterTokenizer
{
/// <summary>Construct a new LowerCaseTokenizer. </summary>
public LowerCaseTokenizer(System.IO.TextReader @in)
: base(@in)
{
}
/// <summary>Construct a new LowerCaseTokenizer using a given <see cref="AttributeSource" />. </summary>
public LowerCaseTokenizer(AttributeSource source, System.IO.TextReader @in)
: base(source, @in)
{
}
/// <summary>Construct a new LowerCaseTokenizer using a given <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />. </summary>
public LowerCaseTokenizer(AttributeFactory factory, System.IO.TextReader @in)
: base(factory, @in)
{
}
/// <summary>Converts char to lower case via
/// <see cref="char.ToLower(char)" />.
/// </summary>
protected internal override char Normalize(char c)
{
return System.Char.ToLower(c);
}
}
}


@@ -0,0 +1,166 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System.Collections.Generic;
namespace Lucene.Net.Analysis
{
/// <summary> Simplistic <see cref="CharFilter" /> that applies the mappings
/// contained in a <see cref="NormalizeCharMap" /> to the character
/// stream, and corrects the resulting changes to the
/// offsets.
/// </summary>
public class MappingCharFilter : BaseCharFilter
{
private readonly NormalizeCharMap normMap;
private LinkedList<char> buffer;
private System.String replacement;
private int charPointer;
private int nextCharCounter;
/// <summary>Default constructor that takes a <see cref="CharStream" />.</summary>
public MappingCharFilter(NormalizeCharMap normMap, CharStream @in)
: base(@in)
{
this.normMap = normMap;
}
/// <summary>Easy-use constructor that takes a <see cref="System.IO.TextReader" />.</summary>
public MappingCharFilter(NormalizeCharMap normMap, System.IO.TextReader @in)
: base(CharReader.Get(@in))
{
this.normMap = normMap;
}
public override int Read()
{
while (true)
{
if (replacement != null && charPointer < replacement.Length)
{
return replacement[charPointer++];
}
int firstChar = NextChar();
if (firstChar == - 1)
return - 1;
NormalizeCharMap nm = normMap.submap != null
? normMap.submap[(char) firstChar]
: null;
if (nm == null)
return firstChar;
NormalizeCharMap result = Match(nm);
if (result == null)
return firstChar;
replacement = result.normStr;
charPointer = 0;
if (result.diff != 0)
{
int prevCumulativeDiff = LastCumulativeDiff;
if (result.diff < 0)
{
for (int i = 0; i < - result.diff; i++)
AddOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
}
else
{
AddOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
}
}
}
}
private int NextChar()
{
nextCharCounter++;
if (buffer != null && buffer.Count != 0)
{
char tempObject = buffer.First.Value;
buffer.RemoveFirst();
return (tempObject);
}
return input.Read();
}
private void PushChar(int c)
{
nextCharCounter--;
if (buffer == null)
{
buffer = new LinkedList<char>();
}
buffer.AddFirst((char)c);
}
private void PushLastChar(int c)
{
if (buffer == null)
{
buffer = new LinkedList<char>();
}
buffer.AddLast((char)c);
}
private NormalizeCharMap Match(NormalizeCharMap map)
{
NormalizeCharMap result = null;
if (map.submap != null)
{
int chr = NextChar();
if (chr != - 1)
{
NormalizeCharMap subMap = map.submap[(char)chr];
if (subMap != null)
{
result = Match(subMap);
}
if (result == null)
{
PushChar(chr);
}
}
}
if (result == null && map.normStr != null)
{
result = map;
}
return result;
}
public override int Read(System.Char[] cbuf, int off, int len)
{
var tmp = new char[len];
int l = input.Read(tmp, 0, len);
if (l != 0)
{
for (int i = 0; i < l; i++)
PushLastChar(tmp[i]);
}
l = 0;
for (int i = off; i < off + len; i++)
{
int c = Read();
if (c == - 1)
break;
cbuf[i] = (char) c;
l++;
}
return l == 0?- 1:l;
}
}
}


@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Lucene.Net.Support;
namespace Lucene.Net.Analysis
{
/// <summary> Holds a map of String input to String output, to be used
/// with <see cref="MappingCharFilter" />.
/// </summary>
public class NormalizeCharMap
{
internal System.Collections.Generic.IDictionary<char, NormalizeCharMap> submap;
internal System.String normStr;
internal int diff;
/// <summary>Records a replacement to be applied to the input
/// stream. Whenever <c>singleMatch</c> occurs in
/// the input, it will be replaced with
/// <c>replacement</c>.
///
/// </summary>
/// <param name="singleMatch">input String to be replaced
/// </param>
/// <param name="replacement">output String
/// </param>
public virtual void Add(System.String singleMatch, System.String replacement)
{
NormalizeCharMap currMap = this;
for (var i = 0; i < singleMatch.Length; i++)
{
char c = singleMatch[i];
if (currMap.submap == null)
{
currMap.submap = new HashMap<char, NormalizeCharMap>(1);
}
var map = currMap.submap[c];
if (map == null)
{
map = new NormalizeCharMap();
currMap.submap[c] = map;
}
currMap = map;
}
if (currMap.normStr != null)
{
throw new System.SystemException("MappingCharFilter: there is already a mapping for " + singleMatch);
}
currMap.normStr = replacement;
currMap.diff = singleMatch.Length - replacement.Length;
}
}
}
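As an illustration only (not part of the imported sources; MappingDemo is hypothetical), building a map and wrapping a reader; the "ß" to "ss" mapping grows the text, and the BaseCharFilter machinery keeps token offsets pointing at the original input:

using Lucene.Net.Analysis;

static class MappingDemo
{
    public static CharStream Wrap(System.IO.TextReader reader)
    {
        var map = new NormalizeCharMap();
        map.Add("ß", "ss");    // 1 char becomes 2; diff = -1 recorded by the filter
        map.Add("\u00AD", ""); // strip soft hyphens entirely
        return new MappingCharFilter(map, reader);
    }
}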


@@ -0,0 +1,270 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Search;
using AttributeSource = Lucene.Net.Util.AttributeSource;
using NumericUtils = Lucene.Net.Util.NumericUtils;
using NumericField = Lucene.Net.Documents.NumericField;
// referenced only in doc comments
namespace Lucene.Net.Analysis
{
/// <summary> <b>Expert:</b> This class provides a <see cref="TokenStream" />
/// for indexing numeric values that can be used by <see cref="NumericRangeQuery{T}" />
/// or <see cref="NumericRangeFilter{T}" />.
///
/// <p/>Note that for simple usage, <see cref="NumericField" /> is
/// recommended. <see cref="NumericField" /> disables norms and
/// term freqs, as they are not usually needed during
/// searching. If you need to change these settings, you
/// should use this class.
///
/// <p/>See <see cref="NumericField" /> for capabilities of fields
/// indexed numerically.<p/>
///
/// <p/>Here's an example usage, for an <c>int</c> field:
///
/// <code>
/// Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
/// field.setOmitNorms(true);
/// field.setOmitTermFreqAndPositions(true);
/// document.add(field);
/// </code>
///
/// <p/>For optimal performance, re-use the TokenStream and Field instance
/// for more than one document:
///
/// <code>
/// NumericTokenStream stream = new NumericTokenStream(precisionStep);
/// Field field = new Field(name, stream);
/// field.setOmitNorms(true);
/// field.setOmitTermFreqAndPositions(true);
/// Document document = new Document();
/// document.add(field);
///
/// for(all documents) {
/// stream.setIntValue(value)
/// writer.addDocument(document);
/// }
/// </code>
///
/// <p/>This stream is not intended to be used in analyzers;
/// it's more for iterating the different precisions during
/// indexing a specific numeric value.<p/>
///
/// <p/><b>NOTE</b>: as token streams are only consumed once
/// the document is added to the index, if you index more
/// than one numeric field, use a separate <c>NumericTokenStream</c>
/// instance for each.<p/>
///
/// <p/>See <see cref="NumericRangeQuery{T}" /> for more details on the
/// <a href="../search/NumericRangeQuery.html#precisionStepDesc"><c>precisionStep</c></a>
/// parameter as well as how numeric fields work under the hood.<p/>
///
/// <p/><font color="red"><b>NOTE:</b> This API is experimental and
/// might change in incompatible ways in the next release.</font>
/// Since 2.9
/// </summary>
public sealed class NumericTokenStream : TokenStream
{
private void InitBlock()
{
termAtt = AddAttribute<ITermAttribute>();
typeAtt = AddAttribute<ITypeAttribute>();
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
/// <summary>The full precision token gets this token type assigned. </summary>
public const System.String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
/// <summary>The lower precision tokens get this token type assigned. </summary>
public const System.String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
/// <summary> Creates a token stream for numeric values using the default <c>precisionStep</c>
/// <see cref="NumericUtils.PRECISION_STEP_DEFAULT" /> (4). The stream is not yet initialized,
/// before using set a value using the various set<em>???</em>Value() methods.
/// </summary>
public NumericTokenStream():this(NumericUtils.PRECISION_STEP_DEFAULT)
{
}
/// <summary> Creates a token stream for numeric values with the specified
/// <c>precisionStep</c>. The stream is not yet initialized;
/// before use, set a value with one of the Set<em>???</em>Value() methods.
/// </summary>
public NumericTokenStream(int precisionStep):base()
{
InitBlock();
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new System.ArgumentException("precisionStep must be >=1");
}
/// <summary> Expert: Creates a token stream for numeric values with the specified
/// <c>precisionStep</c> using the given <see cref="AttributeSource" />.
/// The stream is not yet initialized;
/// before use, set a value with one of the Set<em>???</em>Value() methods.
/// </summary>
public NumericTokenStream(AttributeSource source, int precisionStep):base(source)
{
InitBlock();
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new System.ArgumentException("precisionStep must be >=1");
}
/// <summary> Expert: Creates a token stream for numeric values with the specified
/// <c>precisionStep</c> using the given
/// <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory" />.
/// The stream is not yet initialized;
/// before use, set a value with one of the Set<em>???</em>Value() methods.
/// </summary>
public NumericTokenStream(AttributeFactory factory, int precisionStep):base(factory)
{
InitBlock();
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new System.ArgumentException("precisionStep must be >=1");
}
/// <summary> Initializes the token stream with the supplied <c>long</c> value.</summary>
/// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
/// </param>
/// <returns> this instance, so you can use it in the following way:
/// <c>new Field(name, new NumericTokenStream(precisionStep).SetLongValue(value))</c>
/// </returns>
public NumericTokenStream SetLongValue(long value_Renamed)
{
this.value_Renamed = value_Renamed;
valSize = 64;
shift = 0;
return this;
}
/// <summary> Initializes the token stream with the supplied <c>int</c> value.</summary>
/// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
/// </param>
/// <returns> this instance, so you can use it in the following way:
/// <c>new Field(name, new NumericTokenStream(precisionStep).SetIntValue(value))</c>
/// </returns>
public NumericTokenStream SetIntValue(int value_Renamed)
{
this.value_Renamed = (long) value_Renamed;
valSize = 32;
shift = 0;
return this;
}
/// <summary> Initializes the token stream with the supplied <c>double</c> value.</summary>
/// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
/// </param>
/// <returns> this instance, so you can use it in the following way:
/// <c>new Field(name, new NumericTokenStream(precisionStep).SetDoubleValue(value))</c>
/// </returns>
public NumericTokenStream SetDoubleValue(double value_Renamed)
{
this.value_Renamed = NumericUtils.DoubleToSortableLong(value_Renamed);
valSize = 64;
shift = 0;
return this;
}
/// <summary> Initializes the token stream with the supplied <c>float</c> value.</summary>
/// <param name="value_Renamed">the value, for which this TokenStream should enumerate tokens.
/// </param>
/// <returns> this instance, so you can use it in the following way:
/// <c>new Field(name, new NumericTokenStream(precisionStep).SetFloatValue(value))</c>
/// </returns>
public NumericTokenStream SetFloatValue(float value_Renamed)
{
this.value_Renamed = (long) NumericUtils.FloatToSortableInt(value_Renamed);
valSize = 32;
shift = 0;
return this;
}
public override void Reset()
{
if (valSize == 0)
throw new System.SystemException("call set???Value() before usage");
shift = 0;
}
protected override void Dispose(bool disposing)
{
// Do nothing.
}
public override bool IncrementToken()
{
if (valSize == 0)
throw new System.SystemException("call set???Value() before usage");
if (shift >= valSize)
return false;
ClearAttributes();
char[] buffer;
switch (valSize)
{
case 64:
buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
termAtt.SetTermLength(NumericUtils.LongToPrefixCoded(value_Renamed, shift, buffer));
break;
case 32:
buffer = termAtt.ResizeTermBuffer(NumericUtils.BUF_SIZE_INT);
termAtt.SetTermLength(NumericUtils.IntToPrefixCoded((int) value_Renamed, shift, buffer));
break;
default:
// should not happen
throw new System.ArgumentException("valSize must be 32 or 64");
}
typeAtt.Type = (shift == 0)?TOKEN_TYPE_FULL_PREC:TOKEN_TYPE_LOWER_PREC;
posIncrAtt.PositionIncrement = (shift == 0)?1:0;
shift += precisionStep;
return true;
}
public override System.String ToString()
{
System.Text.StringBuilder sb = new System.Text.StringBuilder("(numeric,valSize=").Append(valSize);
sb.Append(",precisionStep=").Append(precisionStep).Append(')');
return sb.ToString();
}
// members
private ITermAttribute termAtt;
private ITypeAttribute typeAtt;
private IPositionIncrementAttribute posIncrAtt;
private int shift = 0, valSize = 0; // valSize==0 means not initialized
private readonly int precisionStep;
private long value_Renamed = 0L;
}
}
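A C# rendering of the reuse pattern from the doc comment above, as a sketch only (not part of the imported sources; NumericTokenStreamDemo is hypothetical, and IndexWriter, Field, and Document are assumed to come from the wider library as in standard Lucene.Net):

using Lucene.Net.Analysis;
using Lucene.Net.Documents;

static class NumericTokenStreamDemo
{
    public static void IndexAll(Lucene.Net.Index.IndexWriter writer, int[] values)
    {
        var stream = new NumericTokenStream(Lucene.Net.Util.NumericUtils.PRECISION_STEP_DEFAULT);
        var field = new Field("price", stream);
        field.OmitNorms = true;                // norms are rarely needed for numerics
        field.OmitTermFreqAndPositions = true; // likewise term freqs and positions
        var document = new Document();
        document.Add(field);
        foreach (int v in values)
        {
            stream.SetIntValue(v); // re-initialize the shared stream per document
            writer.AddDocument(document);
        }
    }
}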

Some files were not shown because too many files have changed in this diff.