mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
204 lines
7.3 KiB
Java
204 lines
7.3 KiB
Java
|
// Mozilla has modified this file - see http://hg.mozilla.org/ for details.
|
||
|
/*
|
||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||
|
* contributor license agreements. See the NOTICE file distributed with
|
||
|
* this work for additional information regarding copyright ownership.
|
||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||
|
* (the "License"); you may not use this file except in compliance with
|
||
|
* the License. You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package org.mozilla.apache.commons.codec.language;
|
||
|
|
||
|
import org.mozilla.apache.commons.codec.EncoderException;
|
||
|
import org.mozilla.apache.commons.codec.StringEncoder;
|
||
|
|
||
|
/**
|
||
|
* Encodes a string into a Refined Soundex value. A refined soundex code is
|
||
|
* optimized for spell checking words. Soundex method originally developed by
|
||
|
* <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>.
|
||
|
*
|
||
|
* @author Apache Software Foundation
|
||
|
* @version $Id: RefinedSoundex.java 1064455 2011-01-28 04:40:27Z ggregory $
|
||
|
*/
|
||
|
public class RefinedSoundex implements StringEncoder {
|
||
|
|
||
|
/**
|
||
|
* @since 1.4
|
||
|
*/
|
||
|
public static final String US_ENGLISH_MAPPING_STRING = "01360240043788015936020505";
|
||
|
|
||
|
/**
|
||
|
* RefinedSoundex is *refined* for a number of reasons one being that the
|
||
|
* mappings have been altered. This implementation contains default
|
||
|
* mappings for US English.
|
||
|
*/
|
||
|
private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray();
|
||
|
|
||
|
/**
|
||
|
* Every letter of the alphabet is "mapped" to a numerical value. This char
|
||
|
* array holds the values to which each letter is mapped. This
|
||
|
* implementation contains a default map for US_ENGLISH
|
||
|
*/
|
||
|
private final char[] soundexMapping;
|
||
|
|
||
|
/**
|
||
|
* This static variable contains an instance of the RefinedSoundex using
|
||
|
* the US_ENGLISH mapping.
|
||
|
*/
|
||
|
public static final RefinedSoundex US_ENGLISH = new RefinedSoundex();
|
||
|
|
||
|
/**
|
||
|
* Creates an instance of the RefinedSoundex object using the default US
|
||
|
* English mapping.
|
||
|
*/
|
||
|
public RefinedSoundex() {
|
||
|
this.soundexMapping = US_ENGLISH_MAPPING;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Creates a refined soundex instance using a custom mapping. This
|
||
|
* constructor can be used to customize the mapping, and/or possibly
|
||
|
* provide an internationalized mapping for a non-Western character set.
|
||
|
*
|
||
|
* @param mapping
|
||
|
* Mapping array to use when finding the corresponding code for
|
||
|
* a given character
|
||
|
*/
|
||
|
public RefinedSoundex(char[] mapping) {
|
||
|
this.soundexMapping = new char[mapping.length];
|
||
|
System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Creates a refined Soundex instance using a custom mapping. This constructor can be used to customize the mapping,
|
||
|
* and/or possibly provide an internationalized mapping for a non-Western character set.
|
||
|
*
|
||
|
* @param mapping
|
||
|
* Mapping string to use when finding the corresponding code for a given character
|
||
|
* @since 1.4
|
||
|
*/
|
||
|
public RefinedSoundex(String mapping) {
|
||
|
this.soundexMapping = mapping.toCharArray();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns the number of characters in the two encoded Strings that are the
|
||
|
* same. This return value ranges from 0 to the length of the shortest
|
||
|
* encoded String: 0 indicates little or no similarity, and 4 out of 4 (for
|
||
|
* example) indicates strong similarity or identical values. For refined
|
||
|
* Soundex, the return value can be greater than 4.
|
||
|
*
|
||
|
* @param s1
|
||
|
* A String that will be encoded and compared.
|
||
|
* @param s2
|
||
|
* A String that will be encoded and compared.
|
||
|
* @return The number of characters in the two encoded Strings that are the
|
||
|
* same from 0 to to the length of the shortest encoded String.
|
||
|
*
|
||
|
* @see SoundexUtils#difference(StringEncoder,String,String)
|
||
|
* @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
|
||
|
* MS T-SQL DIFFERENCE</a>
|
||
|
*
|
||
|
* @throws EncoderException
|
||
|
* if an error occurs encoding one of the strings
|
||
|
* @since 1.3
|
||
|
*/
|
||
|
public int difference(String s1, String s2) throws EncoderException {
|
||
|
return SoundexUtils.difference(this, s1, s2);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Encodes an Object using the refined soundex algorithm. This method is
|
||
|
* provided in order to satisfy the requirements of the Encoder interface,
|
||
|
* and will throw an EncoderException if the supplied object is not of type
|
||
|
* java.lang.String.
|
||
|
*
|
||
|
* @param pObject
|
||
|
* Object to encode
|
||
|
* @return An object (or type java.lang.String) containing the refined
|
||
|
* soundex code which corresponds to the String supplied.
|
||
|
* @throws EncoderException
|
||
|
* if the parameter supplied is not of type java.lang.String
|
||
|
*/
|
||
|
public Object encode(Object pObject) throws EncoderException {
|
||
|
if (!(pObject instanceof String)) {
|
||
|
throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String");
|
||
|
}
|
||
|
return soundex((String) pObject);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Encodes a String using the refined soundex algorithm.
|
||
|
*
|
||
|
* @param pString
|
||
|
* A String object to encode
|
||
|
* @return A Soundex code corresponding to the String supplied
|
||
|
*/
|
||
|
public String encode(String pString) {
|
||
|
return soundex(pString);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Returns the mapping code for a given character. The mapping codes are
|
||
|
* maintained in an internal char array named soundexMapping, and the
|
||
|
* default values of these mappings are US English.
|
||
|
*
|
||
|
* @param c
|
||
|
* char to get mapping for
|
||
|
* @return A character (really a numeral) to return for the given char
|
||
|
*/
|
||
|
char getMappingCode(char c) {
|
||
|
if (!Character.isLetter(c)) {
|
||
|
return 0;
|
||
|
}
|
||
|
return this.soundexMapping[Character.toUpperCase(c) - 'A'];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retreives the Refined Soundex code for a given String object.
|
||
|
*
|
||
|
* @param str
|
||
|
* String to encode using the Refined Soundex algorithm
|
||
|
* @return A soundex code for the String supplied
|
||
|
*/
|
||
|
public String soundex(String str) {
|
||
|
if (str == null) {
|
||
|
return null;
|
||
|
}
|
||
|
str = SoundexUtils.clean(str);
|
||
|
if (str.length() == 0) {
|
||
|
return str;
|
||
|
}
|
||
|
|
||
|
StringBuffer sBuf = new StringBuffer();
|
||
|
sBuf.append(str.charAt(0));
|
||
|
|
||
|
char last, current;
|
||
|
last = '*';
|
||
|
|
||
|
for (int i = 0; i < str.length(); i++) {
|
||
|
|
||
|
current = getMappingCode(str.charAt(i));
|
||
|
if (current == last) {
|
||
|
continue;
|
||
|
} else if (current != 0) {
|
||
|
sBuf.append(current);
|
||
|
}
|
||
|
|
||
|
last = current;
|
||
|
|
||
|
}
|
||
|
|
||
|
return sBuf.toString();
|
||
|
}
|
||
|
}
|