mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
426 lines
14 KiB
Java
426 lines
14 KiB
Java
/*
|
|
* ====================================================================
|
|
* Licensed to the Apache Software Foundation (ASF) under one
|
|
* or more contributor license agreements. See the NOTICE file
|
|
* distributed with this work for additional information
|
|
* regarding copyright ownership. The ASF licenses this file
|
|
* to you under the Apache License, Version 2.0 (the
|
|
* "License"); you may not use this file except in compliance
|
|
* with the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing,
|
|
* software distributed under the License is distributed on an
|
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
* KIND, either express or implied. See the License for the
|
|
* specific language governing permissions and limitations
|
|
* under the License.
|
|
* ====================================================================
|
|
*
|
|
* This software consists of voluntary contributions made by many
|
|
* individuals on behalf of the Apache Software Foundation. For more
|
|
* information on the Apache Software Foundation, please see
|
|
* <http://www.apache.org/>.
|
|
*
|
|
*/
|
|
|
|
package ch.boye.httpclientandroidlib.message;
|
|
|
|
import java.util.NoSuchElementException;
|
|
|
|
import ch.boye.httpclientandroidlib.HeaderIterator;
|
|
import ch.boye.httpclientandroidlib.ParseException;
|
|
import ch.boye.httpclientandroidlib.TokenIterator;
|
|
|
|
/**
|
|
* Basic implementation of a {@link TokenIterator}.
|
|
* This implementation parses <tt>#token</tt> sequences as
|
|
* defined by RFC 2616, section 2.
|
|
* It extends that definition somewhat beyond US-ASCII.
|
|
*
|
|
* @since 4.0
|
|
*/
|
|
public class BasicTokenIterator implements TokenIterator {
|
|
|
|
/** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
|
|
// the order of the characters here is adjusted to put the
|
|
// most likely candidates at the beginning of the collection
|
|
public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
|
|
|
|
|
|
/** The iterator from which to obtain the next header. */
|
|
protected final HeaderIterator headerIt;
|
|
|
|
/**
|
|
* The value of the current header.
|
|
* This is the header value that includes {@link #currentToken}.
|
|
* Undefined if the iteration is over.
|
|
*/
|
|
protected String currentHeader;
|
|
|
|
/**
|
|
* The token to be returned by the next call to {@link #nextToken}.
|
|
* <code>null</code> if the iteration is over.
|
|
*/
|
|
protected String currentToken;
|
|
|
|
/**
|
|
* The position after {@link #currentToken} in {@link #currentHeader}.
|
|
* Undefined if the iteration is over.
|
|
*/
|
|
protected int searchPos;
|
|
|
|
|
|
/**
 * Creates a token iterator on top of a header iterator and immediately
 * positions it on the first token, so that {@link #hasNext} gives a
 * correct answer right after construction.
 *
 * @param headerIterator    the source of the headers whose values
 *                          are to be split into tokens
 *
 * @throws IllegalArgumentException if <code>headerIterator</code>
 *                                  is <code>null</code>
 * @throws ParseException   if the initial search for a token hits
 *                          an invalid header value
 */
public BasicTokenIterator(final HeaderIterator headerIterator) {
    if (headerIterator == null) {
        throw new IllegalArgumentException("Header iterator must not be null.");
    }

    this.headerIt = headerIterator;
    // -1 tells findNext to fetch the first header before searching
    this.searchPos = findNext(-1);
}
|
|
|
|
|
|
// non-javadoc, see interface TokenIterator
|
|
public boolean hasNext() {
|
|
return (this.currentToken != null);
|
|
}
|
|
|
|
|
|
/**
|
|
* Obtains the next token from this iteration.
|
|
*
|
|
* @return the next token in this iteration
|
|
*
|
|
* @throws NoSuchElementException if the iteration is already over
|
|
* @throws ParseException if an invalid header value is encountered
|
|
*/
|
|
public String nextToken()
|
|
throws NoSuchElementException, ParseException {
|
|
|
|
if (this.currentToken == null) {
|
|
throw new NoSuchElementException("Iteration already finished.");
|
|
}
|
|
|
|
final String result = this.currentToken;
|
|
// updates currentToken, may trigger ParseException:
|
|
this.searchPos = findNext(this.searchPos);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* Returns the next token.
|
|
* Same as {@link #nextToken}, but with generic return type.
|
|
*
|
|
* @return the next token in this iteration
|
|
*
|
|
* @throws NoSuchElementException if there are no more tokens
|
|
* @throws ParseException if an invalid header value is encountered
|
|
*/
|
|
public final Object next()
|
|
throws NoSuchElementException, ParseException {
|
|
return nextToken();
|
|
}
|
|
|
|
|
|
/**
|
|
* Removing tokens is not supported.
|
|
*
|
|
* @throws UnsupportedOperationException always
|
|
*/
|
|
public final void remove()
|
|
throws UnsupportedOperationException {
|
|
|
|
throw new UnsupportedOperationException
|
|
("Removing tokens is not supported.");
|
|
}
|
|
|
|
|
|
/**
|
|
* Determines the next token.
|
|
* If found, the token is stored in {@link #currentToken}.
|
|
* The return value indicates the position after the token
|
|
* in {@link #currentHeader}. If necessary, the next header
|
|
* will be obtained from {@link #headerIt}.
|
|
* If not found, {@link #currentToken} is set to <code>null</code>.
|
|
*
|
|
* @param from the position in the current header at which to
|
|
* start the search, -1 to search in the first header
|
|
*
|
|
* @return the position after the found token in the current header, or
|
|
* negative if there was no next token
|
|
*
|
|
* @throws ParseException if an invalid header value is encountered
|
|
*/
|
|
protected int findNext(int from)
|
|
throws ParseException {
|
|
|
|
if (from < 0) {
|
|
// called from the constructor, initialize the first header
|
|
if (!this.headerIt.hasNext()) {
|
|
return -1;
|
|
}
|
|
this.currentHeader = this.headerIt.nextHeader().getValue();
|
|
from = 0;
|
|
} else {
|
|
// called after a token, make sure there is a separator
|
|
from = findTokenSeparator(from);
|
|
}
|
|
|
|
int start = findTokenStart(from);
|
|
if (start < 0) {
|
|
this.currentToken = null;
|
|
return -1; // nothing found
|
|
}
|
|
|
|
int end = findTokenEnd(start);
|
|
this.currentToken = createToken(this.currentHeader, start, end);
|
|
return end;
|
|
}
|
|
|
|
|
|
/**
|
|
* Creates a new token to be returned.
|
|
* Called from {@link #findNext findNext} after the token is identified.
|
|
* The default implementation simply calls
|
|
* {@link java.lang.String#substring String.substring}.
|
|
* <br/>
|
|
* If header values are significantly longer than tokens, and some
|
|
* tokens are permanently referenced by the application, there can
|
|
* be problems with garbage collection. A substring will hold a
|
|
* reference to the full characters of the original string and
|
|
* therefore occupies more memory than might be expected.
|
|
* To avoid this, override this method and create a new string
|
|
* instead of a substring.
|
|
*
|
|
* @param value the full header value from which to create a token
|
|
* @param start the index of the first token character
|
|
* @param end the index after the last token character
|
|
*
|
|
* @return a string representing the token identified by the arguments
|
|
*/
|
|
protected String createToken(String value, int start, int end) {
|
|
return value.substring(start, end);
|
|
}
|
|
|
|
|
|
/**
|
|
* Determines the starting position of the next token.
|
|
* This method will iterate over headers if necessary.
|
|
*
|
|
* @param from the position in the current header at which to
|
|
* start the search
|
|
*
|
|
* @return the position of the token start in the current header,
|
|
* negative if no token start could be found
|
|
*/
|
|
protected int findTokenStart(int from) {
|
|
if (from < 0) {
|
|
throw new IllegalArgumentException
|
|
("Search position must not be negative: " + from);
|
|
}
|
|
|
|
boolean found = false;
|
|
while (!found && (this.currentHeader != null)) {
|
|
|
|
final int to = this.currentHeader.length();
|
|
while (!found && (from < to)) {
|
|
|
|
final char ch = this.currentHeader.charAt(from);
|
|
if (isTokenSeparator(ch) || isWhitespace(ch)) {
|
|
// whitspace and token separators are skipped
|
|
from++;
|
|
} else if (isTokenChar(this.currentHeader.charAt(from))) {
|
|
// found the start of a token
|
|
found = true;
|
|
} else {
|
|
throw new ParseException
|
|
("Invalid character before token (pos " + from +
|
|
"): " + this.currentHeader);
|
|
}
|
|
}
|
|
if (!found) {
|
|
if (this.headerIt.hasNext()) {
|
|
this.currentHeader = this.headerIt.nextHeader().getValue();
|
|
from = 0;
|
|
} else {
|
|
this.currentHeader = null;
|
|
}
|
|
}
|
|
} // while headers
|
|
|
|
return found ? from : -1;
|
|
}
|
|
|
|
|
|
/**
|
|
* Determines the position of the next token separator.
|
|
* Because of multi-header joining rules, the end of a
|
|
* header value is a token separator. This method does
|
|
* therefore not need to iterate over headers.
|
|
*
|
|
* @param from the position in the current header at which to
|
|
* start the search
|
|
*
|
|
* @return the position of a token separator in the current header,
|
|
* or at the end
|
|
*
|
|
* @throws ParseException
|
|
* if a new token is found before a token separator.
|
|
* RFC 2616, section 2.1 explicitly requires a comma between
|
|
* tokens for <tt>#</tt>.
|
|
*/
|
|
protected int findTokenSeparator(int from) {
|
|
if (from < 0) {
|
|
throw new IllegalArgumentException
|
|
("Search position must not be negative: " + from);
|
|
}
|
|
|
|
boolean found = false;
|
|
final int to = this.currentHeader.length();
|
|
while (!found && (from < to)) {
|
|
final char ch = this.currentHeader.charAt(from);
|
|
if (isTokenSeparator(ch)) {
|
|
found = true;
|
|
} else if (isWhitespace(ch)) {
|
|
from++;
|
|
} else if (isTokenChar(ch)) {
|
|
throw new ParseException
|
|
("Tokens without separator (pos " + from +
|
|
"): " + this.currentHeader);
|
|
} else {
|
|
throw new ParseException
|
|
("Invalid character after token (pos " + from +
|
|
"): " + this.currentHeader);
|
|
}
|
|
}
|
|
|
|
return from;
|
|
}
|
|
|
|
|
|
/**
|
|
* Determines the ending position of the current token.
|
|
* This method will not leave the current header value,
|
|
* since the end of the header value is a token boundary.
|
|
*
|
|
* @param from the position of the first character of the token
|
|
*
|
|
* @return the position after the last character of the token.
|
|
* The behavior is undefined if <code>from</code> does not
|
|
* point to a token character in the current header value.
|
|
*/
|
|
protected int findTokenEnd(int from) {
|
|
if (from < 0) {
|
|
throw new IllegalArgumentException
|
|
("Token start position must not be negative: " + from);
|
|
}
|
|
|
|
final int to = this.currentHeader.length();
|
|
int end = from+1;
|
|
while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
|
|
end++;
|
|
}
|
|
|
|
return end;
|
|
}
|
|
|
|
|
|
/**
|
|
* Checks whether a character is a token separator.
|
|
* RFC 2616, section 2.1 defines comma as the separator for
|
|
* <tt>#token</tt> sequences. The end of a header value will
|
|
* also separate tokens, but that is not a character check.
|
|
*
|
|
* @param ch the character to check
|
|
*
|
|
* @return <code>true</code> if the character is a token separator,
|
|
* <code>false</code> otherwise
|
|
*/
|
|
protected boolean isTokenSeparator(char ch) {
|
|
return (ch == ',');
|
|
}
|
|
|
|
|
|
/**
|
|
* Checks whether a character is a whitespace character.
|
|
* RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
|
|
* The optional preceeding line break is irrelevant, since header
|
|
* continuation is handled transparently when parsing messages.
|
|
*
|
|
* @param ch the character to check
|
|
*
|
|
* @return <code>true</code> if the character is whitespace,
|
|
* <code>false</code> otherwise
|
|
*/
|
|
protected boolean isWhitespace(char ch) {
|
|
|
|
// we do not use Character.isWhitspace(ch) here, since that allows
|
|
// many control characters which are not whitespace as per RFC 2616
|
|
return ((ch == '\t') || Character.isSpaceChar(ch));
|
|
}
|
|
|
|
|
|
/**
|
|
* Checks whether a character is a valid token character.
|
|
* Whitespace, control characters, and HTTP separators are not
|
|
* valid token characters. The HTTP specification (RFC 2616, section 2.2)
|
|
* defines tokens only for the US-ASCII character set, this
|
|
* method extends the definition to other character sets.
|
|
*
|
|
* @param ch the character to check
|
|
*
|
|
* @return <code>true</code> if the character is a valid token start,
|
|
* <code>false</code> otherwise
|
|
*/
|
|
protected boolean isTokenChar(char ch) {
|
|
|
|
// common sense extension of ALPHA + DIGIT
|
|
if (Character.isLetterOrDigit(ch))
|
|
return true;
|
|
|
|
// common sense extension of CTL
|
|
if (Character.isISOControl(ch))
|
|
return false;
|
|
|
|
// no common sense extension for this
|
|
if (isHttpSeparator(ch))
|
|
return false;
|
|
|
|
// RFC 2616, section 2.2 defines a token character as
|
|
// "any CHAR except CTLs or separators". The controls
|
|
// and separators are included in the checks above.
|
|
// This will yield unexpected results for Unicode format characters.
|
|
// If that is a problem, overwrite isHttpSeparator(char) to filter
|
|
// out the false positives.
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Checks whether a character is an HTTP separator.
|
|
* The implementation in this class checks only for the HTTP separators
|
|
* defined in RFC 2616, section 2.2. If you need to detect other
|
|
* separators beyond the US-ASCII character set, override this method.
|
|
*
|
|
* @param ch the character to check
|
|
*
|
|
* @return <code>true</code> if the character is an HTTP separator
|
|
*/
|
|
protected boolean isHttpSeparator(char ch) {
|
|
return (HTTP_SEPARATORS.indexOf(ch) >= 0);
|
|
}
|
|
|
|
|
|
} // class BasicTokenIterator
|
|
|