/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2008-09-30 17:50:42 -07:00
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
#ifndef __nsCharSeparatedTokenizer_h
|
|
|
|
#define __nsCharSeparatedTokenizer_h
|
2008-09-30 17:50:42 -07:00
|
|
|
|
|
|
|
#include "nsDependentSubstring.h"
|
2010-07-21 09:07:10 -07:00
|
|
|
#include "nsCRT.h"
|
2008-09-30 17:50:42 -07:00
|
|
|
|
|
|
|
/**
|
2010-05-04 07:43:48 -07:00
|
|
|
* This parses a SeparatorChar-separated string into tokens.
|
|
|
|
* Whitespace surrounding tokens is not treated as part of tokens, however
|
|
|
|
* whitespace inside a token is. If the final token is the empty string, it is
|
|
|
|
* not returned.
|
2008-09-30 17:50:42 -07:00
|
|
|
*
|
2010-05-04 07:43:48 -07:00
|
|
|
* Some examples, with SeparatorChar = ',':
|
2008-09-30 17:50:42 -07:00
|
|
|
*
|
|
|
|
* "foo, bar, baz" -> "foo" "bar" "baz"
|
|
|
|
* "foo,bar,baz" -> "foo" "bar" "baz"
|
|
|
|
* "foo , bar hi , baz" -> "foo" "bar hi" "baz"
|
|
|
|
* "foo, ,bar,baz" -> "foo" "" "bar" "baz"
|
|
|
|
* "foo,,bar,baz" -> "foo" "" "bar" "baz"
|
|
|
|
* "foo,bar,baz," -> "foo" "bar" "baz"
|
2010-07-21 09:07:10 -07:00
|
|
|
*
|
|
|
|
* The function used for whitespace detection is a template argument.
|
2010-07-27 04:45:59 -07:00
|
|
|
* By default, it is NS_IsAsciiWhitespace.
|
2008-09-30 17:50:42 -07:00
|
|
|
*/
|
2011-09-28 23:19:26 -07:00
|
|
|
template<bool IsWhitespace(PRUnichar) = NS_IsAsciiWhitespace>
|
2010-07-21 09:07:10 -07:00
|
|
|
class nsCharSeparatedTokenizerTemplate
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
|
|
|
public:
|
2010-05-04 07:43:48 -07:00
|
|
|
// Flags -- only one for now. If we need more, they should be defined to
|
|
|
|
// be 1<<1, 1<<2, etc. (They're masks, and aFlags/mFlags are bitfields.)
|
|
|
|
enum {
|
|
|
|
SEPARATOR_OPTIONAL = 1
|
|
|
|
};
|
|
|
|
|
2010-07-21 09:07:10 -07:00
|
|
|
nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
|
|
|
|
PRUnichar aSeparatorChar,
|
|
|
|
PRUint32 aFlags = 0)
|
2011-10-17 07:59:28 -07:00
|
|
|
: mFirstTokenBeganWithWhitespace(false),
|
|
|
|
mLastTokenEndedWithWhitespace(false),
|
|
|
|
mLastTokenEndedWithSeparator(false),
|
2010-05-04 07:43:48 -07:00
|
|
|
mSeparatorChar(aSeparatorChar),
|
|
|
|
mFlags(aFlags)
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
|
|
|
aSource.BeginReading(mIter);
|
|
|
|
aSource.EndReading(mEnd);
|
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
// Skip initial whitespace
|
2010-07-21 09:07:10 -07:00
|
|
|
while (mIter != mEnd && IsWhitespace(*mIter)) {
|
2011-10-17 07:59:28 -07:00
|
|
|
mFirstTokenBeganWithWhitespace = true;
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks if any more tokens are available.
|
|
|
|
*/
|
2011-09-28 23:19:26 -07:00
|
|
|
bool hasMoreTokens()
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
2010-07-21 09:07:10 -07:00
|
|
|
NS_ASSERTION(mIter == mEnd || !IsWhitespace(*mIter),
|
2008-09-30 17:50:42 -07:00
|
|
|
"Should be at beginning of token if there is one");
|
|
|
|
|
|
|
|
return mIter != mEnd;
|
|
|
|
}
|
|
|
|
|
2011-09-28 23:19:26 -07:00
|
|
|
bool firstTokenBeganWithWhitespace() const
|
2011-07-23 01:41:17 -07:00
|
|
|
{
|
|
|
|
return mFirstTokenBeganWithWhitespace;
|
|
|
|
}
|
|
|
|
|
2011-09-28 23:19:26 -07:00
|
|
|
bool lastTokenEndedWithSeparator() const
|
2008-12-16 18:11:07 -08:00
|
|
|
{
|
2010-05-04 07:43:48 -07:00
|
|
|
return mLastTokenEndedWithSeparator;
|
2008-12-16 18:11:07 -08:00
|
|
|
}
|
|
|
|
|
2011-09-28 23:19:26 -07:00
|
|
|
bool lastTokenEndedWithWhitespace() const
|
2011-07-23 01:41:17 -07:00
|
|
|
{
|
|
|
|
return mLastTokenEndedWithWhitespace;
|
|
|
|
}
|
|
|
|
|
2008-09-30 17:50:42 -07:00
|
|
|
/**
|
|
|
|
* Returns the next token.
|
|
|
|
*/
|
|
|
|
const nsDependentSubstring nextToken()
|
|
|
|
{
|
|
|
|
nsSubstring::const_char_iterator end = mIter, begin = mIter;
|
|
|
|
|
2010-07-21 09:07:10 -07:00
|
|
|
NS_ASSERTION(mIter == mEnd || !IsWhitespace(*mIter),
|
2008-09-30 17:50:42 -07:00
|
|
|
"Should be at beginning of token if there is one");
|
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
// Search until we hit separator or end (or whitespace, if separator
|
|
|
|
// isn't required -- see clause with 'break' below).
|
2010-05-04 07:43:48 -07:00
|
|
|
while (mIter != mEnd && *mIter != mSeparatorChar) {
|
|
|
|
// Skip to end of current word.
|
|
|
|
while (mIter != mEnd &&
|
2010-07-21 09:07:10 -07:00
|
|
|
!IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
end = mIter;
|
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
// Skip whitespace after current word.
|
2011-10-17 07:59:28 -07:00
|
|
|
mLastTokenEndedWithWhitespace = false;
|
2010-07-21 09:07:10 -07:00
|
|
|
while (mIter != mEnd && IsWhitespace(*mIter)) {
|
2011-10-17 07:59:28 -07:00
|
|
|
mLastTokenEndedWithWhitespace = true;
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
}
|
2010-05-04 07:43:48 -07:00
|
|
|
if (mFlags & SEPARATOR_OPTIONAL) {
|
|
|
|
// We've hit (and skipped) whitespace, and that's sufficient to end
|
|
|
|
// our token, regardless of whether we've reached a SeparatorChar.
|
|
|
|
break;
|
|
|
|
} // (else, we'll keep looping until we hit mEnd or SeparatorChar)
|
2008-09-30 17:50:42 -07:00
|
|
|
}
|
2008-12-16 18:11:07 -08:00
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
mLastTokenEndedWithSeparator = (mIter != mEnd &&
|
|
|
|
*mIter == mSeparatorChar);
|
|
|
|
NS_ASSERTION((mFlags & SEPARATOR_OPTIONAL) ||
|
|
|
|
(mLastTokenEndedWithSeparator == (mIter != mEnd)),
|
|
|
|
"If we require a separator and haven't hit the end of "
|
|
|
|
"our string, then we shouldn't have left the loop "
|
|
|
|
"unless we hit a separator");
|
|
|
|
|
|
|
|
// Skip separator (and any whitespace after it), if we're at one.
|
2010-05-04 07:43:48 -07:00
|
|
|
if (mLastTokenEndedWithSeparator) {
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
|
2010-07-21 09:07:10 -07:00
|
|
|
while (mIter != mEnd && IsWhitespace(*mIter)) {
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
}
|
2010-05-04 07:43:48 -07:00
|
|
|
|
2008-09-30 17:50:42 -07:00
|
|
|
return Substring(begin, end);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
nsSubstring::const_char_iterator mIter, mEnd;
|
2011-09-28 23:19:26 -07:00
|
|
|
bool mFirstTokenBeganWithWhitespace;
|
|
|
|
bool mLastTokenEndedWithWhitespace;
|
|
|
|
bool mLastTokenEndedWithSeparator;
|
2010-05-04 07:43:48 -07:00
|
|
|
PRUnichar mSeparatorChar;
|
2010-05-04 07:43:48 -07:00
|
|
|
PRUint32 mFlags;
|
2010-07-21 09:07:10 -07:00
|
|
|
};
|
2008-09-30 17:50:42 -07:00
|
|
|
|
2010-07-21 09:07:10 -07:00
|
|
|
/**
 * Convenience name for nsCharSeparatedTokenizerTemplate instantiated with its
 * default whitespace predicate. Adds no behaviour of its own: the constructor
 * forwards all of its arguments unchanged to the template base class.
 */
class nsCharSeparatedTokenizer : public nsCharSeparatedTokenizerTemplate<>
{
    typedef nsCharSeparatedTokenizerTemplate<> BaseTokenizer;

public:
    /**
     * @param aSource         the string to split into tokens.
     * @param aSeparatorChar  the character that separates tokens.
     * @param aFlags          flag bitmask passed through to the base class
     *                        (default: 0).
     */
    nsCharSeparatedTokenizer(const nsSubstring& aSource,
                             PRUnichar aSeparatorChar,
                             PRUint32 aFlags = 0)
        : BaseTokenizer(aSource, aSeparatorChar, aFlags)
    {
    }
};
|
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
class nsCCharSeparatedTokenizer
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
|
|
|
public:
|
2010-05-04 07:43:48 -07:00
|
|
|
nsCCharSeparatedTokenizer(const nsCSubstring& aSource,
|
|
|
|
char aSeparatorChar)
|
|
|
|
: mSeparatorChar(aSeparatorChar)
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
|
|
|
aSource.BeginReading(mIter);
|
|
|
|
aSource.EndReading(mEnd);
|
|
|
|
|
|
|
|
while (mIter != mEnd && isWhitespace(*mIter)) {
|
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks if any more tokens are available.
|
|
|
|
*/
|
2011-09-28 23:19:26 -07:00
|
|
|
bool hasMoreTokens()
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
|
|
|
return mIter != mEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the next token.
|
|
|
|
*/
|
|
|
|
const nsDependentCSubstring nextToken()
|
|
|
|
{
|
|
|
|
nsCSubstring::const_char_iterator end = mIter, begin = mIter;
|
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
// Search until we hit separator or end.
|
|
|
|
while (mIter != mEnd && *mIter != mSeparatorChar) {
|
|
|
|
while (mIter != mEnd &&
|
|
|
|
!isWhitespace(*mIter) && *mIter != mSeparatorChar) {
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
end = mIter;
|
|
|
|
|
|
|
|
while (mIter != mEnd && isWhitespace(*mIter)) {
|
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
}
|
2010-05-04 07:43:48 -07:00
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
// Skip separator (and any whitespace after it).
|
2008-09-30 17:50:42 -07:00
|
|
|
if (mIter != mEnd) {
|
2010-05-04 07:43:48 -07:00
|
|
|
NS_ASSERTION(*mIter == mSeparatorChar, "Ended loop too soon");
|
2008-09-30 17:50:42 -07:00
|
|
|
++mIter;
|
|
|
|
|
|
|
|
while (mIter != mEnd && isWhitespace(*mIter)) {
|
|
|
|
++mIter;
|
|
|
|
}
|
|
|
|
}
|
2010-05-04 07:43:48 -07:00
|
|
|
|
2008-09-30 17:50:42 -07:00
|
|
|
return Substring(begin, end);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
nsCSubstring::const_char_iterator mIter, mEnd;
|
2010-05-04 07:43:48 -07:00
|
|
|
char mSeparatorChar;
|
2008-09-30 17:50:42 -07:00
|
|
|
|
2011-09-28 23:19:26 -07:00
|
|
|
bool isWhitespace(unsigned char aChar)
|
2008-09-30 17:50:42 -07:00
|
|
|
{
|
|
|
|
return aChar <= ' ' &&
|
|
|
|
(aChar == ' ' || aChar == '\n' ||
|
|
|
|
aChar == '\r'|| aChar == '\t');
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2010-05-04 07:43:48 -07:00
|
|
|
#endif /* __nsCharSeparatedTokenizer_h */
|