gecko/parser/htmlparser/nsScanner.cpp
Birunthan Mohanathas 89b6e87981 Bug 1057923 - Flatten parser/htmlparser/{public,src}/ into parent directory. r=mrbkap
--HG--
rename : parser/htmlparser/src/CNavDTD.cpp => parser/htmlparser/CNavDTD.cpp
rename : parser/htmlparser/src/CNavDTD.h => parser/htmlparser/CNavDTD.h
rename : parser/htmlparser/src/CParserContext.cpp => parser/htmlparser/CParserContext.cpp
rename : parser/htmlparser/src/CParserContext.h => parser/htmlparser/CParserContext.h
rename : parser/htmlparser/src/nsElementTable.cpp => parser/htmlparser/nsElementTable.cpp
rename : parser/htmlparser/src/nsElementTable.h => parser/htmlparser/nsElementTable.h
rename : parser/htmlparser/src/nsExpatDriver.cpp => parser/htmlparser/nsExpatDriver.cpp
rename : parser/htmlparser/src/nsExpatDriver.h => parser/htmlparser/nsExpatDriver.h
rename : parser/htmlparser/src/nsHTMLEntities.cpp => parser/htmlparser/nsHTMLEntities.cpp
rename : parser/htmlparser/src/nsHTMLEntities.h => parser/htmlparser/nsHTMLEntities.h
rename : parser/htmlparser/src/nsHTMLEntityList.h => parser/htmlparser/nsHTMLEntityList.h
rename : parser/htmlparser/public/nsHTMLTagList.h => parser/htmlparser/nsHTMLTagList.h
rename : parser/htmlparser/src/nsHTMLTags.cpp => parser/htmlparser/nsHTMLTags.cpp
rename : parser/htmlparser/public/nsHTMLTags.h => parser/htmlparser/nsHTMLTags.h
rename : parser/htmlparser/src/nsHTMLTokenizer.cpp => parser/htmlparser/nsHTMLTokenizer.cpp
rename : parser/htmlparser/src/nsHTMLTokenizer.h => parser/htmlparser/nsHTMLTokenizer.h
rename : parser/htmlparser/public/nsIContentSink.h => parser/htmlparser/nsIContentSink.h
rename : parser/htmlparser/public/nsIDTD.h => parser/htmlparser/nsIDTD.h
rename : parser/htmlparser/public/nsIExpatSink.idl => parser/htmlparser/nsIExpatSink.idl
rename : parser/htmlparser/public/nsIExtendedExpatSink.idl => parser/htmlparser/nsIExtendedExpatSink.idl
rename : parser/htmlparser/public/nsIFragmentContentSink.h => parser/htmlparser/nsIFragmentContentSink.h
rename : parser/htmlparser/public/nsIHTMLContentSink.h => parser/htmlparser/nsIHTMLContentSink.h
rename : parser/htmlparser/public/nsIParser.h => parser/htmlparser/nsIParser.h
rename : parser/htmlparser/public/nsIParserService.h => parser/htmlparser/nsIParserService.h
rename : parser/htmlparser/public/nsITokenizer.h => parser/htmlparser/nsITokenizer.h
rename : parser/htmlparser/src/nsParser.cpp => parser/htmlparser/nsParser.cpp
rename : parser/htmlparser/src/nsParser.h => parser/htmlparser/nsParser.h
rename : parser/htmlparser/public/nsParserBase.h => parser/htmlparser/nsParserBase.h
rename : parser/htmlparser/public/nsParserCIID.h => parser/htmlparser/nsParserCIID.h
rename : parser/htmlparser/public/nsParserConstants.h => parser/htmlparser/nsParserConstants.h
rename : parser/htmlparser/src/nsParserModule.cpp => parser/htmlparser/nsParserModule.cpp
rename : parser/htmlparser/src/nsParserMsgUtils.cpp => parser/htmlparser/nsParserMsgUtils.cpp
rename : parser/htmlparser/src/nsParserMsgUtils.h => parser/htmlparser/nsParserMsgUtils.h
rename : parser/htmlparser/src/nsParserService.cpp => parser/htmlparser/nsParserService.cpp
rename : parser/htmlparser/src/nsParserService.h => parser/htmlparser/nsParserService.h
rename : parser/htmlparser/src/nsScanner.cpp => parser/htmlparser/nsScanner.cpp
rename : parser/htmlparser/src/nsScanner.h => parser/htmlparser/nsScanner.h
rename : parser/htmlparser/src/nsScannerString.cpp => parser/htmlparser/nsScannerString.cpp
rename : parser/htmlparser/public/nsScannerString.h => parser/htmlparser/nsScannerString.h
rename : parser/htmlparser/public/nsToken.h => parser/htmlparser/nsToken.h
2014-08-26 07:10:53 -07:00

1200 lines
28 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=78: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//#define __INCREMENTAL 1
#include "mozilla/DebugOnly.h"
#include "nsScanner.h"
#include "nsDebug.h"
#include "nsReadableUtils.h"
#include "nsIInputStream.h"
#include "nsIFile.h"
#include "nsNetUtil.h"
#include "nsUTF8Utils.h" // for LossyConvertEncoding
#include "nsCRT.h"
#include "nsParser.h"
#include "nsCharsetSource.h"
#include "mozilla/dom/EncodingUtils.h"
using mozilla::dom::EncodingUtils;
// Character written over embedded NUL (U+0000) characters: the Read* methods
// in this file pass it to ReplaceCharacter() whenever they encounter '\0'.
static char16_t sInvalid = UCS2_REPLACEMENT_CHAR;
/**
 * Remember the NUL-terminated list of terminator characters and derive a
 * quick-reject bit mask from it.
 *
 * mFilter starts with every bit set; each terminator then clears the bits it
 * uses. What remains are the bits that no terminator has, so any character
 * with one of those bits set cannot be a terminator and can be rejected with
 * a single AND before the full list is searched.
 *
 * @param aTerminateChars NUL-terminated array of terminator characters
 */
nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars)
  : mChars(aTerminateChars),
    mFilter(char16_t(~0)) // start with all bits set
{
  for (const char16_t* cursor = aTerminateChars; *cursor; ++cursor) {
    mFilter &= ~(*cursor);
  }
}
/**
 * Construct a non-incremental scanner whose entire input is the string
 * handed in here.
 *
 * If the string cannot be appended to the buffer, the position iterators are
 * zero-filled so that mCurrentPosition == mEndPosition and the scanner
 * behaves as if it were at end of input (re: bug 182067).
 *
 * @param anHTMLString the text to scan
 */
nsScanner::nsScanner(const nsAString& anHTMLString)
{
  MOZ_COUNT_CTOR(nsScanner);

  mSlidingBuffer = nullptr;
  mCountRemaining = 0;
  mFirstNonWhitespacePosition = -1;

  if (!AppendToBuffer(anHTMLString)) {
    // No buffer: make the iterators compare equal so we look like EOF.
    memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
    mEndPosition = mCurrentPosition;
  } else {
    mSlidingBuffer->BeginReading(mCurrentPosition);
  }
  mMarkPosition = mCurrentPosition;

  mIncremental = false;
  mUnicodeDecoder = nullptr;
  mCharsetSource = kCharsetUninitialized;
  mHasInvalidCharacter = false;
  mReplacementCharacter = char16_t(0x0);
}
/**
 * Construct an incremental scanner that starts out empty; data is supplied
 * later through the Append() methods. The filename is only recorded.
 *
 * @param aFilename     name stored in mFilename
 * @param aCreateStream asserted to be false
 */
nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
  : mFilename(aFilename)
{
  MOZ_COUNT_CTOR(nsScanner);
  NS_ASSERTION(!aCreateStream, "This is always true.");

  mSlidingBuffer = nullptr;
  mCountRemaining = 0;
  mFirstNonWhitespacePosition = -1;

  // XXX This is a big hack. The iterators must be initialized to something,
  // and what matters is that mCurrentPosition == mEndPosition so that our
  // methods believe we are at EOF (see bug 182067). Zero-filling
  // mCurrentPosition gives some hope of catching null pointer dereferences
  // associated with this hack. --darin
  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
  mEndPosition = mCurrentPosition;
  mMarkPosition = mCurrentPosition;

  mIncremental = true;
  mUnicodeDecoder = nullptr;
  mCharsetSource = kCharsetUninitialized;
  mHasInvalidCharacter = false;
  mReplacementCharacter = char16_t(0x0);

  // XML defaults to UTF-8 and about:blank is UTF-8, too.
  SetDocumentCharset(NS_LITERAL_CSTRING("UTF-8"), kCharsetFromDocTypeDefault);
}
/**
 * Switch the decoder used by Append(const char*, ...) to the encoding named
 * by aCharset, provided aSource is not of lower priority than the source of
 * the charset currently in effect.
 *
 * @param aCharset encoding label; asserted to resolve to a known encoding
 * @param aSource  priority of this charset information
 * @return always NS_OK
 */
nsresult nsScanner::SetDocumentCharset(const nsACString& aCharset, int32_t aSource)
{
  // A lower-priority source never overrides the current one.
  if (aSource < mCharsetSource) {
    return NS_OK;
  }
  mCharsetSource = aSource;

  nsCString canonicalName;
  mozilla::DebugOnly<bool> found =
    EncodingUtils::FindEncodingForLabel(aCharset, canonicalName);
  MOZ_ASSERT(found, "Should never call with a bogus aCharset.");

  const bool unchanged = !mCharset.IsEmpty() && canonicalName.Equals(mCharset);
  if (!unchanged) {
    // The encoding really is different: install a fresh decoder for it.
    mCharset.Assign(canonicalName);
    mUnicodeDecoder = EncodingUtils::DecoderForEncoding(mCharset);
    mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
  }
  return NS_OK;
}
/**
 * Destructor. Deletes the sliding buffer, which this object allocates in
 * AppendToBuffer(); it may still be null if nothing was ever appended.
 * MOZ_COUNT_DTOR pairs with the MOZ_COUNT_CTOR calls in the constructors.
 */
nsScanner::~nsScanner() {
delete mSlidingBuffer;
MOZ_COUNT_DTOR(nsScanner);
}
/**
 * Move the read position back to the marked position and give the characters
 * between the two back to the remaining-character count. This allows
 * backing up, for example when tokenization gets interrupted.
 *
 * NOTE: calling this without a preceding Mark() is bad form.
 *
 * Does nothing when there is no buffer.
 */
void nsScanner::RewindToMark(void)
{
  if (!mSlidingBuffer) {
    return;
  }
  mCountRemaining += Distance(mMarkPosition, mCurrentPosition);
  mCurrentPosition = mMarkPosition;
}
/**
 * Record the current read position as the mark, so that RewindToMark() can
 * return to it later. Everything in the buffer before the current position
 * is discarded.
 *
 * @return the distance from the old start of the buffer to the current
 *         position, or 0 when there is no buffer
 */
int32_t nsScanner::Mark()
{
  if (!mSlidingBuffer) {
    return 0;
  }

  nsScannerIterator bufferStart;
  mSlidingBuffer->BeginReading(bufferStart);
  const int32_t consumed = Distance(bufferStart, mCurrentPosition);

  // Drop the prefix, then re-fetch the current position from the new start
  // of the buffer.
  mSlidingBuffer->DiscardPrefix(mCurrentPosition);
  mSlidingBuffer->BeginReading(mCurrentPosition);
  mMarkPosition = mCurrentPosition;

  return consumed;
}
/**
* Insert data to our underlying input buffer as
* if it were read from an input stream.
*
* @update harishd 01/12/99
* @return error code
*/
bool nsScanner::UngetReadable(const nsAString& aBuffer) {
if (!mSlidingBuffer) {
return false;
}
mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
mSlidingBuffer->EndReading(mEndPosition);
uint32_t length = aBuffer.Length();
mCountRemaining += length; // Ref. bug 117441
return true;
}
/**
 * Append already-decoded text to the underlying input buffer, as if it had
 * been read from an input stream.
 *
 * @param aBuffer text to append
 * @return NS_OK, or NS_ERROR_OUT_OF_MEMORY if the text could not be appended
 */
nsresult nsScanner::Append(const nsAString& aBuffer)
{
  return AppendToBuffer(aBuffer) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
/**
 * Decode aLen raw bytes with the current unicode decoder into a newly
 * allocated scanner buffer and append that buffer to the input.
 *
 * When the decoder reports a failure, one replacement character is written
 * (mReplacementCharacter if one has been set, otherwise the decoder's
 * character for unmapped input), the decoder is reset, the input is advanced
 * one byte past the point the decoder reported, and conversion is retried
 * on the rest of the input.
 *
 * @param aBuffer  bytes to decode
 * @param aLen     number of bytes in aBuffer
 * @param aRequest forwarded unchanged to AppendToBuffer()
 * @return NS_OK on success; NS_ERROR_OUT_OF_MEMORY if the buffer cannot be
 *         allocated or appended; NS_ERROR_FAILURE if no decoder is set.
 *         Decoder errors are not propagated.
 */
nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen,
nsIRequest *aRequest)
{
nsresult res = NS_OK;
if (mUnicodeDecoder) {
// Ask the decoder for an upper bound on the output size and allocate one
// extra character on top of it.
int32_t unicharBufLen = 0;
mUnicodeDecoder->GetMaxLength(aBuffer, aLen, &unicharBufLen);
nsScannerString::Buffer* buffer = nsScannerString::AllocBuffer(unicharBufLen + 1);
NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
// unichars is the write cursor into the buffer; unicharLength is passed to
// Convert() as the space available and comes back as the count written.
char16_t *unichars = buffer->DataStart();
int32_t totalChars = 0;
int32_t unicharLength = unicharBufLen;
// Output index of the first replacement character written, or -1 if none.
// Only recorded while no override replacement character is set.
int32_t errorPos = -1;
do {
int32_t srcLength = aLen;
res = mUnicodeDecoder->Convert(aBuffer, &srcLength, unichars, &unicharLength);
totalChars += unicharLength;
// Continuation of failure case
if(NS_FAILED(res)) {
// if we failed, we consume one byte, replace it with the replacement
// character and try the conversion again.
// This is only needed because some decoders don't follow the
// nsIUnicodeDecoder contract: they return a failure when *aDestLength
// is 0 rather than the correct NS_OK_UDEC_MOREOUTPUT. See bug 244177
if ((unichars + unicharLength) >= buffer->DataEnd()) {
NS_ERROR("Unexpected end of destination buffer");
break;
}
if (mReplacementCharacter == 0x0 && errorPos == -1) {
errorPos = totalChars;
}
unichars[unicharLength++] = mReplacementCharacter == 0x0 ?
mUnicodeDecoder->GetCharacterForUnMapped() :
mReplacementCharacter;
// Move the write cursor past everything written in this round (including
// the replacement character) and recompute the space left in the buffer.
unichars = unichars + unicharLength;
unicharLength = unicharBufLen - (++totalChars);
mUnicodeDecoder->Reset();
// Skip the bytes the decoder reported plus one more, clamped to the input
// length.
if(((uint32_t) (srcLength + 1)) > aLen) {
srcLength = aLen;
}
else {
++srcLength;
}
aBuffer += srcLength;
aLen -= srcLength;
}
} while (NS_FAILED(res) && (aLen > 0));
buffer->SetDataLength(totalChars);
// Don't propagate return code of unicode decoder
// since it doesn't reflect on our success or failure
// - Ref. bug 87110
res = NS_OK;
if (!AppendToBuffer(buffer, aRequest, errorPos))
res = NS_ERROR_OUT_OF_MEMORY;
}
else {
NS_WARNING("No decoder found.");
res = NS_ERROR_FAILURE;
}
return res;
}
/**
 * Read the character at the current position and advance past it.
 *
 * @param aChar receives the character, or 0 at end of input
 * @return NS_OK, or kEOF when there is no buffer or no input left
 */
nsresult nsScanner::GetChar(char16_t& aChar)
{
  const bool atEnd = !mSlidingBuffer || mCurrentPosition == mEndPosition;
  if (atEnd) {
    aChar = 0;
    return kEOF;
  }

  aChar = *mCurrentPosition;
  ++mCurrentPosition;
  --mCountRemaining;
  return NS_OK;
}
/**
 * Look at a character in the input buffer without consuming it.
 *
 * @param aChar   receives the character; set to 0 when kEOF is returned
 * @param aOffset how many characters past the current position to look
 * @return NS_OK, or kEOF when there is no buffer, no input left, or the
 *         offset reaches past the remaining input
 */
nsresult nsScanner::Peek(char16_t& aChar, uint32_t aOffset)
{
  aChar = 0;

  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
    return kEOF;
  }

  if (aOffset == 0) {
    aChar = *mCurrentPosition;
    return NS_OK;
  }

  if (mCountRemaining <= aOffset) {
    return kEOF;
  }

  nsScannerIterator target = mCurrentPosition;
  target.advance(aOffset);
  aChar = *target;
  return NS_OK;
}
/**
 * Copy up to aNumChars characters, starting aOffset characters past the
 * current position, into aStr without consuming them. If fewer characters
 * remain, everything up to the end of the input is copied.
 *
 * @param aStr      receives the copied characters
 * @param aNumChars maximum number of characters to copy
 * @param aOffset   distance from the current position at which to start
 * @return NS_OK, or kEOF when there is no buffer, no input left, or the
 *         offset is not inside the remaining input
 */
nsresult nsScanner::Peek(nsAString& aStr, int32_t aNumChars, int32_t aOffset)
{
  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
    return kEOF;
  }
  if ((int32_t)mCountRemaining <= aOffset) {
    return kEOF;
  }

  nsScannerIterator first = mCurrentPosition;
  if (aOffset > 0) {
    first.advance(aOffset);
  }

  nsScannerIterator last;
  if (uint32_t(aNumChars + aOffset) <= mCountRemaining) {
    last = first;
    last.advance(aNumChars);
  } else {
    // Not enough input left: clamp to the end of the buffer.
    last = mEndPosition;
  }

  CopyUnicodeTo(first, last, aStr);
  return NS_OK;
}
/**
 * Advance the read position past a run of spaces, tabs, CRs and LFs.
 *
 * @param aNewlinesSkipped incremented once per line break skipped; an
 *                         adjacent CR LF or LF CR pair counts as one
 * @return kEOF when there is no buffer, nothing to read, or the run extended
 *         to the end of the input; otherwise the result of the initial Peek
 */
nsresult nsScanner::SkipWhitespace(int32_t& aNewlinesSkipped)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  char16_t ch = 0;
  nsresult status = Peek(ch);
  if (NS_FAILED(status)) {
    return status;
  }

  nsScannerIterator cursor = mCurrentPosition;
  bool consumedAny = false;

  while (cursor != mEndPosition) {
    const bool lineBreak = (ch == '\n' || ch == '\r');
    if (!lineBreak && ch != ' ' && ch != '\t') {
      break;
    }
    if (lineBreak) {
      ++aNewlinesSkipped;
    }
    consumedAny = true;

    const char16_t previous = ch;
    ++cursor;
    ch = (cursor != mEndPosition) ? *cursor : '\0';

    // CRLF == LFCR => LF: swallow the second half of the pair as well.
    if ((previous == '\r' && ch == '\n') ||
        (previous == '\n' && ch == '\r')) {
      ++cursor;
      ch = (cursor != mEndPosition) ? *cursor : '\0';
    }
  }

  if (consumedAny) {
    SetPosition(cursor);
    if (cursor == mEndPosition) {
      status = kEOF;
    }
  }
  return status;
}
/**
 * Consume characters for as long as the next one equals aSkipChar.
 *
 * @param aSkipChar the character to skip over
 * @return NS_OK when a different character stopped the skipping; otherwise
 *         the non-OK result reported by Peek (kEOF when there is no buffer)
 */
nsresult nsScanner::SkipOver(char16_t aSkipChar)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  char16_t next = 0;
  for (;;) {
    const nsresult status = Peek(next);
    if (status != NS_OK || next != aSkipChar) {
      return status;
    }
    GetChar(next);
  }
}
// Debugging aid, compiled out by "#if 0". Both overloads look for a NUL
// character in aString and test whether it sits anywhere other than the last
// position; the body of that test is empty, so it is presumably a place to
// put a breakpoint. The only references left in this file are the
// commented-out "//DoErrTest(aString);" calls.
#if 0
void DoErrTest(nsString& aString) {
int32_t pos=aString.FindChar(0);
if(kNotFound<pos) {
if(aString.Length()-1!=pos) {
}
}
}
void DoErrTest(nsCString& aString) {
int32_t pos=aString.FindChar(0);
if(kNotFound<pos) {
if(aString.Length()-1!=pos) {
}
}
}
#endif
/**
 * Consume characters up to, but not including, the first whitespace
 * character (LF, CR, space, tab, VT, FF), '<', '>' or '/'. NUL characters
 * met on the way are replaced in the buffer with sInvalid.
 *
 * @param aString receives the characters consumed
 * @return kEOF when there is no buffer or the end of the input was reached;
 *         otherwise the result of the initial Peek
 */
nsresult nsScanner::ReadTagIdentifier(nsScannerSharedSubstring& aString)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  char16_t firstChar = 0;
  nsresult status = Peek(firstChar);

  nsScannerIterator cursor = mCurrentPosition;
  nsScannerIterator stop = mEndPosition;

  // Scan for the first character that cannot be part of the identifier;
  // everything before it is appended in one go afterwards.
  for (; cursor != stop; ++cursor) {
    const char16_t ch = *cursor;
    if (ch == '\n' || ch == '\r' || ch == ' '  || ch == '\t' ||
        ch == '\v' || ch == '\f' || ch == '<'  || ch == '>'  ||
        ch == '/') {
      break;
    }
    if (ch == '\0') {
      ReplaceCharacter(cursor, sInvalid);
    }
  }

  // Don't bother appending nothing.
  if (cursor != mCurrentPosition) {
    AppendUnicodeTo(mCurrentPosition, cursor, aString);
  }

  SetPosition(cursor);
  if (cursor == stop) {
    status = kEOF;
  }
  return status;
}
/**
 * Consume characters up to, but not including, the first one that is not
 * valid in an entity name. Valid characters are ASCII letters, ASCII digits,
 * '_', '-' and '.'; ':' is deliberately not allowed (see bug 23791). NUL
 * characters are passed over without ending the name.
 *
 * @param aString receives the characters consumed
 * @return kEOF when there is no buffer or the end of the input was reached;
 *         otherwise the result of the initial Peek
 */
nsresult nsScanner::ReadEntityIdentifier(nsString& aString)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  char16_t firstChar = 0;
  nsresult status = Peek(firstChar);

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = mCurrentPosition;
  nsScannerIterator stop = mEndPosition;

  for (; cursor != stop; ++cursor) {
    const char16_t ch = *cursor;
    if (!ch) {
      continue;
    }
    const bool nameChar =
      ch == '_' || ch == '-' || ch == '.' ||
      ('a' <= ch && ch <= 'z') ||
      ('A' <= ch && ch <= 'Z') ||
      ('0' <= ch && ch <= '9');
    if (!nameChar) {
      break;
    }
  }

  const bool hitEnd = (cursor == stop);
  if (!hitEnd) {
    AppendUnicodeTo(begin, cursor, aString);
  }

  SetPosition(cursor);

  if (hitEnd) {
    AppendUnicodeTo(begin, cursor, aString);
    return kEOF;
  }
  return status;
}
/**
 * Consume a run of digits: '0'-'9', plus 'A'-'F' and 'a'-'f' when aBase
 * is 16. NUL characters are passed over without ending the run.
 *
 * @param aString receives the characters consumed
 * @param aBase   10 or 16 (asserted)
 * @return kEOF when there is no buffer or the end of the input was reached;
 *         otherwise the result of the initial Peek
 */
nsresult nsScanner::ReadNumber(nsString& aString, int32_t aBase)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  NS_ASSERTION(aBase == 10 || aBase == 16,"base value not supported");

  char16_t firstChar = 0;
  nsresult status = Peek(firstChar);

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = begin;
  nsScannerIterator stop = mEndPosition;

  for (; cursor != stop; ++cursor) {
    const char16_t ch = *cursor;
    if (!ch) {
      continue;
    }
    const bool decimalDigit = ('0' <= ch && ch <= '9');
    const bool hexLetter = (aBase == 16) &&
                           (('A' <= ch && ch <= 'F') ||
                            ('a' <= ch && ch <= 'f'));
    if (!decimalDigit && !hexLetter) {
      break;
    }
  }

  const bool hitEnd = (cursor == stop);
  if (!hitEnd) {
    AppendUnicodeTo(begin, cursor, aString);
  }

  SetPosition(cursor);

  if (hitEnd) {
    AppendUnicodeTo(begin, cursor, aString);
    return kEOF;
  }
  return status;
}
/**
 * Consume a run of whitespace (space, tab, CR, LF) and append it to aString.
 *
 * An adjacent CR LF or LF CR pair is counted as a single newline. A CR that
 * is not followed by LF has an LF appended after it in aString, so it comes
 * out as CRLF.
 *
 * @param aString          receives the whitespace consumed
 * @param aNewlinesSkipped incremented once per newline consumed
 * @param aHaveCR          set to true if a CR LF / LF CR pair or a lone CR
 *                         was seen, false otherwise
 * @return kEOF when there is no buffer, nothing to read, or the run extended
 *         to the end of the input; otherwise the result of the initial Peek
 */
nsresult nsScanner::ReadWhitespace(nsScannerSharedSubstring& aString,
int32_t& aNewlinesSkipped,
bool& aHaveCR) {
aHaveCR = false;
if (!mSlidingBuffer) {
return kEOF;
}
char16_t theChar = 0;
nsresult result = Peek(theChar);
if (NS_FAILED(result)) {
return result;
}
// origin marks the start of the text not yet appended to aString; it moves
// forward each time a lone CR forces an early append.
nsScannerIterator origin, current, end;
bool done = false;
origin = mCurrentPosition;
current = origin;
end = mEndPosition;
bool haveCR = false;
while(!done && current != end) {
switch(theChar) {
case '\n':
case '\r':
{
++aNewlinesSkipped;
char16_t thePrevChar = theChar;
theChar = (++current != end) ? *current : '\0';
if ((thePrevChar == '\r' && theChar == '\n') ||
(thePrevChar == '\n' && theChar == '\r')) {
theChar = (++current != end) ? *current : '\0'; // CRLF == LFCR => LF
haveCR = true;
} else if (thePrevChar == '\r') {
// Lone CR becomes CRLF; callers should know to remove extra CRs
AppendUnicodeTo(origin, current, aString);
aString.writable().Append(char16_t('\n'));
origin = current;
haveCR = true;
}
}
break;
case ' ' :
case '\t':
theChar = (++current != end) ? *current : '\0';
break;
default:
// First non-whitespace character: flush what has been collected and stop.
done = true;
AppendUnicodeTo(origin, current, aString);
break;
}
}
SetPosition(current);
if (current == end) {
// The run reached the end of the input: flush the tail and report EOF.
AppendUnicodeTo(origin, current, aString);
result = kEOF;
}
aHaveCR = haveCR;
return result;
}
//XXXbz callers of this have to manage their lone '\r' themselves if they want
//it to work.  Good thing they're all in view-source and it deals.
/**
 * Consume a run of whitespace (space, tab, CR, LF) and report it as an
 * iterator range instead of copying it.
 *
 * @param aStart           receives the start of the run
 * @param aEnd             receives the position just past the run
 * @param aNewlinesSkipped incremented once per line break consumed; an
 *                         adjacent CR LF or LF CR pair counts as one
 * @return kEOF when there is no buffer, nothing to read, or the run extended
 *         to the end of the input; otherwise the result of the initial Peek
 */
nsresult nsScanner::ReadWhitespace(nsScannerIterator& aStart,
                                   nsScannerIterator& aEnd,
                                   int32_t& aNewlinesSkipped)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  char16_t ch = 0;
  nsresult status = Peek(ch);
  if (NS_FAILED(status)) {
    return status;
  }

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = begin;
  nsScannerIterator stop = mEndPosition;

  while (cursor != stop) {
    const bool lineBreak = (ch == '\n' || ch == '\r');
    if (!lineBreak && ch != ' ' && ch != '\t') {
      // First non-whitespace character ends the run.
      aStart = begin;
      aEnd = cursor;
      break;
    }
    if (lineBreak) {
      ++aNewlinesSkipped;
    }

    const char16_t previous = ch;
    ++cursor;
    ch = (cursor != stop) ? *cursor : '\0';

    // CRLF == LFCR => LF: swallow the second half of the pair as well.
    if ((previous == '\r' && ch == '\n') ||
        (previous == '\n' && ch == '\r')) {
      ++cursor;
      ch = (cursor != stop) ? *cursor : '\0';
    }
  }

  SetPosition(cursor);

  if (cursor == stop) {
    aStart = begin;
    aEnd = cursor;
    status = kEOF;
  }
  return status;
}
/**
 * Consume characters until one of the terminators in aEndCondition is met.
 * NUL characters met on the way are replaced in the buffer with sInvalid.
 *
 * @param aString       receives the characters consumed
 * @param aEndCondition the terminator characters and their pre-computed
 *                      quick-reject filter
 * @param addTerminal   when true the terminator itself is consumed and
 *                      appended too
 * @return NS_OK when a terminator was found; kEOF when there is no buffer or
 *         the input ended first (everything up to the end is still
 *         appended); a failing result from the initial Peek otherwise
 */
nsresult nsScanner::ReadUntil(nsAString& aString,
                              const nsReadEndCondition& aEndCondition,
                              bool addTerminal)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = begin;

  char16_t ch = 0;
  const nsresult peeked = Peek(ch);
  if (NS_FAILED(peeked)) {
    return peeked;
  }

  for (; cursor != mEndPosition; ++cursor) {
    ch = *cursor;
    if (ch == '\0') {
      ReplaceCharacter(cursor, sInvalid);
      ch = sInvalid;
    }

    // A character with a bit set that no terminator has cannot match.
    if (ch & aEndCondition.mFilter) {
      continue;
    }

    // All bits are plausible: do the thorough check.
    for (const char16_t* term = aEndCondition.mChars; *term; ++term) {
      if (*term != ch) {
        continue;
      }
      if (addTerminal) {
        ++cursor;
      }
      AppendUnicodeTo(begin, cursor, aString);
      SetPosition(cursor);
      return NS_OK;
    }
  }

  // No terminator anywhere in the input; cursor == mEndPosition here.
  SetPosition(cursor);
  AppendUnicodeTo(begin, cursor, aString);
  return kEOF;
}
/**
 * Consume characters until one of the terminators in aEndCondition is met,
 * collecting them in a shared substring. NUL characters met on the way are
 * replaced in the buffer with sInvalid.
 *
 * @param aString       receives the characters consumed
 * @param aEndCondition the terminator characters and their pre-computed
 *                      quick-reject filter
 * @param addTerminal   when true the terminator itself is consumed and
 *                      appended too
 * @return NS_OK when a terminator was found; kEOF when there is no buffer or
 *         the input ended first (everything up to the end is still
 *         appended); a failing result from the initial Peek otherwise
 */
nsresult nsScanner::ReadUntil(nsScannerSharedSubstring& aString,
                              const nsReadEndCondition& aEndCondition,
                              bool addTerminal)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = begin;

  char16_t ch = 0;
  const nsresult peeked = Peek(ch);
  if (NS_FAILED(peeked)) {
    return peeked;
  }

  for (; cursor != mEndPosition; ++cursor) {
    ch = *cursor;
    if (ch == '\0') {
      ReplaceCharacter(cursor, sInvalid);
      ch = sInvalid;
    }

    // A character with a bit set that no terminator has cannot match.
    if (ch & aEndCondition.mFilter) {
      continue;
    }

    // All bits are plausible: do the thorough check.
    for (const char16_t* term = aEndCondition.mChars; *term; ++term) {
      if (*term != ch) {
        continue;
      }
      if (addTerminal) {
        ++cursor;
      }
      AppendUnicodeTo(begin, cursor, aString);
      SetPosition(cursor);
      return NS_OK;
    }
  }

  // No terminator anywhere in the input; cursor == mEndPosition here.
  SetPosition(cursor);
  AppendUnicodeTo(begin, cursor, aString);
  return kEOF;
}
/**
 * Consume characters until one of the terminators in aEndCondition is met,
 * reporting the consumed range through iterators instead of copying it.
 * NUL characters met on the way are replaced in the buffer with sInvalid.
 *
 * @param aStart        receives the start of the consumed range
 * @param aEnd          receives the position just past the consumed range
 * @param aEndCondition the terminator characters and their pre-computed
 *                      quick-reject filter
 * @param addTerminal   when true the terminator itself is included in the
 *                      range and consumed
 * @return NS_OK when a terminator was found; kEOF when there is no buffer
 *         (aStart/aEnd untouched) or the input ended first; a failing result
 *         from the initial Peek otherwise (aStart == aEnd == current
 *         position)
 */
nsresult nsScanner::ReadUntil(nsScannerIterator& aStart,
                              nsScannerIterator& aEnd,
                              const nsReadEndCondition &aEndCondition,
                              bool addTerminal)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = begin;

  char16_t ch = 0;
  const nsresult peeked = Peek(ch);
  if (NS_FAILED(peeked)) {
    aStart = aEnd = cursor;
    return peeked;
  }

  for (; cursor != mEndPosition; ++cursor) {
    ch = *cursor;
    if (ch == '\0') {
      ReplaceCharacter(cursor, sInvalid);
      ch = sInvalid;
    }

    // A character with a bit set that no terminator has cannot match.
    if (ch & aEndCondition.mFilter) {
      continue;
    }

    // All bits are plausible: do the thorough check.
    for (const char16_t* term = aEndCondition.mChars; *term; ++term) {
      if (*term != ch) {
        continue;
      }
      if (addTerminal) {
        ++cursor;
      }
      aStart = begin;
      aEnd = cursor;
      SetPosition(cursor);
      return NS_OK;
    }
  }

  // No terminator anywhere in the input; cursor == mEndPosition here.
  SetPosition(cursor);
  aStart = begin;
  aEnd = cursor;
  return kEOF;
}
/**
 * Consume characters until aTerminalChar is met. NUL characters met on the
 * way are replaced in the buffer with sInvalid.
 *
 * @param aString       receives the characters consumed
 * @param aTerminalChar the character that ends the read
 * @param addTerminal   when true the terminator itself is consumed and
 *                      appended too
 * @return NS_OK when the terminator was found; kEOF when there is no buffer
 *         or the input ended first (everything up to the end is still
 *         appended); a failing result from the initial Peek otherwise
 */
nsresult nsScanner::ReadUntil(nsAString& aString,
                              char16_t aTerminalChar,
                              bool addTerminal)
{
  if (!mSlidingBuffer) {
    return kEOF;
  }

  nsScannerIterator begin = mCurrentPosition;
  nsScannerIterator cursor = begin;

  char16_t ch;
  const nsresult peeked = Peek(ch);
  if (NS_FAILED(peeked)) {
    return peeked;
  }

  for (; cursor != mEndPosition; ++cursor) {
    ch = *cursor;
    if (ch == '\0') {
      ReplaceCharacter(cursor, sInvalid);
      ch = sInvalid;
    }
    if (ch != aTerminalChar) {
      continue;
    }

    if (addTerminal) {
      ++cursor;
    }
    AppendUnicodeTo(begin, cursor, aString);
    SetPosition(cursor);
    return NS_OK;
  }

  // No terminator anywhere in the input; cursor == mEndPosition here.
  AppendUnicodeTo(begin, cursor, aString);
  SetPosition(cursor);
  return kEOF;
}
/**
 * Rebind aSubstring to the part of the scanner's sliding buffer delimited by
 * aStart and aEnd.
 *
 * Like the other methods that touch the buffer, this does nothing when no
 * buffer has been created yet; dereferencing the null buffer pointer would
 * be undefined behavior.
 *
 * @param aSubstring the substring object to rebind
 * @param aStart     start of the range
 * @param aEnd       end of the range
 */
void nsScanner::BindSubstring(nsScannerSubstring& aSubstring,
                              const nsScannerIterator& aStart,
                              const nsScannerIterator& aEnd)
{
  if (!mSlidingBuffer) {
    return;
  }
  aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
}
// Copy the scanner's current read position into aPosition.
void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
{
aPosition = mCurrentPosition;
}
// Copy the scanner's end-of-input position into aPosition.
void nsScanner::EndReading(nsScannerIterator& aPosition)
{
aPosition = mEndPosition;
}
/**
 * Move the read position to aPosition and adjust the count of remaining
 * characters to match. Does nothing when there is no buffer.
 *
 * @param aPosition  the new read position
 * @param aTerminate when true and the new position is the end of the input,
 *                   the mark is moved there too and everything before it is
 *                   discarded from the buffer
 * @param aReverse   must be true when aPosition lies before the current
 *                   position, false otherwise (asserted in debug builds)
 */
void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate, bool aReverse)
{
  if (!mSlidingBuffer) {
    return;
  }

#ifdef DEBUG
  uint32_t remainingBefore = mCountRemaining;
#endif

  if (!aReverse) {
    mCountRemaining -= Distance(mCurrentPosition, aPosition);
  } else {
    mCountRemaining += Distance(aPosition, mCurrentPosition);
  }

  NS_ASSERTION((mCountRemaining >= remainingBefore && aReverse) ||
               (mCountRemaining <= remainingBefore && !aReverse),
               "Improper use of nsScanner::SetPosition. Make sure to set the"
               " aReverse parameter correctly");

  mCurrentPosition = aPosition;

  if (aTerminate && (mCurrentPosition == mEndPosition)) {
    mMarkPosition = mCurrentPosition;
    mSlidingBuffer->DiscardPrefix(mCurrentPosition);
  }
}
/**
 * Overwrite the character at aPosition in the sliding buffer with aChar.
 * Does nothing when there is no buffer.
 *
 * @param aPosition position of the character to replace
 * @param aChar     the replacement character
 */
void nsScanner::ReplaceCharacter(nsScannerIterator& aPosition,
                                 char16_t aChar)
{
  if (!mSlidingBuffer) {
    return;
  }
  mSlidingBuffer->ReplaceCharacter(aPosition, aChar);
}
/**
 * Add aBuf to the end of the sliding buffer, creating the sliding buffer
 * first if this is the first data received, and bring the position iterators
 * and the remaining-character count up to date.
 *
 * Also records the position of the first invalid character (when aErrorPos
 * is given and none has been recorded yet) and, until one has been found,
 * looks for the first non-whitespace character between the current position
 * and the end.
 *
 * @param aBuf      buffer to append
 * @param aRequest  not used by this method
 * @param aErrorPos index within aBuf of the first invalid character, or -1
 * @return false if the sliding buffer could not be created, true otherwise
 */
bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf,
                               nsIRequest *aRequest,
                               int32_t aErrorPos)
{
  // Remaining count before this append; used to locate aErrorPos below.
  const uint32_t remainingBefore = mCountRemaining;

  if (mSlidingBuffer) {
    mSlidingBuffer->AppendBuffer(aBuf);
    if (mCurrentPosition == mEndPosition) {
      mSlidingBuffer->BeginReading(mCurrentPosition);
    }
    mSlidingBuffer->EndReading(mEndPosition);
    mCountRemaining += aBuf->DataLength();
  } else {
    mSlidingBuffer = new nsScannerString(aBuf);
    if (!mSlidingBuffer) {
      return false;
    }
    mSlidingBuffer->BeginReading(mCurrentPosition);
    mMarkPosition = mCurrentPosition;
    mSlidingBuffer->EndReading(mEndPosition);
    mCountRemaining = aBuf->DataLength();
  }

  if (aErrorPos != -1 && !mHasInvalidCharacter) {
    mHasInvalidCharacter = true;
    mFirstInvalidPosition = mCurrentPosition;
    mFirstInvalidPosition.advance(remainingBefore + aErrorPos);
  }

  if (mFirstNonWhitespacePosition == -1) {
    nsScannerIterator stop(mEndPosition);
    for (nsScannerIterator probe(mCurrentPosition); probe != stop; ++probe) {
      if (!nsCRT::IsAsciiSpace(*probe)) {
        mFirstNonWhitespacePosition = Distance(mCurrentPosition, probe);
        break;
      }
    }
  }

  return true;
}
/**
 * Copy out the part of the scanner's input that has not been consumed yet,
 * i.e. everything from the current position to the end.
 *
 * @param aCopyBuffer receives the unconsumed characters; truncated to empty
 *                    when there is no buffer
 */
void nsScanner::CopyUnusedData(nsString& aCopyBuffer)
{
  if (mSlidingBuffer) {
    nsScannerIterator from = mCurrentPosition;
    nsScannerIterator to = mEndPosition;
    CopyUnicodeTo(from, to, aCopyBuffer);
  } else {
    aCopyBuffer.Truncate();
  }
}
/**
 * Retrieve the name of the file that the scanner is reading from.
 * In some cases, it's just a given name, because the scanner isn't
 * really reading from a file.
 *
 * @return a mutable reference to the stored filename (mFilename)
 */
nsString& nsScanner::GetFilename(void) {
return mFilename;
}
/**
 * Self test hook. The body is empty: per the original note, self-testing
 * for this class occurs in the parser selftest.
 */
void nsScanner::SelfTest(void) {
#ifdef _DEBUG
#endif
}
/**
 * Set the character that Append(const char*, ...) writes in place of
 * undecodable input. If an invalid character has already been recorded, the
 * character at its position is overwritten with the new replacement as well.
 *
 * @param aReplacementCharacter the replacement character to use from now on
 */
void nsScanner::OverrideReplacementCharacter(char16_t aReplacementCharacter)
{
  mReplacementCharacter = aReplacementCharacter;

  if (!mHasInvalidCharacter) {
    return;
  }
  ReplaceCharacter(mFirstInvalidPosition, mReplacementCharacter);
}