/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ /** * MODULE NOTES: * @update gess 4/1/98 * * The scanner is a low-level service class that knows * how to consume characters out of an (internal) stream. * This class also offers a series of utility methods * that most tokenizers want, such as readUntil() * and SkipWhitespace(). */ #ifndef SCANNER #define SCANNER #include "nsCOMPtr.h" #include "nsString.h" #include "nsIParser.h" #include "prtypes.h" #include "nsIUnicodeDecoder.h" #include "nsScannerString.h" class nsParser; class nsReadEndCondition { public: const PRUnichar *mChars; PRUnichar mFilter; explicit nsReadEndCondition(const PRUnichar* aTerminateChars); private: nsReadEndCondition(const nsReadEndCondition& aOther); // No copying void operator=(const nsReadEndCondition& aOther); // No assigning }; class nsScanner { public: /** * Use this constructor if you want i/o to be based on * a single string you hand in during construction. * This short cut was added for Javascript. * * @update ftang 3/02/99 * @param aCharset charset * @param aCharsetSource - where the charset info came from * @param aMode represents the parser mode (nav, other) * @return */ nsScanner(const nsAString& anHTMLString, const nsACString& aCharset, PRInt32 aSource); /** * Use this constructor if you want i/o to be based on * a file (therefore a stream) or just data you provide via Append(). * * @update ftang 3/02/99 * @param aCharset charset * @param aCharsetSource - where the charset info came from * @param aMode represents the parser mode (nav, other) * @return */ nsScanner(nsString& aFilename,PRBool aCreateStream, const nsACString& aCharset, PRInt32 aSource); ~nsScanner(); /** * retrieve next char from internal input stream * * @update gess 3/25/98 * @param ch is the char to accept new value * @return error code reflecting read status */ nsresult GetChar(PRUnichar& ch); /** * peek ahead to consume next char from scanner's internal * input buffer * * @update gess 3/25/98 * @param ch is the char to accept new value * @return error code reflecting read status */ nsresult Peek(PRUnichar& ch, PRUint32 aOffset=0); nsresult Peek(nsAString& aStr, PRInt32 aNumChars, PRInt32 aOffset = 0); /** * Skip over chars as long as they equal given char * * @update gess 3/25/98 * @param char to be skipped * @return error code */ nsresult SkipOver(PRUnichar aSkipChar); /** * Skip whitespace on scanner input stream * * @update gess 3/25/98 * @return error status */ nsresult SkipWhitespace(PRInt32& aNewlinesSkipped); /** * Consume characters until you run into space, a '<', a '>', or a '/'. * * @param aString - receives new data from stream * @return error code */ nsresult ReadTagIdentifier(nsScannerSharedSubstring& aString); /** * Consume characters until you run into a char that's not valid in an * entity name * * @param aString - receives new data from stream * @return error code */ nsresult ReadEntityIdentifier(nsString& aString); nsresult ReadNumber(nsString& aString,PRInt32 aBase); nsresult ReadWhitespace(nsScannerSharedSubstring& aString, PRInt32& aNewlinesSkipped, PRBool& aHaveCR); nsresult ReadWhitespace(nsScannerIterator& aStart, nsScannerIterator& aEnd, PRInt32& aNewlinesSkipped); /** * Consume characters until you find the terminal char * * @update gess 3/25/98 * @param aString receives new data from stream * @param aTerminal contains terminating char * @param addTerminal tells us whether to append terminal to aString * @return error code */ nsresult ReadUntil(nsAString& aString, PRUnichar aTerminal, PRBool addTerminal); /** * Consume characters until you find one contained in given * terminal set. * * @update gess 3/25/98 * @param aString receives new data from stream * @param aTermSet contains set of terminating chars * @param addTerminal tells us whether to append terminal to aString * @return error code */ nsresult ReadUntil(nsAString& aString, const nsReadEndCondition& aEndCondition, PRBool addTerminal); nsresult ReadUntil(nsScannerSharedSubstring& aString, const nsReadEndCondition& aEndCondition, PRBool addTerminal); nsresult ReadUntil(nsScannerIterator& aStart, nsScannerIterator& aEnd, const nsReadEndCondition& aEndCondition, PRBool addTerminal); /** * Records current offset position in input stream. This allows us * to back up to this point if the need should arise, such as when * tokenization gets interrupted. * * @update gess 5/12/98 * @param * @return */ PRInt32 Mark(void); /** * Resets current offset position of input stream to marked position. * This allows us to back up to this point if the need should arise, * such as when tokenization gets interrupted. * NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST! * * @update gess 5/12/98 * @param * @return */ void RewindToMark(void); /** * * * @update harishd 01/12/99 * @param * @return */ PRBool UngetReadable(const nsAString& aBuffer); /** * * * @update gess 5/13/98 * @param * @return */ nsresult Append(const nsAString& aBuffer); /** * * * @update gess 5/21/98 * @param * @return */ nsresult Append(const char* aBuffer, PRUint32 aLen, nsIRequest *aRequest); /** * Call this to copy bytes out of the scanner that have not yet been consumed * by the tokenization process. * * @update gess 5/12/98 * @param aCopyBuffer is where the scanner buffer will be copied to * @return nada */ void CopyUnusedData(nsString& aCopyBuffer); /** * Retrieve the name of the file that the scanner is reading from. * In some cases, it's just a given name, because the scanner isn't * really reading from a file. * * @update gess 5/12/98 * @return */ nsString& GetFilename(void); static void SelfTest(); /** * Use this setter to change the scanner's unicode decoder * * @update ftang 3/02/99 * @param aCharset a normalized (alias resolved) charset name * @param aCharsetSource- where the charset info came from * @return */ nsresult SetDocumentCharset(const nsACString& aCharset, PRInt32 aSource); void BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd); void CurrentPosition(nsScannerIterator& aPosition); void EndReading(nsScannerIterator& aPosition); void SetPosition(nsScannerIterator& aPosition, PRBool aTruncate = PR_FALSE, PRBool aReverse = PR_FALSE); void ReplaceCharacter(nsScannerIterator& aPosition, PRUnichar aChar); /** * Internal method used to cause the internal buffer to * be filled with data. * * @update gess4/3/98 */ PRBool IsIncremental(void) {return mIncremental;} void SetIncremental(PRBool anIncrValue) {mIncremental=anIncrValue;} /** * Return the position of the first non-whitespace * character. This is only reliable before consumers start * reading from this scanner. */ PRInt32 FirstNonWhitespacePosition() { return mFirstNonWhitespacePosition; } void SetParser(nsParser *aParser) { mParser = aParser; } /** * Override replacement character used by nsIUnicodeDecoder. * Default behavior is that it uses nsIUnicodeDecoder's mapping. * * @param aReplacementCharacter the replacement character * XML (expat) parser uses 0xffff */ void OverrideReplacementCharacter(PRUnichar aReplacementCharacter); protected: PRBool AppendToBuffer(nsScannerString::Buffer *, nsIRequest *aRequest, PRInt32 aErrorPos = -1); PRBool AppendToBuffer(const nsAString& aStr) { nsScannerString::Buffer* buf = nsScannerString::AllocBufferFromString(aStr); if (!buf) return PR_FALSE; AppendToBuffer(buf, nsnull); return PR_TRUE; } nsScannerString* mSlidingBuffer; nsScannerIterator mCurrentPosition; // The position we will next read from in the scanner buffer nsScannerIterator mMarkPosition; // The position last marked (we may rewind to here) nsScannerIterator mEndPosition; // The current end of the scanner buffer nsScannerIterator mFirstInvalidPosition; // The position of the first invalid character that was detected nsString mFilename; PRUint32 mCountRemaining; // The number of bytes still to be read // from the scanner buffer PRPackedBool mIncremental; PRPackedBool mHasInvalidCharacter; PRUnichar mReplacementCharacter; PRInt32 mFirstNonWhitespacePosition; PRInt32 mCharsetSource; nsCString mCharset; nsCOMPtr mUnicodeDecoder; nsParser *mParser; private: nsScanner &operator =(const nsScanner &); // Not implemented. }; #endif