gecko/dom/base/nsPlainTextSerializer.h
Ehsan Akhgari 4cf4343937 Bug 1125963 - Part 2: Rename mPreFormatted to mPreFormattedMail in order to clarify the meaning of this member
This member is set to true when Gecko detects the DOM structure
created by the Thunderbird mail code.
2015-03-04 00:54:40 -05:00

242 lines
9.1 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* nsIContentSerializer implementation that can be used with an
* nsIDocumentEncoder to convert a DOM into plaintext in a nice way
* (e.g. for copy/paste as plaintext).
*/
#ifndef nsPlainTextSerializer_h__
#define nsPlainTextSerializer_h__
#include "mozilla/Attributes.h"
#include "nsAutoPtr.h"
#include "nsCOMPtr.h"
#include "nsIAtom.h"
#include "nsIContentSerializer.h"
#include "nsIDocumentEncoder.h"
#include "nsILineBreaker.h"
#include "nsString.h"
#include "nsTArray.h"
#include <stack>
class nsIContent;
namespace mozilla {
namespace dom {
class Element;
} // namespace dom
} // namespace mozilla
/**
 * nsIContentSerializer implementation that serializes DOM content as plain
 * text.
 *
 * Only the declarations are visible in this header: apart from the three
 * no-op overrides and the two inline helpers below, every member function is
 * defined out of line.
 *
 * NOTE(review): the order of the data members determines the object layout
 * and the order in which a constructor initializes them; do not reorder them
 * without checking the constructor's initializer list.
 */
class nsPlainTextSerializer MOZ_FINAL : public nsIContentSerializer
{
public:
nsPlainTextSerializer();
NS_DECL_ISUPPORTS
// nsIContentSerializer
NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
const char* aCharSet, bool aIsCopying,
bool aIsWholeDocument) MOZ_OVERRIDE;
NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE;
NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
int32_t aStartOffset, int32_t aEndOffset,
nsAString& aStr) MOZ_OVERRIDE;
// No-op: returns NS_OK without appending anything to aStr.
NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
int32_t aStartOffset,
int32_t aEndOffset,
nsAString& aStr) MOZ_OVERRIDE { return NS_OK; }
// No-op: returns NS_OK without appending anything to aStr.
NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset,
int32_t aEndOffset, nsAString& aStr) MOZ_OVERRIDE { return NS_OK; }
// No-op: returns NS_OK without appending anything to aStr.
NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
nsAString& aStr) MOZ_OVERRIDE { return NS_OK; }
NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
mozilla::dom::Element* aOriginalElement,
nsAString& aStr) MOZ_OVERRIDE;
NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
nsAString& aStr) MOZ_OVERRIDE;
NS_IMETHOD Flush(nsAString& aStr) MOZ_OVERRIDE;
NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
nsAString& aStr) MOZ_OVERRIDE;
private:
// The destructor is private, so instances cannot be destroyed directly by
// outside code.
// NOTE(review): presumably destruction happens through the reference
// counting declared by NS_DECL_ISUPPORTS -- confirm in the implementation.
~nsPlainTextSerializer();
nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);
void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
void EndLine(bool softlinebreak, bool aBreakBySpace = false);
void EnsureVerticalSpace(int32_t noOfRows);
void FlushLine();
void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
void Output(nsString& aString);
void Write(const nsAString& aString);
bool IsInPre();
bool IsInOL();
bool IsCurrentNodeConverted();
bool MustSuppressLeaf();
/**
* Returns the local name of the element as an atom if the element is an
* HTML element and the atom is a static atom. Otherwise, nullptr is returned.
*/
static nsIAtom* GetIdForContent(nsIContent* aContent);
nsresult DoOpenContainer(nsIAtom* aTag);
nsresult DoCloseContainer(nsIAtom* aTag);
nsresult DoAddLeaf(nsIAtom* aTag);
void DoAddText(bool aIsWhitespace, const nsAString& aText);
// Inlined functions
// Returns true when a non-zero wrap column is set and mFlags contains
// nsIDocumentEncoder::OutputFormatted or nsIDocumentEncoder::OutputWrap.
inline bool MayWrap()
{
return mWrapColumn &&
((mFlags & nsIDocumentEncoder::OutputFormatted) ||
(mFlags & nsIDocumentEncoder::OutputWrap));
}
// Returns true while mHeadLevel is zero.
// NOTE(review): mHeadLevel presumably counts open <head> elements, so
// output would be suppressed inside the document head -- confirm.
inline bool DoOutput()
{
return mHeadLevel == 0;
}
// Stack handling functions
// These treat an nsTArray<bool> as a stack of flags.
// NOTE(review): presumably used with mHasWrittenCellsForRow and
// mIsInCiteBlockquote below -- confirm in the implementation.
bool GetLastBool(const nsTArray<bool>& aStack);
void SetLastBool(nsTArray<bool>& aStack, bool aValue);
void PushBool(nsTArray<bool>& aStack, bool aValue);
bool PopBool(nsTArray<bool>& aStack);
bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag);
bool IsElementPreformatted(mozilla::dom::Element* aElement);
bool IsElementBlock(mozilla::dom::Element* aElement);
private:
nsString mCurrentLine;
uint32_t mHeadLevel;
bool mAtFirstColumn;
// Handling of quoted text (for mail):
// Quotes need to be wrapped differently from non-quoted text,
// because quoted text has a few extra characters (e.g. ">> ")
// which makes the line length longer.
// Mail can represent quotes in different ways:
// Not wrapped in any special tag (if mail.compose.wrap_to_window_width)
// or in a <span>.
bool mDontWrapAnyQuotes; // no special quote markers
bool mStructs; // Output structs (pref)
// If we've just written out a cite blockquote, we need to remember it
// so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
// old messages).
bool mHasWrittenCiteBlockquote;
int32_t mIndent;
// mInIndentString keeps a header that has to be written in the indent.
// That could be, for instance, the bullet in a bulleted list.
nsString mInIndentString;
int32_t mCiteQuoteLevel;
int32_t mFlags;
int32_t mFloatingLines; // To store the number of lazy line breaks
// The wrap column is how many standard sized chars (western languages)
// should be allowed on a line. There could be fewer chars if the chars
// are wider than Latin chars, or more if the chars are narrower.
uint32_t mWrapColumn;
// The width of the line as it will appear on the screen (approx.)
uint32_t mCurrentLineWidth;
// Treat quoted text as though it's preformatted -- don't wrap it.
// Having it on a pref is a temporary measure, See bug 69638.
// NOTE(review): the two lines above do not appear to describe mSpanLevel;
// they read like a leftover from a member that was moved or removed.
// Confirm and relocate or delete them.
int32_t mSpanLevel;
int32_t mEmptyLines; // Will be the number of empty lines before
// the current. 0 if we are starting a new
// line and -1 if we are in a line.
bool mInWhitespace;
bool mPreFormattedMail; // we're dealing with special DOM
// used by Thunderbird code.
bool mStartedOutput; // we've produced at least a character
// While handling a new tag, this flag records whether a line break is still
// due because of a closing tag. It is set to true while closing tags, so
// opening tags are guaranteed to start with the appropriate line breaks.
bool mLineBreakDue;
bool mPreformattedBlockBoundary;
nsString mURL;
int32_t mHeaderStrategy; /* Header strategy (pref)
0 = no indentation
1 = indentation, increased with
header level (default)
2 = numbering and slight indentation */
int32_t mHeaderCounter[7]; /* For header-numbering:
Number of previous headers of
the same depth and in the same
section.
mHeaderCounter[1] for <h1> etc. */
nsRefPtr<mozilla::dom::Element> mElement;
// For handling table rows
nsAutoTArray<bool, 8> mHasWrittenCellsForRow;
// Values obtained in OpenContainer that are (also) needed in CloseContainer
nsAutoTArray<bool, 8> mIsInCiteBlockquote;
// The output data
nsAString* mOutputString;
// The tag stack: the stack of tags we're operating on, so we can nest.
// The stack only ever points to static atoms, so they don't need to be
// refcounted.
nsIAtom** mTagStack;
uint32_t mTagStackIndex;
// The stack indicating whether the elements we've been operating on are
// CSS preformatted elements, so that we can tell if the text inside them
// should be formatted.
std::stack<bool> mPreformatStack;
// Content in the stack above this index should be ignored:
uint32_t mIgnoreAboveIndex;
// The stack for ordered lists
int32_t *mOLStack;
uint32_t mOLStackIndex;
uint32_t mULCount;
nsString mLineBreak;
nsCOMPtr<nsILineBreaker> mLineBreaker;
// Convenience constant. It would be nice to have it as a const static
// variable, but that causes issues with OpenBSD and module unloading.
const nsString kSpace;
// If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
// nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
// mIgnoredChildNodeLevel is used to tell if current node is an ignorable
// child node. The initial value of mIgnoredChildNodeLevel is 0. When
// serializer enters those specific nodes, mIgnoredChildNodeLevel increases
// and is greater than 0. Otherwise when serializer leaves those nodes,
// mIgnoredChildNodeLevel decreases.
uint32_t mIgnoredChildNodeLevel;
};
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably creates an nsPlainTextSerializer and hands it back
// through the aSerializer out-parameter -- confirm against the implementation.
nsresult
NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
#endif