gecko/dom/base/nsPlainTextSerializer.h

251 lines
9.3 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* nsIContentSerializer implementation that can be used with an
* nsIDocumentEncoder to convert a DOM into plaintext in a nice way
* (e.g. for copy/paste as plaintext).
*/
#ifndef nsPlainTextSerializer_h__
#define nsPlainTextSerializer_h__
#include "mozilla/Attributes.h"
#include "nsAutoPtr.h"
#include "nsCOMPtr.h"
#include "nsIAtom.h"
#include "nsIContentSerializer.h"
#include "nsIDocumentEncoder.h"
#include "nsILineBreaker.h"
#include "nsString.h"
#include "nsTArray.h"
#include <stack>
class nsIContent;
namespace mozilla {
namespace dom {
class Element;
} // namespace dom
} // namespace mozilla
/**
 * nsIContentSerializer implementation that produces plain text.
 *
 * The class is final and overrides the nsIContentSerializer entry points
 * (Init, Append*, Flush). Everything else -- the line buffer, indentation and
 * quoting state, and the tag / list / preformat stacks -- is private
 * bookkeeping. Most member functions are only declared here; their
 * definitions are not part of this header.
 */
class nsPlainTextSerializer final : public nsIContentSerializer
{
public:
  nsPlainTextSerializer();

  NS_DECL_ISUPPORTS

  // nsIContentSerializer
  NS_IMETHOD Init(uint32_t flags, uint32_t aWrapColumn,
                  const char* aCharSet, bool aIsCopying,
                  bool aIsWholeDocument) override;

  NS_IMETHOD AppendText(nsIContent* aText, int32_t aStartOffset,
                        int32_t aEndOffset, nsAString& aStr) override;
  NS_IMETHOD AppendCDATASection(nsIContent* aCDATASection,
                                int32_t aStartOffset, int32_t aEndOffset,
                                nsAString& aStr) override;

  // Processing instructions, comments and doctypes are not serialized:
  // the three overrides below do nothing and report success.
  NS_IMETHOD AppendProcessingInstruction(nsIContent* aPI,
                                         int32_t aStartOffset,
                                         int32_t aEndOffset,
                                         nsAString& aStr) override { return NS_OK; }
  NS_IMETHOD AppendComment(nsIContent* aComment, int32_t aStartOffset,
                           int32_t aEndOffset, nsAString& aStr) override { return NS_OK; }
  NS_IMETHOD AppendDoctype(nsIContent *aDoctype,
                           nsAString& aStr) override { return NS_OK; }

  NS_IMETHOD AppendElementStart(mozilla::dom::Element* aElement,
                                mozilla::dom::Element* aOriginalElement,
                                nsAString& aStr) override;
  NS_IMETHOD AppendElementEnd(mozilla::dom::Element* aElement,
                              nsAString& aStr) override;
  NS_IMETHOD Flush(nsAString& aStr) override;

  NS_IMETHOD AppendDocumentStart(nsIDocument *aDocument,
                                 nsAString& aStr) override;

private:
  // Private: code outside the class cannot destroy an instance directly.
  ~nsPlainTextSerializer();

  nsresult GetAttributeValue(nsIAtom* aName, nsString& aValueRet);
  void AddToLine(const char16_t* aStringToAdd, int32_t aLength);
  void EndLine(bool softlinebreak, bool aBreakBySpace = false);
  void EnsureVerticalSpace(int32_t noOfRows);
  void FlushLine();
  void OutputQuotesAndIndent(bool stripTrailingSpaces=false);
  void Output(nsString& aString);
  void Write(const nsAString& aString);
  bool IsInPre();
  bool IsInOL();
  bool IsCurrentNodeConverted();
  bool MustSuppressLeaf();

  /**
   * Returns the local name of the element as an atom if the element is an
   * HTML element and the atom is a static atom. Otherwise, nullptr is returned.
   */
  static nsIAtom* GetIdForContent(nsIContent* aContent);

  nsresult DoOpenContainer(nsIAtom* aTag);
  nsresult DoCloseContainer(nsIAtom* aTag);
  nsresult DoAddLeaf(nsIAtom* aTag);
  void DoAddText(bool aIsWhitespace, const nsAString& aText);

  // Inlined functions
  //
  // These three predicates only read member state, so they are
  // const-qualified.

  /**
   * True when a non-zero wrap column is set and at least one of the
   * OutputFormatted / OutputWrap encoder flags is present in mFlags.
   */
  inline bool MayWrap() const
  {
    return mWrapColumn &&
      ((mFlags & nsIDocumentEncoder::OutputFormatted) ||
       (mFlags & nsIDocumentEncoder::OutputWrap));
  }

  /**
   * True unless the OutputDisallowLineBreaking encoder flag is present in
   * mFlags.
   */
  inline bool MayBreakLines() const
  {
    return !(mFlags & nsIDocumentEncoder::OutputDisallowLineBreaking);
  }

  /**
   * True while mHeadLevel is zero.
   * NOTE(review): presumably mHeadLevel counts open <head> elements, so that
   * their content is not written out -- confirm against the implementation.
   */
  inline bool DoOutput() const
  {
    return mHeadLevel == 0;
  }

  // Stack handling functions
  bool GetLastBool(const nsTArray<bool>& aStack);
  void SetLastBool(nsTArray<bool>& aStack, bool aValue);
  void PushBool(nsTArray<bool>& aStack, bool aValue);
  bool PopBool(nsTArray<bool>& aStack);

  bool ShouldReplaceContainerWithPlaceholder(nsIAtom* aTag);
  bool IsIgnorableRubyAnnotation(nsIAtom* aTag);
  bool IsElementPreformatted(mozilla::dom::Element* aElement);
  bool IsElementBlock(mozilla::dom::Element* aElement);

private:
  // NOTE: the declaration order of the data members below is also their
  // initialization order; keep it in sync with the constructor.

  nsString         mCurrentLine;
  uint32_t         mHeadLevel;
  bool             mAtFirstColumn;

  // Handling of quoted text (for mail):
  // Quotes need to be wrapped differently from non-quoted text,
  // because quoted text has a few extra characters (e.g. ">> ")
  // which makes the line length longer.
  // Mail can represent quotes in different ways:
  // Not wrapped in any special tag (if mail.compose.wrap_to_window_width)
  // or in a <span>.
  bool             mDontWrapAnyQuotes;  // no special quote markers

  bool             mStructs;            // Output structs (pref)

  // If we've just written out a cite blockquote, we need to remember it
  // so we don't duplicate spaces before a <pre wrap> (which mail uses to quote
  // old messages).
  bool             mHasWrittenCiteBlockquote;

  int32_t          mIndent;
  // mInIndentString keeps a header that has to be written in the indent.
  // That could be, for instance, the bullet in a bulleted list.
  nsString         mInIndentString;
  int32_t          mCiteQuoteLevel;
  int32_t          mFlags;
  int32_t          mFloatingLines; // To store the number of lazy line breaks

  // The wrap column is how many standard sized chars (western languages)
  // should be allowed on a line. There could be fewer chars if the chars
  // are wider than latin chars, or more if the chars are narrower.
  uint32_t         mWrapColumn;

  // The width of the line as it will appear on the screen (approx.)
  uint32_t         mCurrentLineWidth;

  // Treat quoted text as though it's preformatted -- don't wrap it.
  // Having it on a pref is a temporary measure, See bug 69638.
  // NOTE(review): the two lines above do not appear to describe mSpanLevel;
  // they look like a leftover from the quote-wrapping handling -- confirm.
  int32_t          mSpanLevel;

  int32_t          mEmptyLines; // Will be the number of empty lines before
                                // the current. 0 if we are starting a new
                                // line and -1 if we are in a line.

  bool             mInWhitespace;
  bool             mPreFormattedMail; // we're dealing with special DOM
                                      // used by Thunderbird code.
  bool             mStartedOutput; // we've produced at least a character

  // While handling a new tag, this variable records whether a line break is
  // due because of a closing tag. It is set to true while closing tags, so
  // that opening tags are guaranteed to start with appropriate line breaks.
  bool             mLineBreakDue;

  bool             mPreformattedBlockBoundary;

  // Whether the output should include ruby annotations.
  bool             mWithRubyAnnotation;

  nsString         mURL;
  int32_t          mHeaderStrategy;    /* Header strategy (pref)
                                          0 = no indentation
                                          1 = indentation, increased with
                                              header level (default)
                                          2 = numbering and slight
                                              indentation */
  int32_t          mHeaderCounter[7];  /* For header-numbering:
                                          Number of previous headers of
                                          the same depth and in the same
                                          section.
                                          mHeaderCounter[1] for <h1> etc. */

  RefPtr<mozilla::dom::Element> mElement;

  // For handling table rows
  AutoTArray<bool, 8> mHasWrittenCellsForRow;

  // Values obtained in OpenContainer that are (also) needed in CloseContainer
  AutoTArray<bool, 8> mIsInCiteBlockquote;

  // The output data
  nsAString*            mOutputString;

  // The tag stack: the stack of tags we're operating on, so we can nest.
  // The stack only ever points to static atoms, so they don't need to be
  // refcounted.
  // NOTE(review): raw array pointer; its allocation and release are not
  // visible in this header -- see the implementation file.
  nsIAtom**        mTagStack;
  uint32_t         mTagStackIndex;

  // The stack indicating whether the elements we've been operating on are
  // CSS preformatted elements, so that we can tell if the text inside them
  // should be formatted.
  std::stack<bool> mPreformatStack;

  // Content in the stack above this index should be ignored:
  uint32_t         mIgnoreAboveIndex;

  // The stack for ordered lists
  // NOTE(review): raw array pointer, same ownership caveat as mTagStack.
  int32_t         *mOLStack;
  uint32_t         mOLStackIndex;

  uint32_t         mULCount;

  nsString                     mLineBreak;
  nsCOMPtr<nsILineBreaker>     mLineBreaker;

  // Convenience constant. It would be nice to have it as a const static
  // variable, but that causes issues with OpenBSD and module unloading.
  const nsString          kSpace;

  // If nsIDocumentEncoder::OutputNonTextContentAsPlaceholder is set, the child
  // nodes of specific nodes - <iframe>, <canvas>, etc. should be ignored.
  // mIgnoredChildNodeLevel is used to tell if current node is an ignorable
  // child node. The initial value of mIgnoredChildNodeLevel is 0. When
  // serializer enters those specific nodes, mIgnoredChildNodeLevel increases
  // and is greater than 0. Otherwise when serializer leaves those nodes,
  // mIgnoredChildNodeLevel decreases.
  uint32_t mIgnoredChildNodeLevel;
};
/**
 * Free function declared next to nsPlainTextSerializer; only the declaration
 * is visible in this header.
 *
 * @param aSerializer out-parameter of type nsIContentSerializer**.
 *                    NOTE(review): presumably receives a newly created
 *                    plain-text serializer -- confirm against the definition.
 * @return an nsresult status code.
 */
nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer);
#endif