gecko/content/base/src/nsHTMLContentSerializer.cpp

429 lines
13 KiB
C++
Raw Normal View History

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 sw=2 et tw=80: */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Ryan Jones <sciguyryan@gmail.com>
* Laurent Jouanneau <laurent.jouanneau@disruptive-innovations.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/*
* nsIContentSerializer implementation that can be used with an
* nsIDocumentEncoder to convert an HTML (not XHTML!) DOM to an HTML
* string that could be parsed into more or less the original DOM.
*/
#include "nsHTMLContentSerializer.h"
#include "nsIDOMElement.h"
#include "nsIDOMText.h"
#include "nsIContent.h"
#include "nsIDocument.h"
#include "nsIDOMDocument.h"
#include "nsINameSpaceManager.h"
#include "nsString.h"
#include "nsUnicharUtils.h"
#include "nsXPIDLString.h"
#include "nsIServiceManager.h"
#include "nsIDocumentEncoder.h"
#include "nsGkAtoms.h"
#include "nsIURI.h"
#include "nsNetUtil.h"
#include "nsEscape.h"
#include "nsITextToSubURI.h"
#include "nsCRT.h"
#include "nsIParserService.h"
#include "nsContentUtils.h"
#include "nsLWBrkCIID.h"
#include "nsIScriptElement.h"
#include "nsAttrName.h"
#include "nsHtml5Module.h"
#include "nsIHTMLDocument.h"
// Name stem that, when preceded by '_' or '-', marks an attribute which
// SerializeHTMLAttributes() leaves out of the serialized output.
static const char kMozStr[] = "moz";
// NOTE(review): not referenced in the part of this file visible here;
// presumably a line-length threshold -- confirm before relying on it.
static const PRInt32 kLongLineLen = 128;
/**
 * Factory for the HTML content serializer.
 *
 * Allocates an nsHTMLContentSerializer and hands it back through
 * aSerializer by way of CallQueryInterface().
 *
 * @param aSerializer  out-param that receives the new serializer.
 * @return NS_ERROR_OUT_OF_MEMORY when the allocation yields null, otherwise
 *         whatever CallQueryInterface() returns.
 */
nsresult NS_NewHTMLContentSerializer(nsIContentSerializer** aSerializer)
{
  nsHTMLContentSerializer* serializer = new nsHTMLContentSerializer();
  if (serializer) {
    // NOTE(review): nothing here frees the object should the interface query
    // fail -- confirm that this cannot happen for this class.
    return CallQueryInterface(serializer, aSerializer);
  }

  return NS_ERROR_OUT_OF_MEMORY;
}
// Constructs the serializer and sets mIsHTMLSerializer to PR_TRUE.
// NOTE(review): mIsHTMLSerializer is declared outside this file; presumably
// it lets shared serializer code tell HTML from XHTML output -- confirm.
nsHTMLContentSerializer::nsHTMLContentSerializer()
{
mIsHTMLSerializer = PR_TRUE;
}
// Destructor. The body is empty: this class performs no explicit cleanup of
// its own here.
nsHTMLContentSerializer::~nsHTMLContentSerializer()
{
}
/**
 * Called at the start of a document. This implementation writes nothing:
 * both arguments are ignored and aStr is left untouched.
 *
 * @param aDocument  unused.
 * @param aStr       unused; nothing is appended.
 * @return always NS_OK.
 */
NS_IMETHODIMP
nsHTMLContentSerializer::AppendDocumentStart(nsIDOMDocument *aDocument,
nsAString& aStr)
{
return NS_OK;
}
/**
 * Appends the attributes of an HTML element to the output string.
 *
 * An attribute is skipped when
 *  - its name starts with "_moz" or "-moz";
 *  - it is a type attribute on a <br> whose value starts with "_moz";
 *  - it is the value attribute of an <li> while copying and
 *    mIsFirstChildOfOL is set (SerializeLIValueAttribute() handles it).
 *
 * href/src values are made absolute when OutputAbsoluteLinks is set, and are
 * then passed through EscapeURI() unless IsJavaScript() reported a script
 * value. When a whole document is serialized, the content attribute of a
 * <meta http-equiv="content-type"> is replaced by one built from mCharset.
 * A shorthand attribute with an empty value is written as name="name".
 *
 * @param aContent          element whose attributes are written.
 * @param aOriginalElement  not used by this implementation.
 * @param aTagPrefix        not used by this implementation.
 * @param aTagNamespaceURI  not used by this implementation.
 * @param aTagName          tag name of the element; selects the special cases.
 * @param aStr              string the serialized attributes are appended to.
 */
void
nsHTMLContentSerializer::SerializeHTMLAttributes(nsIContent* aContent,
                                                 nsIDOMElement *aOriginalElement,
                                                 nsAString& aTagPrefix,
                                                 const nsAString& aTagNamespaceURI,
                                                 nsIAtom* aTagName,
                                                 nsAString& aStr)
{
  const PRInt32 attrCount = aContent->GetAttrCount();
  if (!attrCount) {
    return;
  }

  // Reused across iterations; both are overwritten for every attribute.
  nsAutoString attrNameStr, attrValue;
  NS_NAMED_LITERAL_STRING(editorMarker, "_moz");

  // The HTML5 parser keeps attributes in source order, so walk forward when
  // it is enabled and the element belongs to an HTML document.
  nsIDocument* ownerDoc = aContent->GetOwnerDocument();
  PRBool forward = PR_FALSE;
  if (!ownerDoc || ownerDoc->IsHTML()) {
    nsCOMPtr<nsIHTMLDocument> htmlDoc(do_QueryInterface(ownerDoc));
    if (htmlDoc) {
      forward = nsHtml5Module::sEnabled;
    }
  }

  // Otherwise walk backward: the stored order is then the reverse of the
  // parse order (see bug 213347 for the reason).
  PRInt32 index = forward ? 0 : attrCount - 1;
  const PRInt32 stop = forward ? attrCount : -1;
  const PRInt32 delta = forward ? 1 : -1;

  for (; index != stop; index += delta) {
    const nsAttrName* attr = aContent->GetAttrNameAt(index);
    PRInt32 nsID = attr->NamespaceID();
    nsIAtom* localName = attr->LocalName();

    // Drop every attribute whose name begins with "_moz" or "-moz".
    const char* utf8Name;
    localName->GetUTF8String(&utf8Name);
    const PRBool hasMozLeadChar = ('_' == *utf8Name) || ('-' == *utf8Name);
    if (hasMozLeadChar &&
        !nsCRT::strncmp(utf8Name + 1, kMozStr, PRUint32(sizeof(kMozStr) - 1))) {
      continue;
    }

    aContent->GetAttr(nsID, localName, attrValue);

    // <br type="_moz..."> is an editor artifact (bug 16988); do not emit it.
    if (aTagName == nsGkAtoms::br && localName == nsGkAtoms::type &&
        StringBeginsWith(attrValue, editorMarker)) {
      continue;
    }

    // The value of such an <li> is written by SerializeLIValueAttribute().
    if (mIsCopying && mIsFirstChildOfOL &&
        aTagName == nsGkAtoms::li && localName == nsGkAtoms::value) {
      continue;
    }

    // Evaluated on the value as stored, before any URI rewriting below.
    const PRBool isScriptValue =
      IsJavaScript(aContent, localName, nsID, attrValue);

    if (localName == nsGkAtoms::href || localName == nsGkAtoms::src) {
      if (mFlags & nsIDocumentEncoder::OutputAbsoluteLinks) {
        // OBJECT and APPLET would also need their CODEBASE looked up, which
        // is more involved; their links are left relative for now.
        nsCOMPtr<nsIURI> baseURI = aContent->GetBaseURI();
        if (baseURI) {
          nsAutoString resolved;
          nsresult rv = NS_MakeAbsoluteURI(resolved, attrValue, baseURI);
          if (NS_SUCCEEDED(rv)) {
            attrValue = resolved;
          }
        }
      }

      // Escape the URI; fall back to the unescaped text if escaping fails.
      nsAutoString unescaped(attrValue);
      if (!isScriptValue &&
          NS_FAILED(EscapeURI(aContent, unescaped, attrValue))) {
        attrValue = unescaped;
      }
    }

    if (mIsWholeDocument && aTagName == nsGkAtoms::meta &&
        localName == nsGkAtoms::content) {
      // For <meta http-equiv="content-type"> write the charset we are
      // actually serializing with instead of the one in the document.
      nsAutoString equivHeader;
      aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, equivHeader);
      if (equivHeader.LowerCaseEqualsLiteral("content-type")) {
        attrValue = NS_LITERAL_STRING("text/html; charset=") +
                    NS_ConvertASCIItoUTF16(mCharset);
      }
    }

    localName->ToString(attrNameStr);

    // A shorthand attribute without a value is written as name="name".
    if (IsShorthandAttr(localName, aTagName) && attrValue.IsEmpty()) {
      attrValue = attrNameStr;
    }

    SerializeAttr(EmptyString(), attrNameStr, attrValue, aStr, !isScriptValue);
  }
}
/**
 * Appends the start tag of an element, including its attributes and any
 * formatting whitespace, to aStr.
 *
 * @param aElement          element to serialize; must not be null.
 * @param aOriginalElement  passed on to IsFirstChildOfOL(),
 *                          SerializeHTMLAttributes() and AfterElementStart().
 * @param aStr              string the output is appended to.
 * @return NS_ERROR_FAILURE if aElement is not an nsIContent; NS_OK otherwise,
 *         including when CheckElementStart() declines the element. A null
 *         aElement is rejected by NS_ENSURE_ARG.
 */
NS_IMETHODIMP
nsHTMLContentSerializer::AppendElementStart(nsIDOMElement *aElement,
                                            nsIDOMElement *aOriginalElement,
                                            nsAString& aStr)
{
  NS_ENSURE_ARG(aElement);

  nsCOMPtr<nsIContent> content = do_QueryInterface(aElement);
  if (!content) {
    return NS_ERROR_FAILURE;
  }

  PRBool forceFormat = PR_FALSE;
  if (!CheckElementStart(content, forceFormat, aStr)) {
    return NS_OK;
  }

  nsIAtom *tag = content->Tag();
  const PRBool breakBeforeOpen =
    LineBreakBeforeOpen(content->GetNameSpaceID(), tag);

  // Whitespace that precedes the tag.
  if (!mDoRaw && !mPreLevel && (mDoFormat || forceFormat)) {
    if (breakBeforeOpen && mColPos) {
      AppendNewLineToString(aStr);
    }
    else {
      MaybeAddNewlineForRootNode(aStr);
    }

    if (!mColPos) {
      AppendIndentation(aStr);
    }
    else if (mAddSpace) {
      AppendToString(PRUnichar(' '), aStr);
      mAddSpace = PR_FALSE;
    }
  }
  else if (mAddSpace) {
    AppendToString(PRUnichar(' '), aStr);
    mAddSpace = PR_FALSE;
  }
  else {
    MaybeAddNewlineForRootNode(aStr);
  }

  // Reset unconditionally so that no stray newline appears later on paths
  // where MaybeAddNewlineForRootNode() was not called.
  mAddNewlineForRootNode = PR_FALSE;

  // "<" followed by the tag name.
  AppendToString(kLessThan, aStr);

  nsAutoString tagStr;
  tag->ToString(tagStr);
  AppendToString(tagStr.get(), -1, aStr);

  MaybeEnterInPreContent(content);

  // Re-evaluated on purpose: this runs after MaybeEnterInPreContent().
  if (!mDoRaw && !mPreLevel && (mDoFormat || forceFormat)) {
    IncrIndentation(tag);
  }

  // OL and LI elements are tracked while copying so that the ordinal of
  // each LI can be determined.
  if (mIsCopying && tag == nsGkAtoms::ol) {
    nsAutoString startAttr;
    aElement->GetAttribute(NS_LITERAL_STRING("start"), startAttr);

    // The stored value is one less than "start" because every LI increments
    // it before use; it stays 0 when the attribute is absent or unparsable.
    PRInt32 initialOrdinal = 0;
    if (!startAttr.IsEmpty()) {
      PRInt32 parseStatus = 0;
      const PRInt32 parsed = startAttr.ToInteger(&parseStatus);
      if (NS_SUCCEEDED(parseStatus)) {
        initialOrdinal = parsed - 1;
      }
    }
    mOLStateStack.AppendElement(olState(initialOrdinal, PR_TRUE));
  }

  if (mIsCopying && tag == nsGkAtoms::li) {
    mIsFirstChildOfOL = IsFirstChildOfOL(aOriginalElement);
    if (mIsFirstChildOfOL) {
      // This LI gets its value attribute written separately.
      SerializeLIValueAttribute(aElement, aStr);
    }
  }

  // Every element, the LI above included, still gets its remaining
  // attributes written here.
  nsAutoString unusedPrefix;
  SerializeHTMLAttributes(content, aOriginalElement, unusedPrefix,
                          EmptyString(), tag, aStr);

  AppendToString(kGreaterThan, aStr);

  const PRBool suppressEntities = tag == nsGkAtoms::script ||
                                  tag == nsGkAtoms::style ||
                                  tag == nsGkAtoms::noscript ||
                                  tag == nsGkAtoms::noframes;
  if (suppressEntities) {
    ++mDisableEntityEncoding;
  }

  if (!mDoRaw && !mPreLevel && (mDoFormat || forceFormat) &&
      LineBreakAfterOpen(content->GetNameSpaceID(), tag)) {
    AppendNewLineToString(aStr);
  }

  AfterElementStart(content, aOriginalElement, aStr);

  return NS_OK;
}
/**
 * Appends the end tag of an element, with any formatting whitespace, to aStr
 * and unwinds the per-element state set up by AppendElementStart().
 *
 * No end tag is written for a script element that reports IsMalformed(),
 * nor for a tag the parser service reports as not being a container.
 *
 * @param aElement  element whose end tag is serialized; must not be null.
 * @param aStr      string the output is appended to.
 * @return NS_ERROR_FAILURE if aElement is not an nsIContent, otherwise
 *         NS_OK. A null aElement is rejected by NS_ENSURE_ARG.
 */
NS_IMETHODIMP
nsHTMLContentSerializer::AppendElementEnd(nsIDOMElement *aElement,
                                          nsAString& aStr)
{
  NS_ENSURE_ARG(aElement);

  nsCOMPtr<nsIContent> content = do_QueryInterface(aElement);
  if (!content) return NS_ERROR_FAILURE;

  nsIAtom *name = content->Tag();

  // Undo the increment AppendElementStart() makes for these tags.
  if (name == nsGkAtoms::script ||
      name == nsGkAtoms::style ||
      name == nsGkAtoms::noscript ||
      name == nsGkAtoms::noframes) {
    --mDisableEntityEncoding;
  }

  PRBool forceFormat = content->HasAttr(kNameSpaceID_None,
                                        nsGkAtoms::mozdirty);

  if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
    DecrIndentation(name);
  }

  if (name == nsGkAtoms::script) {
    nsCOMPtr<nsIScriptElement> script = do_QueryInterface(aElement);

    if (script && script->IsMalformed()) {
      // We're looking at a malformed script tag. This means that the end tag
      // was missing in the source. Imitate that here by not serializing the
      // end tag.
      // NOTE(review): the decrement presumably stands in for the
      // MaybeLeaveFromPreContent() call skipped by this early return.
      --mPreLevel;
      return NS_OK;
    }
  }
  else if (mIsCopying && name == nsGkAtoms::ol) {
    NS_ASSERTION((!mOLStateStack.IsEmpty()), "Cannot have an empty OL Stack");
    // AppendElementStart() pushes one olState for every OL it sees while
    // copying, so there should always be an entry to pop here; the check
    // only guards against an unbalanced stack.
    if (!mOLStateStack.IsEmpty()) {
      mOLStateStack.RemoveElementAt(mOLStateStack.Length() - 1);
    }
  }

  nsIParserService* parserService = nsContentUtils::GetParserService();

  if (parserService) {
    // Start from "is a container" so that a call which returns without
    // writing its out-param still gets an end tag, rather than branching on
    // an uninitialized value.
    PRBool isContainer = PR_TRUE;

    parserService->IsContainer(parserService->HTMLAtomTagToId(name),
                               isContainer);
    if (!isContainer)
      return NS_OK;
  }

  // Whitespace that precedes the end tag.
  if ((mDoFormat || forceFormat) && !mPreLevel && !mDoRaw) {
    PRBool lineBreakBeforeClose =
      LineBreakBeforeClose(content->GetNameSpaceID(), name);

    if (mColPos && lineBreakBeforeClose) {
      AppendNewLineToString(aStr);
    }
    if (!mColPos) {
      AppendIndentation(aStr);
    }
    else if (mAddSpace) {
      AppendToString(PRUnichar(' '), aStr);
      mAddSpace = PR_FALSE;
    }
  }
  else if (mAddSpace) {
    AppendToString(PRUnichar(' '), aStr);
    mAddSpace = PR_FALSE;
  }

  // The end-tag opener, the tag name and ">".
  nsAutoString nameStr;
  name->ToString(nameStr);

  AppendToString(kEndTag, aStr);
  AppendToString(nameStr.get(), -1, aStr);
  AppendToString(kGreaterThan, aStr);

  MaybeLeaveFromPreContent(content);

  // Re-evaluated on purpose: this runs after MaybeLeaveFromPreContent().
  if ((mDoFormat || forceFormat) && !mPreLevel
      && !mDoRaw && LineBreakAfterClose(content->GetNameSpaceID(), name)) {
    AppendNewLineToString(aStr);
  }
  else {
    MaybeFlagNewlineForRootNode(aElement);
  }

  if (name == nsGkAtoms::body) {
    --mInBody;
  }

  return NS_OK;
}