mirror of
https://gitlab.winehq.org/wine/wine-gecko.git
synced 2024-09-13 09:24:08 -07:00
Bug 464314 - view-source link-browsing doesn't decode entities. r+sr=mrbkap
This commit is contained in:
parent
6e81518c41
commit
eb67a63210
@ -76,6 +76,7 @@
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsPrintfCString.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsHTMLEntities.h"
|
||||
|
||||
#include "nsIServiceManager.h"
|
||||
|
||||
@ -1000,8 +1001,10 @@ PRBool CViewSourceHTML::IsUrlAttribute(const nsAString& tagName,
|
||||
// This doesn't feel like the ideal place for this, but the alternatives don't
|
||||
// seem all that nice either.
|
||||
if (isHref && tagName.LowerCaseEqualsLiteral("base")) {
|
||||
const nsSubstring& baseSpec = TrimTokenValue(attrValue);
|
||||
SetBaseURI(baseSpec);
|
||||
const nsAString& baseSpec = TrimTokenValue(attrValue);
|
||||
nsAutoString expandedBaseSpec;
|
||||
ExpandEntities(baseSpec, expandedBaseSpec);
|
||||
SetBaseURI(expandedBaseSpec);
|
||||
}
|
||||
|
||||
return isHref || isSrc;
|
||||
@ -1080,8 +1083,11 @@ nsresult CViewSourceHTML::CreateViewSourceURL(const nsAString& linkUrl,
|
||||
rv = GetBaseURI(getter_AddRefs(baseURI));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// Use the link URL and the base URI to build a URI for the link.
|
||||
rv = NS_NewURI(getter_AddRefs(hrefURI), linkUrl, charset.get(), baseURI);
|
||||
// Use the link URL and the base URI to build a URI for the link. Note that
|
||||
// the link URL may have untranslated entities in it.
|
||||
nsAutoString expandedLinkUrl;
|
||||
ExpandEntities(linkUrl, expandedLinkUrl);
|
||||
rv = NS_NewURI(getter_AddRefs(hrefURI), expandedLinkUrl, charset.get(), baseURI);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
// Get the absolute URL from the link URI.
|
||||
@ -1230,3 +1236,188 @@ nsresult CViewSourceHTML::SetBaseURI(const nsAString& baseSpec) {
|
||||
mBaseURI = baseURI;
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
void CViewSourceHTML::ExpandEntities(const nsAString& textIn, nsString& textOut)
|
||||
{
|
||||
nsAString::const_iterator iter, end;
|
||||
textIn.BeginReading(iter);
|
||||
textIn.EndReading(end);
|
||||
|
||||
// The outer loop treats the input as a sequence of pairs of runs. The first
|
||||
// run of each pair is just a run of regular characters. The second run is
|
||||
// something that looks like it might be an entity reference, e.g. "&".
|
||||
// Any regular run may be empty, and the entity run can be skipped at the end
|
||||
// of the text. Apparent entities that can't be translated are copied
|
||||
// verbatim. In particular this allows for raw ampersands in the input.
|
||||
// Special care is taken to handle the end of the text at any point inside
|
||||
// the loop.
|
||||
while (iter != end) {
|
||||
// Copy characters to textOut until but not including the first ampersand.
|
||||
for (; iter != end; ++iter) {
|
||||
PRUnichar ch = *iter;
|
||||
if (ch == kAmpersand) {
|
||||
break;
|
||||
}
|
||||
textOut.Append(ch);
|
||||
}
|
||||
|
||||
// We have a possible entity. If the entity is well-formed (or well-enough
|
||||
// formed) copy the entity value to "textOut". Otherwise, copy the "entity"
|
||||
// source characters to "textOut" verbatim. Either way, advance "iter" to
|
||||
// the first position after the entity/entity-like-thing.
|
||||
CopyPossibleEntity(iter, end, textOut);
|
||||
}
|
||||
}
|
||||
|
||||
static PRBool InRange(PRUnichar ch, unsigned char chLow, unsigned char chHigh)
|
||||
{
|
||||
return (chLow <= ch) && (ch <= chHigh);
|
||||
}
|
||||
|
||||
static PRBool IsDigit(PRUnichar ch)
|
||||
{
|
||||
return InRange(ch, '0', '9');
|
||||
}
|
||||
|
||||
static PRBool IsHexDigit(PRUnichar ch)
|
||||
{
|
||||
return IsDigit(ch) || InRange(ch, 'A', 'F') || InRange(ch, 'a', 'f');
|
||||
}
|
||||
|
||||
static PRBool IsAlphaNum(PRUnichar ch)
|
||||
{
|
||||
return InRange(ch, 'A', 'Z') || InRange(ch, 'a', 'z') || IsDigit(ch);
|
||||
}
|
||||
|
||||
static PRBool IsAmpersand(PRUnichar ch)
|
||||
{
|
||||
return ch == kAmpersand;
|
||||
}
|
||||
|
||||
static PRBool IsHashsign(PRUnichar ch)
|
||||
{
|
||||
return ch == kHashsign;
|
||||
}
|
||||
|
||||
static PRBool IsXx(PRUnichar ch)
|
||||
{
|
||||
return (ch == 'X') || (ch == 'x');
|
||||
}
|
||||
|
||||
static PRBool IsSemicolon(PRUnichar ch)
|
||||
{
|
||||
return ch == kSemicolon;
|
||||
}
|
||||
|
||||
static PRBool ConsumeChar(nsAString::const_iterator& start,
|
||||
const nsAString::const_iterator &end,
|
||||
PRBool (*testFun)(PRUnichar ch))
|
||||
{
|
||||
if (start == end) {
|
||||
return PR_FALSE;
|
||||
}
|
||||
if (!testFun(*start)) {
|
||||
return PR_FALSE;
|
||||
}
|
||||
++start;
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
void CViewSourceHTML::CopyPossibleEntity(nsAString::const_iterator& iter,
|
||||
const nsAString::const_iterator& end,
|
||||
nsAString& textBuffer)
|
||||
{
|
||||
// Note that "iter" is passed by reference, and we need to make sure that
|
||||
// we update its position as we parse characters, so the caller will know
|
||||
// how much text we processed.
|
||||
|
||||
// Remember where we started.
|
||||
const nsAString::const_iterator start(iter);
|
||||
|
||||
// Our possible entity must at least start with an '&' -- bail if it doesn't.
|
||||
if (!ConsumeChar(iter, end, IsAmpersand)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Identify the entity "body" and classify it.
|
||||
nsAString::const_iterator startBody, endBody;
|
||||
enum {TYPE_ID, TYPE_DECIMAL, TYPE_HEXADECIMAL} entityType;
|
||||
if (ConsumeChar(iter, end, IsHashsign)) {
|
||||
if (ConsumeChar(iter, end, IsXx)) {
|
||||
startBody = iter;
|
||||
entityType = TYPE_HEXADECIMAL;
|
||||
while (ConsumeChar(iter, end, IsHexDigit)) {
|
||||
// empty
|
||||
}
|
||||
} else {
|
||||
startBody = iter;
|
||||
entityType = TYPE_DECIMAL;
|
||||
while (ConsumeChar(iter, end, IsDigit)) {
|
||||
// empty
|
||||
}
|
||||
}
|
||||
} else {
|
||||
startBody = iter;
|
||||
entityType = TYPE_ID;
|
||||
// The parser seems to allow some other characters, such as ":" and "_".
|
||||
// However, all of the entities that we know about (see nsHTMLEntityList.h)
|
||||
// are strictly alphanumeric.
|
||||
while (ConsumeChar(iter, end, IsAlphaNum)) {
|
||||
// empty
|
||||
}
|
||||
}
|
||||
|
||||
// Record the end of the entity body.
|
||||
endBody = iter;
|
||||
|
||||
// If the entity body is terminated with a semicolon, consume that too.
|
||||
PRBool properlyTerminated = ConsumeChar(iter, end, IsSemicolon);
|
||||
|
||||
// If the entity body is empty, then it's not really an entity. Copy what
|
||||
// we've parsed verbatim, and return immediately.
|
||||
if (startBody == endBody) {
|
||||
textBuffer.Append(Substring(start, iter));
|
||||
return;
|
||||
}
|
||||
|
||||
// Construct a string from the body range. Note that we need a regular
|
||||
// string since substrings don't provide ToInteger().
|
||||
nsAutoString entityBody(Substring(startBody, endBody));
|
||||
|
||||
// Decode the entity to a Unicode character.
|
||||
PRInt32 entityCode = -1;
|
||||
switch (entityType) {
|
||||
case TYPE_ID:
|
||||
entityCode = nsHTMLEntities::EntityToUnicode(entityBody);
|
||||
break;
|
||||
case TYPE_DECIMAL:
|
||||
entityCode = ToUnicode(entityBody, 10, -1);
|
||||
break;
|
||||
case TYPE_HEXADECIMAL:
|
||||
entityCode = ToUnicode(entityBody, 16, -1);
|
||||
break;
|
||||
default:
|
||||
NS_NOTREACHED("Unknown entity type!");
|
||||
break;
|
||||
}
|
||||
|
||||
// Note that the parser does not require terminating semicolons for entities
|
||||
// with eight bit values. We want to allow the same here.
|
||||
if (properlyTerminated || ((0 <= entityCode) && (entityCode < 256))) {
|
||||
textBuffer.Append((PRUnichar) entityCode);
|
||||
} else {
|
||||
// If the entity is malformed in any way, just copy the source text verbatim.
|
||||
textBuffer.Append(Substring(start, iter));
|
||||
}
|
||||
}
|
||||
|
||||
PRInt32 CViewSourceHTML::ToUnicode(const nsString &strNum, PRInt32 radix, PRInt32 fallback)
|
||||
{
|
||||
PRInt32 result;
|
||||
PRInt32 code = strNum.ToInteger(&result, radix);
|
||||
if (result == NS_OK) {
|
||||
return code;
|
||||
}
|
||||
return fallback;
|
||||
}
|
||||
|
||||
|
@ -126,6 +126,11 @@ private:
|
||||
PRBool IsTokenValueTrimmableCharacter(PRUnichar ch);
|
||||
nsresult GetBaseURI(nsIURI **result);
|
||||
nsresult SetBaseURI(const nsAString& baseSpec);
|
||||
static void ExpandEntities(const nsAString& textIn, nsString& textOut);
|
||||
static void CopyPossibleEntity(nsAString::const_iterator& iter,
|
||||
const nsAString::const_iterator& end,
|
||||
nsAString& textBuffer);
|
||||
static PRInt32 ToUnicode(const nsString &strNum, PRInt32 radix, PRInt32 fallback);
|
||||
|
||||
protected:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user