Bug 1176668 - Fix overflow avoidance in numeric character reference handling. r=wchen.

This commit is contained in:
Henri Sivonen 2015-08-25 18:05:45 +03:00
parent abf2d6a74c
commit 60bd872c8e
4 changed files with 109 additions and 47 deletions

View File

@ -357,8 +357,6 @@ public class Tokenizer implements Locator {
private int charRefBufMark;
private int prevValue;
protected int value;
private boolean seenDigits;
@ -3217,7 +3215,6 @@ public class Tokenizer implements Locator {
break stateloop;
}
c = checkChar(buf, pos);
prevValue = -1;
value = 0;
seenDigits = false;
/*
@ -3269,21 +3266,18 @@ public class Tokenizer implements Locator {
}
c = checkChar(buf, pos);
}
// Deal with overflow gracefully
if (value < prevValue) {
value = 0x110000; // Value above Unicode range but
// within int
// range
}
prevValue = value;
/*
* Consume as many characters as match the range of
* characters given above.
*/
assert value >= 0: "value must not become negative.";
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 10;
value += c - '0';
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 10;
value += c - '0';
}
continue;
} else if (c == ';') {
if (seenDigits) {
@ -3350,31 +3344,34 @@ public class Tokenizer implements Locator {
break stateloop;
}
c = checkChar(buf, pos);
// Deal with overflow gracefully
if (value < prevValue) {
value = 0x110000; // Value above Unicode range but
// within int
// range
}
prevValue = value;
/*
* Consume as many characters as match the range of
* characters given above.
*/
assert value >= 0: "value must not become negative.";
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 16;
value += c - '0';
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 16;
value += c - '0';
}
continue;
} else if (c >= 'A' && c <= 'F') {
seenDigits = true;
value *= 16;
value += c - 'A' + 10;
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'A' + 10;
}
continue;
} else if (c >= 'a' && c <= 'f') {
seenDigits = true;
value *= 16;
value += c - 'a' + 10;
// Avoid overflow
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'a' + 10;
}
continue;
} else if (c == ';') {
if (seenDigits) {
@ -6613,7 +6610,6 @@ public class Tokenizer implements Locator {
hi = 0; // will always be overwritten before use anyway
candidate = -1;
charRefBufMark = 0;
prevValue = -1;
value = 0;
seenDigits = false;
endTag = false;
@ -6663,7 +6659,6 @@ public class Tokenizer implements Locator {
hi = other.hi;
candidate = other.candidate;
charRefBufMark = other.charRefBufMark;
prevValue = other.prevValue;
value = other.value;
seenDigits = other.seenDigits;
endTag = other.endTag;

View File

@ -1658,7 +1658,6 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
prevValue = -1;
value = 0;
seenDigits = false;
switch(c) {
@ -1684,14 +1683,13 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
}
c = checkChar(buf, pos);
}
if (value < prevValue) {
value = 0x110000;
}
prevValue = value;
MOZ_ASSERT(value >= 0, "value must not become negative.");
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 10;
value += c - '0';
if (value <= 0x10FFFF) {
value *= 10;
value += c - '0';
}
continue;
} else if (c == ';') {
if (seenDigits) {
@ -1750,24 +1748,27 @@ nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* bu
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
if (value < prevValue) {
value = 0x110000;
}
prevValue = value;
MOZ_ASSERT(value >= 0, "value must not become negative.");
if (c >= '0' && c <= '9') {
seenDigits = true;
value *= 16;
value += c - '0';
if (value <= 0x10FFFF) {
value *= 16;
value += c - '0';
}
continue;
} else if (c >= 'A' && c <= 'F') {
seenDigits = true;
value *= 16;
value += c - 'A' + 10;
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'A' + 10;
}
continue;
} else if (c >= 'a' && c <= 'f') {
seenDigits = true;
value *= 16;
value += c - 'a' + 10;
if (value <= 0x10FFFF) {
value *= 16;
value += c - 'a' + 10;
}
continue;
} else if (c == ';') {
if (seenDigits) {
@ -3950,7 +3951,6 @@ nsHtml5Tokenizer::resetToDataState()
hi = 0;
candidate = -1;
charRefBufMark = 0;
prevValue = -1;
value = 0;
seenDigits = false;
endTag = false;
@ -3999,7 +3999,6 @@ nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other)
hi = other->hi;
candidate = other->candidate;
charRefBufMark = other->charRefBufMark;
prevValue = other->prevValue;
value = other->value;
seenDigits = other->seenDigits;
endTag = other->endTag;

View File

@ -99,7 +99,6 @@ class nsHtml5Tokenizer
int32_t hi;
int32_t candidate;
int32_t charRefBufMark;
int32_t prevValue;
protected:
int32_t value;
private:

View File

@ -721,3 +721,72 @@ FOO&#xFFFFFF;ZOO
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#11111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO<4F>"
#data
FOO&#1111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO<4F>"
#data
FOO&#111111111111
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
(1,13): eof-in-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO<4F>"
#data
FOO&#11111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#1111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#111111111111ZOO
#errors
(1,3): expected-doctype-but-got-chars
(1,13): illegal-codepoint-for-numeric-entity
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"