Bug 443976: Parsing support for the unicode-range: descriptor in @font-face. r=dbaron

This commit is contained in:
Zack Weinberg 2009-08-20 14:52:47 -07:00
parent 6ade4eb7d1
commit 63e2075a2a
7 changed files with 312 additions and 26 deletions

View File

@ -7980,8 +7980,53 @@ CSSParserImpl::ParseFontSrcFormat(nsTArray<nsCSSValue> & values)
// Parses the value of a unicode-range: descriptor: a comma-separated list
// of UNICODE-RANGE tokens.  On success aValue is set to an eCSSUnit_Array
// of integers holding (low, high) pairs in source order and PR_TRUE is
// returned.  On every PR_FALSE path aValue is left untouched.
PRBool
CSSParserImpl::ParseFontRanges(nsCSSValue& aValue)
{
// NOTE(review): this listing interleaves old and new lines of a change.
// The comment and early return immediately below look like the previous
// stub body; as written they make everything after them unreachable.
// Confirm against the real file that they are no longer present.
// not currently implemented (bug 443976)
return PR_FALSE;
// Flat list of accepted ranges: low0, high0, low1, high1, ...
nsTArray<PRUint32> ranges;
for (;;) {
// Running out of tokens simply ends the list.
if (!GetToken(PR_TRUE))
break;
// Anything other than a unicode-range token is put back and ends the
// list.
// NOTE(review): this path is also taken right after a ',' has been
// consumed, so a trailing comma is not rejected here -- confirm whether
// that is intended.
if (mToken.mType != eCSSToken_URange) {
UngetToken();
break;
}
// An invalid range token is a parsing error, causing the entire
// descriptor to be ignored.
if (!mToken.mIntegerValid)
return PR_FALSE;
// mInteger / mInteger2 carry the low / high end of the range.
PRUint32 low = mToken.mInteger;
PRUint32 high = mToken.mInteger2;
// A range that descends, or a range that is entirely outside the
// current range of Unicode (U+0-10FFFF) is ignored, but does not
// invalidate the descriptor. A range that straddles the high end
// is clipped.
if (low <= 0x10FFFF && low <= high) {
if (high > 0x10FFFF)
high = 0x10FFFF;
ranges.AppendElement(low);
ranges.AppendElement(high);
}
// Keep going only if a comma follows this range.
if (!ExpectSymbol(',', PR_TRUE))
break;
}
// No usable range at all (nothing parsed, or every range was ignored
// above): report failure.
if (ranges.Length() == 0)
return PR_FALSE;
nsRefPtr<nsCSSValue::Array> srcVals
= nsCSSValue::Array::Create(ranges.Length());
if (!srcVals) {
// Array creation failed: record an out-of-memory error on the scanner.
mScanner.SetLowLevelError(NS_ERROR_OUT_OF_MEMORY);
return PR_FALSE;
}
// Copy the flat low/high list into the value array, one integer per slot.
for (PRUint32 i = 0; i < ranges.Length(); i++)
srcVals->Item(i).SetIntValue(ranges[i], eCSSUnit_Integer);
aValue.SetArrayValue(srcVals, eCSSUnit_Array);
return PR_TRUE;
}
PRBool

View File

@ -71,6 +71,7 @@
#include "nsDOMError.h"
#include "nsStyleUtil.h"
#include "nsCSSDeclaration.h"
#include "nsPrintfCString.h"
#define IMPL_STYLE_RULE_INHERIT(_class, super) \
NS_IMETHODIMP _class::GetStyleSheet(nsIStyleSheet*& aSheet) const { return super::GetStyleSheet(aSheet); } \
@ -1534,6 +1535,52 @@ AppendSerializedFontSrc(const nsCSSValue& src, nsAString & aResult NS_OUTPARAM)
aResult.Truncate(aResult.Length() - 2); // remove the last comma-space
}
// Appends aCode to aBuf as uppercase hexadecimal, zero-padded on the left
// so that at least four digits are always printed.
static void
AppendSerializedUnicodePoint(PRUint32 aCode, nsACString &aBuf NS_OUTPARAM)
{
  nsPrintfCString hexDigits("%04X", aCode);
  aBuf.Append(hexDigits);
}
// A unicode-range: descriptor is represented as an array of integers,
// to be interpreted as a sequence of pairs: min max min max ...
// It is in source order.  (Possibly it should be sorted and overlaps
// consolidated, but right now we don't do that.)
//
// Serializes aValue into aResult.  aResult is always cleared first; if
// aValue is not an array it is left empty.  Otherwise each pair is written
// as "U+MIN" when min == max and as "U+MIN-MAX" when they differ, with
// consecutive pairs separated by ", ".
static void
AppendSerializedUnicodeRange(nsCSSValue const & aValue,
                             nsAString & aResult NS_OUTPARAM)
{
  NS_PRECONDITION(aValue.GetUnit() == eCSSUnit_Null ||
                  aValue.GetUnit() == eCSSUnit_Array,
                  "improper value unit for unicode-range:");
  aResult.Truncate();
  if (aValue.GetUnit() != eCSSUnit_Array)
    return;

  nsCSSValue::Array const & sources = *aValue.GetArrayValue();
  NS_ABORT_IF_FALSE(sources.Count() % 2 == 0,
                    "odd number of entries in a unicode-range: array");

  nsCAutoString buf;
  const PRUint32 count = sources.Count();
  // Only complete (min, max) pairs are visited, so sources[i+1] is never
  // read past the end even if the count is unexpectedly odd.
  for (PRUint32 i = 0; i + 1 < count; i += 2) {
    // The separator goes in front of every pair but the first.  This avoids
    // chopping a trailing ", " afterwards, which would compute
    // Length() - 2 on an empty buffer when the array has no entries.
    if (i != 0)
      buf.AppendLiteral(", ");

    PRUint32 min = sources[i].GetIntValue();
    PRUint32 max = sources[i+1].GetIntValue();

    // We don't try to replicate the U+XX?? notation.
    buf.AppendLiteral("U+");
    AppendSerializedUnicodePoint(min, buf);

    if (min != max) {
      buf.Append('-');
      AppendSerializedUnicodePoint(max, buf);
    }
  }
  CopyASCIItoUTF16(buf, aResult);
}
// Mapping from nsCSSFontDesc codes to nsCSSFontFaceStyleDecl fields.
// Keep this in sync with enum nsCSSFontDesc in nsCSSProperty.h.
nsCSSValue nsCSSFontFaceStyleDecl::* const
@ -1606,7 +1653,7 @@ nsCSSFontFaceStyleDecl::GetPropertyValue(nsCSSFontDesc aFontDescID,
return NS_OK;
case eCSSFontDesc_UnicodeRange:
// these are not implemented, so always return an empty string
AppendSerializedUnicodeRange(val, aResult);
return NS_OK;
case eCSSFontDesc_UNKNOWN:

View File

@ -143,12 +143,31 @@ IsIdent(PRInt32 ch) {
return ch >= 0 && (ch >= 256 || (gLexTable[ch] & IS_IDENT) != 0);
}
// Returns the distance of ch from the character '0'.  No range check is
// performed here; the result is only a digit value when ch is '0'..'9'.
static inline PRUint32
DecimalDigitValue(PRInt32 ch)
{
  const PRInt32 offsetFromZero = ch - '0';
  return static_cast<PRUint32>(offsetFromZero);
}
// Returns the numeric value of a hexadecimal digit character.  Decimal
// digits go through DecimalDigitValue; everything else is treated as a
// letter.  No check is made here that ch actually is a hex digit.
static inline PRUint32
HexDigitValue(PRInt32 ch)
{
  if (!IsDigit(ch)) {
    // Masking with 7 keeps only the low three bits, which are the same for
    // an upper-case letter and its lower-case counterpart ('A'/'a' -> 1,
    // ... 'F'/'f' -> 6); adding 9 turns that into the value 10..15.
    return (ch & 0x7) + 9;
  }
  return DecimalDigitValue(ch);
}
// Constructs a token whose type starts out as eCSSToken_Symbol.  mType is
// the only member given an explicit initial value here.
nsCSSToken::nsCSSToken()
  : mType(eCSSToken_Symbol)
{
}
void
void
nsCSSToken::AppendToString(nsString& aBuffer)
{
switch (mType) {
@ -160,6 +179,7 @@ nsCSSToken::AppendToString(nsString& aBuffer)
case eCSSToken_URL:
case eCSSToken_InvalidURL:
case eCSSToken_HTMLComment:
case eCSSToken_URange:
aBuffer.Append(mIdent);
break;
case eCSSToken_Number:
@ -694,6 +714,10 @@ nsCSSScanner::Next(nsCSSToken& aToken)
return PR_FALSE;
}
// UNICODE-RANGE
if ((ch == 'u' || ch == 'U') && Peek() == '+')
return ParseURange(ch, aToken);
// IDENT
if (StartsIdent(ch, Peek()))
return ParseIdent(ch, aToken);
@ -921,14 +945,7 @@ nsCSSScanner::ParseAndAppendEscape(nsString& aOutput)
Pushback(ch);
break;
} else if (IsHexDigit(ch)) {
if (IsDigit(ch)) {
rv = rv * 16 + (ch - '0');
} else {
// Note: c&7 just keeps the low three bits which causes
// upper and lower case alphabetics to both yield their
// "relative to 10" value for computing the hex value.
rv = rv * 16 + ((ch & 0x7) + 9);
}
rv = rv * 16 + HexDigitValue(ch);
} else {
NS_ASSERTION(IsWhitespace(ch), "bad control flow");
// single space ends escape
@ -1069,8 +1086,6 @@ nsCSSScanner::ParseAtKeyword(PRInt32 aChar, nsCSSToken& aToken)
return GatherIdent(0, aToken.mIdent);
}
#define CHAR_TO_DIGIT(_c) ((_c) - '0')
PRBool
nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
{
@ -1109,7 +1124,7 @@ nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
// Parse the integer part of the mantissa
NS_ASSERTION(IsDigit(c), "Why did we get called?");
do {
intPart = 10*intPart + CHAR_TO_DIGIT(c);
intPart = 10*intPart + DecimalDigitValue(c);
c = Read();
// The IsDigit check will do the right thing even if Read() returns < 0
} while (IsDigit(c));
@ -1124,7 +1139,7 @@ nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
// Power of ten by which we need to divide our next digit
float divisor = 10;
do {
fracPart += CHAR_TO_DIGIT(c) / divisor;
fracPart += DecimalDigitValue(c) / divisor;
divisor *= 10;
c = Read();
// The IsDigit check will do the right thing even if Read() returns < 0
@ -1149,7 +1164,7 @@ nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
c = Read();
NS_ASSERTION(IsDigit(c), "Peek() must have lied");
do {
exponent = 10*exponent + CHAR_TO_DIGIT(c);
exponent = 10*exponent + DecimalDigitValue(c);
c = Read();
// The IsDigit check will do the right thing even if Read() returns < 0
} while (IsDigit(c));
@ -1276,3 +1291,95 @@ nsCSSScanner::ParseString(PRInt32 aStop, nsCSSToken& aToken)
}
return PR_TRUE;
}
// UNICODE-RANGE tokens match the regular expression
//
// u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
//
// However, some such tokens are "invalid". There are three valid forms:
//
// u+[0-9a-f]{x} 1 <= x <= 6
// u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
// u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
//
// All unicode-range tokens have their text recorded in mIdent; valid ones
// are also decoded into mInteger and mInteger2, and mIntegerValid is set.
PRBool
nsCSSScanner::ParseURange(PRInt32 aChar, nsCSSToken& aResult)
{
  // aChar is the introducing 'u'/'U' handed in by the caller.  The '+' is
  // consumed here; the character after it is only peeked at for now.
  PRInt32 plusSign = Read();
  PRInt32 cur = Peek();

  // We should only ever be called if these things are true.
  NS_ASSERTION(aChar == 'u' || aChar == 'U',
               "unicode-range called with improper introducer (U)");
  NS_ASSERTION(plusSign == '+',
               "unicode-range called with improper introducer (+)");

  // "U+" that is not followed by a hex digit or '?' is not a
  // unicode-range token after all: put both introducer characters back
  // and hand the 'U' to the identifier scanner instead.
  if (!(IsHexDigit(cur) || cur == '?')) {
    Pushback(plusSign);
    Pushback(aChar);
    return ParseIdent(aChar, aResult);
  }

  // The token text is always recorded in mIdent, valid or not.
  aResult.mIdent.Truncate();
  aResult.mIdent.Append(aChar);
  aResult.mIdent.Append(plusSign);

  PRBool wellFormed = PR_TRUE;
  PRBool sawQuestion = PR_FALSE;
  PRUint32 rangeLow = 0;
  PRUint32 rangeHigh = 0;

  // First part: at most six characters, each a hex digit or '?'.  The
  // character that ends the loop stays in |cur| for the code below.
  for (int nRead = 1; ; nRead++) {
    cur = Read();
    if (nRead == 7 || !(IsHexDigit(cur) || cur == '?')) {
      break;
    }

    aResult.mIdent.Append(cur);
    if (!IsHexDigit(cur)) {
      // '?' is a wildcard digit: 0 at the low end, F at the high end.
      sawQuestion = PR_TRUE;
      rangeLow = rangeLow*16 + 0x0;
      rangeHigh = rangeHigh*16 + 0xF;
    } else {
      if (sawQuestion) {
        wellFormed = PR_FALSE; // all question marks should be at the end
      }
      const PRUint32 digit = HexDigitValue(cur);
      rangeLow = rangeLow*16 + digit;
      rangeHigh = rangeHigh*16 + digit;
    }
  }

  // Optional second part: '-' followed by one to six hex digits, which
  // replaces the high end computed so far.
  if (cur == '-' && IsHexDigit(Peek())) {
    // Wildcards cannot be combined with an explicit upper bound.
    if (sawQuestion) {
      wellFormed = PR_FALSE;
    }

    aResult.mIdent.Append(cur);
    rangeHigh = 0;
    for (int nRead = 1; ; nRead++) {
      cur = Read();
      if (nRead == 7 || !IsHexDigit(cur)) {
        break;
      }
      aResult.mIdent.Append(cur);
      rangeHigh = rangeHigh*16 + HexDigitValue(cur);
    }
  }

  // Whatever character ended the scan is not part of this token.
  Pushback(cur);

  aResult.mType = eCSSToken_URange;
  aResult.mIntegerValid = wellFormed;
  aResult.mInteger = rangeLow;
  aResult.mInteger2 = rangeHigh;
  return PR_TRUE;
}

View File

@ -98,6 +98,11 @@ enum nsCSSTokenType {
eCSSToken_Endsmatch, // "$="
eCSSToken_Containsmatch, // "*="
eCSSToken_URange, // Low in mInteger, high in mInteger2;
// mIntegerValid is true if the token is a
// valid range; mIdent preserves the textual
// form of the token for error reporting
// A special token indicating that there was an error in tokenization.
// It's always an unterminated string.
eCSSToken_Error // mSymbol + mIdent
@ -107,9 +112,10 @@ struct nsCSSToken {
nsAutoString mIdent NS_OKONHEAP;
float mNumber;
PRInt32 mInteger;
PRInt32 mInteger2;
nsCSSTokenType mType;
PRUnichar mSymbol;
PRPackedBool mIntegerValid; // for number and dimension
PRPackedBool mIntegerValid; // for number, dimension, urange
PRPackedBool mHasSign; // for number, percentage, and dimension
nsCSSToken();
@ -219,6 +225,7 @@ protected:
PRBool ParseNumber(PRInt32 aChar, nsCSSToken& aResult);
PRBool ParseRef(PRInt32 aChar, nsCSSToken& aResult);
PRBool ParseString(PRInt32 aChar, nsCSSToken& aResult);
PRBool ParseURange(PRInt32 aChar, nsCSSToken& aResult);
PRBool SkipCComment();
PRBool GatherIdent(PRInt32 aChar, nsString& aIdent);

View File

@ -89,7 +89,7 @@ var gCSSFontFaceDescriptors = {
},
"unicode-range": {
domProp: null,
values: [ "U+0-10FFFF", "U+3-7B3", "U+3??", "U+6A", "U+3????", "U+???", "U+302-302", "U+0-7,A-C", "U+100-17F,200-17F", "U+3??, U+500-513 ,U+612 , U+4????", "U+1FFF,U+200-27F" ],
invalid_values: [ "U+1????-2????" ]
values: [ "U+0-10FFFF", "U+3-7B3", "U+3??", "U+6A", "U+3????", "U+???", "U+302-302", "U+0-7,U+A-C", "U+100-17F,U+200-17F", "U+3??, U+500-513 ,U+612 , U+4????", "U+1FFF,U+200-27F" ],
invalid_values: [ "U+1????-2????", "U+0-7,A-C", "U+100-17F,200-17F" ]
}
}

View File

@ -40,9 +40,6 @@ function fake_set_property(descriptor, value) {
function xfail_parse(descriptor, value) {
switch (descriptor) {
case "unicode-range":
// not yet implemented
return true;
case "src":
// not clear whether this is an error or not, so mark todo for now
return value == "local(serif)";

View File

@ -38,7 +38,7 @@
noncanonical: true },
// Correct but unusual font-family.
{ rule: _("font-family: Hoefler Text;"),
{ rule: _("font-family: Hoefler Text;"),
d: {"font-family" : "\"Hoefler Text\""},
noncanonical: true },
@ -164,8 +164,91 @@
d: { "src" : "url(\"/fonts/Mouse\")" },
noncanonical: true },
// unicode-range is not implemented (bug 443976).
// tests for that omitted for now.
// Correct unicode-range:
{ rule: _("unicode-range: U+00A5;"), d: { "unicode-range" : "U+00A5" } },
{ rule: _("unicode-range: U+A5;"),
d: { "unicode-range" : "U+00A5" }, noncanonical: true },
{ rule: _("unicode-range: U+00a5;"),
d: { "unicode-range" : "U+00A5" }, noncanonical: true },
{ rule: _("unicode-range: u+00a5;"),
d: { "unicode-range" : "U+00A5" }, noncanonical: true },
{ rule: _("unicode-range: U+0000-00FF;"),
d: { "unicode-range" : "U+0000-00FF" } },
{ rule: _("unicode-range: U+00??;"),
d: { "unicode-range" : "U+0000-00FF" }, noncanonical: true },
{ rule: _("unicode-range: U+?"),
d: { "unicode-range" : "U+0000-000F" }, noncanonical: true },
{ rule: _("unicode-range: U+??????"),
d: { "unicode-range" : "U+0000-10FFFF" }, noncanonical: true },
{ rule: _("unicode-range: U+590-5ff;"),
d: { "unicode-range" : "U+0590-05FF" }, noncanonical: true },
{ rule: _("unicode-range: U+A0000-12FFFF"),
d: { "unicode-range" : "U+A0000-10FFFF" }, noncanonical: true },
{ rule: _("unicode-range: U+A5, U+4E00-9FFF, U+30??, U+FF00-FF9F;"),
d: { "unicode-range" : "U+00A5, U+4E00-9FFF, U+3000-30FF, U+FF00-FF9F" },
noncanonical: true },
{ rule: _("unicode-range: U+104??;"),
d: { "unicode-range" : "U+10400-104FF" }, noncanonical: true },
{ rule: _("unicode-range: U+320??, U+321??, U+322??, U+323??, U+324??, U+325??;"),
d: { "unicode-range" : "U+32000-320FF, U+32100-321FF, U+32200-322FF, U+32300-323FF, U+32400-324FF, U+32500-325FF" },
noncanonical: true },
{ rule: _("unicode-range: U+100000-10ABCD;"),
d: { "unicode-range" : "U+100000-10ABCD" } },
{ rule: _("unicode-range: U+0121 , U+1023"),
d: { "unicode-range" : "U+0121, U+1023" }, noncanonical: true },
{ rule: _("unicode-range: U+0121/**/, U+1023"),
d: { "unicode-range" : "U+0121, U+1023" }, noncanonical: true },
// Incorrect unicode-range:
{ rule: _("unicode-range:"), d: {} },
{ rule: _("unicode-range: U+"), d: {} },
{ rule: _("unicode-range: U+8FFFFFFF"), d: {} },
{ rule: _("unicode-range: U+8FFF-7000"), d: {} },
{ rule: _("unicode-range: U+8F??-9000"), d: {} },
{ rule: _("unicode-range: U+9000-9???"), d: {} },
{ rule: _("unicode-range: U+??00"), d: {} },
{ rule: _("unicode-range: U+12345678?"), d: {} },
{ rule: _("unicode-range: U+1????????"), d: {} },
{ rule: _("unicode-range: twelve"), d: {} },
{ rule: _("unicode-range: 1000"), d: {} },
{ rule: _("unicode-range: 13??"), d: {} },
{ rule: _("unicode-range: 1300-1377"), d: {} },
{ rule: _("unicode-range: U-1000"), d: {} },
{ rule: _("unicode-range: U+nnnn"), d: {} },
{ rule: _("unicode-range: U+0121 U+1023"), d: {} },
{ rule: _("unicode-range: U+ 0121"), d: {} },
{ rule: _("unicode-range: U +0121"), d: {} },
{ rule: _("unicode-range: U+0121-"), d: {} },
{ rule: _("unicode-range: U+0121- 1023"), d: {} },
{ rule: _("unicode-range: U+0121 -1023"), d: {} },
{ rule: _("unicode-range: U+012 ?"), d: {} },
{ rule: _("unicode-range: U+01 2?"), d: {} },
// Thorough test of seven-digit rejection: all these are syntax errors
{ rule: _("unicode-range: U+1034560, U+A5"), d: {} },
{ rule: _("unicode-range: U+1034569, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456a, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456f, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456?, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456-1034560, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456-1034569, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456-103456a, U+A5"), d: {} },
{ rule: _("unicode-range: U+103456-103456f, U+A5"), d: {} },
// Syntactically invalid unicode-range tokens invalidate the
// entire descriptor
{ rule: _("unicode-range: U+1, U+2, U+X"), d: {} },
{ rule: _("unicode-range: U+A5, U+0?F"), d: {} },
{ rule: _("unicode-range: U+A5, U+0F?-E00"), d: {} },
// Descending ranges and ranges outside 0-10FFFF are ignored
// but do not invalidate the descriptor
{ rule: _("unicode-range: U+A5, U+90-30"),
d: { "unicode-range" : "U+00A5" }, noncanonical: true },
{ rule: _("unicode-range: U+A5, U+220043"),
d: { "unicode-range" : "U+00A5" }, noncanonical: true },
];
var display = document.getElementById("display");