diff --git a/AUTHORS b/AUTHORS index 99b1a25..bb3b285 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,9 +1,4 @@ -Unicode, SAX, DOM implementations: - Emmanuel Briot - -Initial Http input: - Christophe Baillon (ITRIS) - -ISO 8859-15 support: - Martin Krischik - +Unicode, SAX, DOM implementations: Emmanuel Briot +Initial Http input: Christophe Baillon (ITRIS) +ISO 8859-15 support: Martin Krischik +Initial Incomplete_Encoding support: Jehan Pages diff --git a/features-40 b/features-40 index 5e3ae60..9b4f03c 100644 --- a/features-40 +++ b/features-40 @@ -38,6 +38,13 @@ Reader will still work, although it will be less efficient in most cases. New features in version 4.0 --------------------------- +NF-40-J818-003 Unicode: new exception Incomplete_Encoding (2010-08-31) + + A new exception was defined in the unicode module. This exception is + used to distinguish between cases where we might simply be missing + some characters (and thus should wait for more) and cases where the + encoding is indeed invalid. + NF-40-J610-021 XML/Ada is compatible with pragma Ravenscar (2010-06-29) XML/Ada is now compatible with the ravenscar profile diff --git a/input_sources/input_sources-file.adb b/input_sources/input_sources-file.adb index 8ac4b55..8d13ad4 100644 --- a/input_sources/input_sources-file.adb +++ b/input_sources/input_sources-file.adb @@ -135,6 +135,10 @@ package body Input_Sources.File is begin From.Es.Read (From.Buffer.all, From.Index, C); C := From.Cs.To_Unicode (C); + exception + -- For a file input, an incomplete encoding is invalid. 
+ when Incomplete_Encoding => + raise Invalid_Encoding; end Next_Char; --------- diff --git a/input_sources/input_sources-http.adb b/input_sources/input_sources-http.adb index 0dcfc69..3d831cc 100644 --- a/input_sources/input_sources-http.adb +++ b/input_sources/input_sources-http.adb @@ -1,7 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2008, AdaCore -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -326,6 +326,13 @@ package body Input_Sources.Http is begin From.Es.Read (From.Buffer.all, From.Index, C); C := From.Cs.To_Unicode (C); + + exception + -- The whole page has been fully loaded in the Open step. + -- Hence if the buffer ends with an Incomplete_Encoding, this + -- is a fatal error. + when Incomplete_Encoding => + raise Invalid_Encoding; end Next_Char; --------- diff --git a/input_sources/input_sources-strings.adb b/input_sources/input_sources-strings.adb index d9d3978..4106773 100644 --- a/input_sources/input_sources-strings.adb +++ b/input_sources/input_sources-strings.adb @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -122,6 +121,11 @@ package body Input_Sources.Strings is C : out Unicode.Unicode_Char) is begin From.Encoding.Read (From.Buffer.all, From.Index, C); + + exception + -- For a String input, an incomplete encoding is invalid. 
+ when Incomplete_Encoding => + raise Invalid_Encoding; end Next_Char; --------- diff --git a/unicode/unicode-ces-utf16.adb b/unicode/unicode-ces-utf16.adb index 65534f3..44486b8 100644 --- a/unicode/unicode-ces-utf16.adb +++ b/unicode/unicode-ces-utf16.adb @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2006 -- --- AdaCore -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -106,7 +105,7 @@ package body Unicode.CES.Utf16 is C, D : Unicode_Char; begin if Index + 1 > Str'Last then - raise Invalid_Encoding; + raise Incomplete_Encoding; end if; C := Character'Pos (Str (Index + 1)) * 256 + Character'Pos (Str (Index)); @@ -114,7 +113,7 @@ package body Unicode.CES.Utf16 is -- High surrogate value if C in 16#D800# .. 16#DBFF# then if Index + 3 > Str'Last then - raise Invalid_Encoding; + raise Incomplete_Encoding; end if; D := Character'Pos (Str (Index + 3)) * 256 + Character'Pos (Str (Index + 2)); @@ -146,7 +145,7 @@ package body Unicode.CES.Utf16 is C, D : Unicode_Char; begin if Index + 1 > Str'Last then - raise Invalid_Encoding; + raise Incomplete_Encoding; end if; C := Character'Pos (Str (Index)) * 256 + Character'Pos (Str (Index + 1)); @@ -154,7 +153,7 @@ package body Unicode.CES.Utf16 is -- High surrogate value if C in 16#D800# .. 
16#DBFF# then if Index + 3 > Str'Last then - raise Invalid_Encoding; + raise Incomplete_Encoding; end if; D := Character'Pos (Str (Index + 2)) * 256 + Character'Pos (Str (Index + 3)); diff --git a/unicode/unicode-ces-utf16.ads b/unicode/unicode-ces-utf16.ads index fffa170..841131f 100644 --- a/unicode/unicode-ces-utf16.ads +++ b/unicode/unicode-ces-utf16.ads @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -79,6 +78,9 @@ package Unicode.CES.Utf16 is Index : in out Positive; Char : out Unicode_Char); -- Return the character starting at location Index in Str + -- Invalid_Encoding is raised if no valid byte sequence starts at Index. + -- Incomplete_Encoding is raised if there are not enough characters for + -- a valid encoding. 
procedure Read_BE (Str : Utf16_BE_String; diff --git a/unicode/unicode-ces-utf32.adb b/unicode/unicode-ces-utf32.adb index 762015d..ffadbf4 100644 --- a/unicode/unicode-ces-utf32.adb +++ b/unicode/unicode-ces-utf32.adb @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -77,7 +76,7 @@ package body Unicode.CES.Utf32 is Char : out Unicode_Char) is begin if Index > Str'Last - 3 then - raise Invalid_Encoding; + raise Incomplete_Encoding; else Char := Character'Pos (Str (Index)) + Character'Pos (Str (Index + 1)) * (2 ** 8) @@ -97,7 +96,7 @@ package body Unicode.CES.Utf32 is Char : out Unicode_Char) is begin if Index > Str'Last - 3 then - raise Invalid_Encoding; + raise Incomplete_Encoding; else Char := Character'Pos (Str (Index + 3)) + Character'Pos (Str (Index + 2)) * (2 ** 8) diff --git a/unicode/unicode-ces-utf32.ads b/unicode/unicode-ces-utf32.ads index 2972777..d226c46 100644 --- a/unicode/unicode-ces-utf32.ads +++ b/unicode/unicode-ces-utf32.ads @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -81,6 +80,9 @@ package Unicode.CES.Utf32 is Index : in out Positive; Char : out Unicode_Char); -- Return the character starting at location Index in Str + -- Invalid_Encoding is raised if no valid byte sequence starts at Index. + -- Incomplete_Encoding is raised if there are not enough characters for + -- a valid encoding. 
procedure Encode_BE (Char : Unicode_Char; diff --git a/unicode/unicode-ces-utf8.adb b/unicode/unicode-ces-utf8.adb index 695e76b..7dbe334 100644 --- a/unicode/unicode-ces-utf8.adb +++ b/unicode/unicode-ces-utf8.adb @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -139,11 +138,7 @@ package body Unicode.CES.Utf8 is raise Invalid_Encoding; end if; - if Str'Last < Len then - raise Invalid_Encoding; - end if; - - for Count in Index + 1 .. Len loop + for Count in Index + 1 .. Natural'Min (Len, Str'Last) loop C := Character'Pos (Str (Count)); if (C and 16#C0#) /= 16#80# then raise Invalid_Encoding; @@ -151,6 +146,10 @@ package body Unicode.CES.Utf8 is Val := (Val * (2 ** 6)) or (C and 16#3f#); end loop; + if Str'Last < Len then + raise Incomplete_Encoding; + end if; + Index := Len + 1; Char := Val; end Read; diff --git a/unicode/unicode-ces-utf8.ads b/unicode/unicode-ces-utf8.ads index 406a97c..838eb60 100644 --- a/unicode/unicode-ces-utf8.ads +++ b/unicode/unicode-ces-utf8.ads @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -82,8 +81,8 @@ package Unicode.CES.Utf8 is -- Return the character starting at location Index in Str, and move Index -- to the beginning of the next location -- Invalid_Encoding is raised if not valid byte sequence starts at Index. - -- Constraint_Error is raised if Index does not point to a valid character - -- in Str. 
+ -- Incomplete_Encoding is raised if there are not enough characters for + -- a valid encoding. function Width (Char : Unicode_Char) return Natural; -- Return the number of bytes occupied by the Utf8 representation of Char diff --git a/unicode/unicode-ces.ads b/unicode/unicode-ces.ads index ce61dd0..0d7962f 100644 --- a/unicode/unicode-ces.ads +++ b/unicode/unicode-ces.ads @@ -1,8 +1,7 @@ ----------------------------------------------------------------------- -- XML/Ada - An XML suite for Ada95 -- -- -- --- Copyright (C) 2001-2002 -- --- ACT-Europe -- +-- Copyright (C) 2001-2010, AdaCore -- -- -- -- This library is free software; you can redistribute it and/or -- -- modify it under the terms of the GNU General Public -- @@ -120,7 +119,9 @@ package Unicode.CES is -- This function returns the character at position Index in the byte -- sequence Str, and moves Index to the start of the next character. -- If Str doesn't contain enough bytes for a valid encoding of a character, - -- Invalid_Encoding is raised. + -- Incomplete_Encoding is raised. + -- If Str contains an invalid byte sequence at Index, Invalid_Encoding + -- is raised. type Width_Function is access function (Char : Unicode.Unicode_Char) return Natural; @@ -164,6 +165,11 @@ package Unicode.CES is ---------------- Invalid_Encoding : exception; - -- Raised whener the byte sequence associated with a given encoding + -- Raised whenever the byte sequence associated with a given encoding -- scheme is not valid. + + Incomplete_Encoding : exception; + -- Raised whenever the byte sequence associated with a given encoding + -- scheme is incomplete. + end Unicode.CES;