Files
xmlada/tests/unicode/test_unicode.adb
Vasiliy Fofanov 26e0d6b450 Clean up redundant context clauses. UC14-034
Change-Id: I33ca879d3e0f670385b463dd13f2f10074398df2
2021-12-16 12:29:38 +00:00

237 lines
9.7 KiB
Ada

------------------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2004-2021, AdaCore --
-- --
-- This library is free software; you can redistribute it and/or modify it --
-- under terms of the GNU General Public License as published by the Free --
-- Software Foundation; either version 3, or (at your option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. --
-- --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception, --
-- version 3.1, as published by the Free Software Foundation. --
-- --
-- You should have received a copy of the GNU General Public License and --
-- a copy of the GCC Runtime Library Exception along with this program; --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
-- <http://www.gnu.org/licenses/>. --
-- --
------------------------------------------------------------------------------
with Unicode; use Unicode;
with Unicode.CES; use Unicode.CES;
with Unicode.CES.Utf32; use Unicode.CES.Utf32;
with Unicode.CES.Utf16; use Unicode.CES.Utf16;
with Unicode.CES.Utf8;
with Unicode.CES.Basic_8bit;
with Unicode.CCS; use Unicode.CCS;
with Unicode.Encodings; use Unicode.Encodings;
with Unicode.CCS.Iso_8859_2; use Unicode.CCS.Iso_8859_2;
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Exceptions; use Ada.Exceptions;
procedure Test_Unicode is
Test_Num : Natural := 1;
Had_Error : Boolean := False;
procedure Assert (S1, S2 : String; Msg : String);
function Image (Str : String) return String;
function Encode
(C : Unicode_Char; Scheme : Encoding_Scheme) return Byte_Sequence;
-----------
-- Image --
-----------
function Image (Str : String) return String is
S : String (1 .. 6 * Str'Length);
K : Natural := S'First;
begin
for J in Str'Range loop
declare
Img : constant String := Natural'Image (Character'Pos (Str (J)))
& " ";
begin
S (K .. K + Img'Length - 1) := Img;
K := K + Img'Length;
end;
end loop;
return S (1 .. K - 1);
end Image;
------------
-- Assert --
------------
procedure Assert (S1, S2 : String; Msg : String) is
begin
Test_Num := Test_Num + 1;
if S1 /= S2 then
Put_Line ("### (" & Natural'Image (Test_Num) & ") " & Msg);
Put_Line (" --" & S1 & "--");
Put_Line (" --" & S2 & "--");
Had_Error := True;
end if;
end Assert;
------------
-- Encode --
------------
function Encode
(C : Unicode_Char; Scheme : Encoding_Scheme) return Byte_Sequence
is
Buffer : Byte_Sequence (1 .. 20);
Index : Natural := Buffer'First - 1;
begin
Scheme.Encode (C, Buffer, Index);
return Buffer (Buffer'First .. Index);
end Encode;
Utf16_S : constant Utf16_String := Encode (16#004D#, Utf16_LE_Encoding)
& Encode (16#0061#, Utf16_LE_Encoding)
& Encode (16#D800#, Utf16_LE_Encoding)
& Encode (16#DC00#, Utf16_LE_Encoding);
Utf16_LE_BOM : constant String :=
Character'Val (16#FF#) & Character'Val (16#FE#);
Utf16_BE_BOM : constant String :=
Character'Val (16#FE#) & Character'Val (16#FF#);
begin
-- Simple Utf32 encoding of characters
Assert (Image (Encode (16#004D#, Utf32_LE_Encoding)), " 77 0 0 0 ",
"Incorrect encoding for 16#004D#");
Assert (Image (Encode (16#0061#, Utf32_LE_Encoding)),
" 97 0 0 0 ",
"Incorrect encoding for 16#0061#");
Assert (Image (Encode (16#D800#, Utf32_LE_Encoding)),
" 0 216 0 0 ",
"Incorrect encoding for 16#D800#");
-- Conversion from utf16 to utf32
Assert (Image (Utf16_S), " 77 0 97 0 0 216 0 220 ",
"Incorrect encoding of Utf16_S");
Assert (Image (Utf16.To_Utf32 (Utf16_S)),
" 77 0 0 0 97 0 0 0 0 0 1 0 ",
"Incorrect conversion to utf32 for Utf16_S");
Assert (Image (Utf16.To_Unicode_LE (Utf16_S)),
" 77 0 97 0 0 216 0 220 ",
"Incorrect conversion of Utf16_S to little-endian");
Assert (Image (Utf16.To_Unicode_LE (Utf16_S)),
" 77 0 97 0 0 216 0 220 ",
"Incorrect conversion of Utf16_S to little-endian");
Assert (Image (Utf16.To_Unicode_LE (Utf16_LE_BOM & Utf16_S)),
" 77 0 97 0 0 216 0 220 ",
"Incorrect conversion of Utf16_S when adding LE_BOM");
Assert (Image (Utf16.To_Unicode_LE (Utf16_BE_BOM & Utf16_S)),
" 0 77 0 97 216 0 220 0 ",
"Incorrect conversion of Utf16_S when adding BE_BOM");
-- Conversion from utf16 to Utf8
declare
Utf8_S : constant String :=
Utf8.From_Utf32 (Utf16.To_Utf32 (Utf16_S));
begin
Assert (Image (Utf8_S), " 77 97 240 144 128 128 ",
"Incorrect conversion of Utf16_S to Utf8");
Assert (Image (Utf8.To_Unicode_LE (Utf8_S)),
" 77 97 240 144 128 128 ",
"Incorrect conversion of Utf16_S to Utf8, checking BOM");
Assert (Utf16.From_Utf32 (Utf8.To_Utf32 (Utf8_S)),
Utf16_S,
"Incorrect conversion of Utf8_S to Utf16");
end;
-- Conversion from other character sets
declare
Latin_1_Utf16 : constant String :=
Encode (16#E9#, Utf16_LE_Encoding) -- e_Acute
& Encode (Character'Pos ('t'), Utf16_LE_Encoding)
& Encode (16#E9#, Utf16_LE_Encoding); -- e_Acute
Latin_2_Utf16 : constant String :=
Encode (16#F9#, Utf16_LE_Encoding) -- u_dot
& Encode (Character'Pos ('t'), Utf16_LE_Encoding)
& Encode (16#E8#, Utf16_LE_Encoding); -- c carron
Latin_1_8bit : constant String :=
Character'Val (16#E9#) -- e_Acute
& 't'
& Character'Val (16#E9#); -- e_Acute
begin
Assert (Image (Utf32.To_Unicode_LE (Utf16.To_Utf32 (Latin_1_Utf16))),
" 233 0 0 0 116 0 0 0 233 0 0 0 ",
"Incorrect conversion to utf32 for Latin1_Utf16, with "
& "Iso_8859_1");
Assert (Image (Utf32.To_Unicode_LE
(Utf16.To_Utf32 (Latin_1_Utf16),
Get_By_Name ("Latin1").Character_Set)),
" 233 0 0 0 116 0 0 0 233 0 0 0 ",
"Incorrect conversion to utf32 for Latin1_Utf16, with "
& "Iso_8859_1");
Assert (Image (Utf32.To_Unicode_LE (Utf16.To_Utf32 (Latin_2_Utf16))),
" 249 0 0 0 116 0 0 0 232 0 0 0 ",
"Incorrect conversion to utf32 for Latin2_Utf16, with "
& "Iso_8859_2");
Assert (Image (Utf32.To_Unicode_LE
(Utf16.To_Utf32 (Latin_2_Utf16),
Iso_8859_2_Character_Set)),
" 111 1 0 0 116 0 0 0 13 1 0 0 ",
"Incorrect conversion to utf32 for Latin2_Utf16, with "
& "Iso_8859_2");
Assert (Image (Basic_8bit.To_Unicode_LE (Latin_1_8bit)),
" 233 116 233 ",
"Incorrect conversion to 8bit for Latin1_8bit");
Assert (Image (Basic_8bit.To_Unicode_LE
(Latin_1_8bit, Get_By_Name ("Latin1").Character_Set)),
" 233 116 233 ",
"Incorrect conversion to 8bit for Latin1_8bit, with "
& "Iso_8859_1");
-- The next two instructions do the same thing, although the first form
-- is prefered
Assert (Image (Basic_8bit.To_Utf32 (Basic_8bit.To_Unicode_LE
(Latin_1_8bit,
Get_By_Name ("Latin1").Character_Set))),
" 233 0 0 0 116 0 0 0 233 0 0 0 ",
"Incorrect conversion to 8bit for Latin1_8bit, with "
& "Iso_8859_1");
Assert (Image (Utf32.To_Unicode_LE
(Basic_8bit.To_Utf32 (Latin_1_8bit),
Get_By_Name ("Latin1").Character_Set)),
" 233 0 0 0 116 0 0 0 233 0 0 0 ",
"Incorrect conversion to 8bit for Latin1_8bit, with "
& "Iso_8859_1");
end;
declare
Asc : constant Unicode_Encoding := Get_By_Name ("ascii");
Utf8 : constant Unicode_Encoding := Get_By_Name ("utf8");
Str : constant String := "Ascii string";
begin
Assert (Convert (Str, From => Asc, To => Asc), Str,
"Incorrect conversion from ascii to ascii");
Assert (Convert (Str, From => Asc, To => Utf8), Str,
"Incorrect conversion from ascii to utf8");
Assert (Convert
(Convert (Str, From => Asc, To => Utf8),
From => Utf8, To => Asc),
Str,
"Incorrect conversion from ascii to utf8 and back");
end;
if not Had_Error then
Put_Line ("test_unicode : SUCCESS");
end if;
exception
when E : others =>
Raise_Exception
(Exception_Identity (E),
Exception_Message (E)
& " while processing test" & Natural'Image (Test_Num));
end Test_Unicode;