Files
xmlada/unicode/unicode-encodings.adb
Emmanuel Briot 53dc51bf5c Update copyright notices
Change-Id: Iae1283292b460ead48dedc565d6f24259e497e69
2017-01-03 10:02:38 +01:00

154 lines
7.5 KiB
Ada

------------------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2004-2017, AdaCore --
-- --
-- This library is free software; you can redistribute it and/or modify it --
-- under terms of the GNU General Public License as published by the Free --
-- Software Foundation; either version 3, or (at your option) any later --
-- version. This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHAN- --
-- TABILITY or FITNESS FOR A PARTICULAR PURPOSE. --
-- --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception, --
-- version 3.1, as published by the Free Software Foundation. --
-- --
-- You should have received a copy of the GNU General Public License and --
-- a copy of the GCC Runtime Library Exception along with this program; --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
-- <http://www.gnu.org/licenses/>. --
-- --
------------------------------------------------------------------------------
with Ada.Characters.Handling; use Ada.Characters.Handling;
with Ada.Exceptions; use Ada.Exceptions;
with Ada.Strings.Unbounded; use Ada.Strings.Unbounded;
with Unicode.CES; use Unicode.CES;
with Unicode.CES.Basic_8bit; use Unicode.CES.Basic_8bit;
with Unicode.CES.Utf8; use Unicode.CES.Utf8;
with Unicode.CES.Utf16; use Unicode.CES.Utf16;
with Unicode.CES.Utf32; use Unicode.CES.Utf32;
with Unicode.CCS; use Unicode.CCS;
with Unicode.CCS.Iso_8859_1; use Unicode.CCS.Iso_8859_1;
with Unicode.CCS.Iso_8859_2; use Unicode.CCS.Iso_8859_2;
with Unicode.CCS.Iso_8859_3; use Unicode.CCS.Iso_8859_3;
with Unicode.CCS.Iso_8859_4; use Unicode.CCS.Iso_8859_4;
with Unicode.CCS.Iso_8859_15; use Unicode.CCS.Iso_8859_15;
with Unicode.CCS.Windows_1251; use Unicode.CCS.Windows_1251;
with Unicode.CCS.Windows_1252; use Unicode.CCS.Windows_1252;
package body Unicode.Encodings is
Cst_Utf16 : aliased constant String := "utf-16";
Cst_Utf16_BE : aliased constant String := "utf-16be";
Cst_Utf16_LE : aliased constant String := "utf-16le";
Cst_Utf8 : aliased constant String := "utf-8";
Cst_Utf32 : aliased constant String := "utf-32";
Cst_Utf32_BE : aliased constant String := "utf-32be";
Cst_Utf32_LE : aliased constant String := "utf-32le";
-----------------
-- Get_By_Name --
-----------------
function Get_By_Name (Name : String) return Unicode_Encoding is
N : constant String := To_Lower (Name);
begin
if N = Cst_Utf16 or else N = Cst_Utf16_LE then
return (Name => Cst_Utf16'Access,
Character_Set => Unicode_Character_Set,
Encoding_Scheme => Utf16_LE_Encoding);
elsif N = Cst_Utf16_BE then
return (Name => Cst_Utf16_BE'Access,
Character_Set => Unicode_Character_Set,
Encoding_Scheme => Utf16_BE_Encoding);
elsif N = Cst_Utf32 or else N = Cst_Utf32_LE then
return (Name => Cst_Utf32'Access,
Character_Set => Unicode_Character_Set,
Encoding_Scheme => Utf32_LE_Encoding);
elsif N = Cst_Utf32_BE then
return (Name => Cst_Utf32_BE'Access,
Character_Set => Unicode_Character_Set,
Encoding_Scheme => Utf32_BE_Encoding);
elsif N = Cst_Utf8 or else N = "utf8" then
return (Name => Cst_Utf8'Access,
Character_Set => Unicode_Character_Set,
Encoding_Scheme => Utf8_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_1.Name1)
or else N = "ascii"
then
return (Name => Unicode.CCS.Iso_8859_1.Name1'Access,
Character_Set => Iso_8859_1_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_1.Name2) then
return (Name => Unicode.CCS.Iso_8859_1.Name2'Access,
Character_Set => Iso_8859_1_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_2.Name1) then
return (Name => Unicode.CCS.Iso_8859_2.Name1'Access,
Character_Set => Iso_8859_2_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_2.Name2) then
return (Name => Unicode.CCS.Iso_8859_2.Name2'Access,
Character_Set => Iso_8859_2_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_3.Name1) then
return (Name => Unicode.CCS.Iso_8859_3.Name1'Access,
Character_Set => Iso_8859_3_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_4.Name1) then
return (Name => Unicode.CCS.Iso_8859_4.Name1'Access,
Character_Set => Iso_8859_4_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Iso_8859_15.Name1) then
return (Name => Unicode.CCS.Iso_8859_15.Name1'Access,
Character_Set => Iso_8859_15_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Windows_1251.Name1) then
return (Name => Unicode.CCS.Windows_1251.Name1'Access,
Character_Set => Windows_1251_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
elsif N = To_Lower (Unicode.CCS.Windows_1252.Name1) then
return (Name => Unicode.CCS.Windows_1252.Name1'Access,
Character_Set => Windows_1252_Character_Set,
Encoding_Scheme => Basic_8bit_Encoding);
else
Raise_Exception
(Invalid_Encoding'Identity, "Invalid encoding: " & Name);
end if;
end Get_By_Name;
-------------
-- Convert --
-------------
function Convert
(Str : Byte_Sequence;
From : Unicode_Encoding := Get_By_Name ("iso-8859-15");
To : Unicode_Encoding := Get_By_Name ("utf-8"))
return Byte_Sequence
is
J : Natural := Str'First;
C : Unicode.Unicode_Char;
Buffer : Byte_Sequence (1 .. 20);
Index : Natural;
Result : Unbounded_String;
begin
if From = To then
return Str;
end if;
while J <= Str'Last loop
From.Encoding_Scheme.Read (Str, J, C);
C := From.Character_Set.To_Unicode (C);
C := To.Character_Set.To_CS (C);
Index := 0;
To.Encoding_Scheme.Encode (C, Buffer, Index);
Append (Result, Buffer (1 .. Index));
end loop;
return To_String (Result);
end Convert;
end Unicode.Encodings;