mirror of
https://github.com/AdaCore/VSS.git
synced 2026-02-12 13:06:25 -08:00
1972 lines
78 KiB
Ada
1972 lines
78 KiB
Ada
--
|
|
-- Copyright (C) 2023-2025, AdaCore
|
|
--
|
|
-- SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
--
|
|
|
|
with VSS.Implementation.UCD_Normalization_Common;
|
|
with VSS.Implementation.Strings;
|
|
with VSS.Implementation.UTF8_Strings.Mutable_Operations;
|
|
|
|
package body VSS.Implementation.UTF8_Normalization is
|
|
|
|
use type VSS.Implementation.Strings.Character_Count;
|
|
use type VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
|
|
procedure Append_Reordered
|
|
(Result_Data : in out
|
|
VSS.Implementation.UTF8_Strings.UTF8_String_Data;
|
|
Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Decomposition_Data :
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Data_Offset_Array;
|
|
Last_CCC :
|
|
in out VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
|
|
Source_Storage :
|
|
VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
|
|
Source_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Source_Size : VSS.Unicode.UTF8_Code_Unit_Count);
|
|
--  Append full decomposition mapping stored in Source_Storage (Source_Size
--  code units starting at Source_Offset) to the end of the Result_Data
--  with canonical reordering when necessary.
|
|
|
|
function Get_Decomposition_Information
|
|
(Decomposition_Data :
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Data_Offset_Array;
|
|
Code : VSS.Unicode.Code_Point)
|
|
return VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
|
|
-- Returns decomposition information for given data and character.
|
|
|
|
----------------------
|
|
-- Append_Reordered --
|
|
----------------------
|
|
|
|
procedure Append_Reordered
  (Result_Data        : in out
     VSS.Implementation.UTF8_Strings.UTF8_String_Data;
   Result_Size        : in out VSS.Unicode.UTF8_Code_Unit_Count;
   Decomposition_Data :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Data_Offset_Array;
   Last_CCC           :
     in out VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
   Source_Storage     :
     VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
   Source_Offset      : VSS.Unicode.UTF8_Code_Unit_Offset;
   Source_Size        : VSS.Unicode.UTF8_Code_Unit_Count)
is
   --  Walks over the characters encoded in Source_Storage in the range
   --  Source_Offset .. Source_Offset + Source_Size - 1 and inserts each of
   --  them into Result_Data at the position selected by Reorder_And_Insert.
   --  Only characters with CCC less than Last_CCC are supported; any other
   --  character results in Program_Error.

   use all type VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;

   procedure Reorder_And_Insert
     (Result_Data : in out VSS.Implementation.UTF8_Strings.UTF8_String_Data;
      Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
      CCC         : VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
      Storage     : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
      Offset      : VSS.Unicode.UTF8_Code_Unit_Offset;
      Size        : VSS.Unicode.UTF8_Code_Unit_Count);
   --  Insert the character encoded by Size code units of Storage starting
   --  at Offset into the string, keeping canonical ordering: the character
   --  is placed right after the nearest preceding character that either
   --  has CCC_NR or has CCC not greater than the given CCC.

   ------------------------
   -- Reorder_And_Insert --
   ------------------------

   procedure Reorder_And_Insert
     (Result_Data : in out VSS.Implementation.UTF8_Strings.UTF8_String_Data;
      Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
      CCC         : VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
      Storage     : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
      Offset      : VSS.Unicode.UTF8_Code_Unit_Offset;
      Size        : VSS.Unicode.UTF8_Code_Unit_Count)
   is
      Scan_Offset   : VSS.Unicode.UTF8_Code_Unit_Offset := Result_Size;
      --  Position of the backward scan over the already built result.

      Target_Offset : VSS.Unicode.UTF8_Code_Unit_Offset := Result_Size;
      --  Start of the last character that has been skipped over; this is
      --  the place where new character is inserted.

      Scanned_Code  : VSS.Unicode.Code_Point;
      Scanned_Info  :
        VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;

   begin
      while Scan_Offset /= 0 loop
         VSS.Implementation.UTF8_Strings.Unchecked_Backward_Decode
           (Result_Data, Scan_Offset, Scanned_Code);

         Scanned_Info :=
           Get_Decomposition_Information
             (Decomposition_Data, Scanned_Code);

         --  Stop at the first character that must stay in front of the
         --  inserted one.

         exit when Scanned_Info.CCC = CCC_NR
           or Scanned_Info.CCC <= CCC;

         Target_Offset := Scan_Offset;
      end loop;

      VSS.Implementation.UTF8_Strings.Mutable_Operations.Unchecked_Insert
        (Result_Data,
         Result_Size,
         Target_Offset,
         Storage,
         Offset,
         Size,
         1);
   end Reorder_And_Insert;

   Next_Offset    : VSS.Unicode.UTF8_Code_Unit_Offset := Source_Offset;
   Current_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
   Current_Code   : VSS.Unicode.Code_Point;
   Current_Info   :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;

begin
   while Next_Offset < Source_Offset + Source_Size loop
      Current_Offset := Next_Offset;

      VSS.Implementation.UTF8_Encoding.Unchecked_Decode_Forward
        (Source_Storage, Next_Offset, Current_Code);

      Current_Info :=
        Get_Decomposition_Information (Decomposition_Data, Current_Code);

      if Last_CCC <= Current_Info.CCC then
         pragma Warnings (Off);
         --  Disable warnings, this code is never executed for now,
         --  however, may need to be completed later.

         Last_CCC := Current_Info.CCC;

         raise Program_Error;

         pragma Warnings (On);

      else
         --  Character has smaller CCC than the last appended one, so it
         --  need to be moved before it.

         Reorder_And_Insert
           (Result_Data,
            Result_Size,
            Current_Info.CCC,
            Source_Storage,
            Current_Offset,
            Next_Offset - Current_Offset);
      end if;
   end loop;
end Append_Reordered;
|
|
|
|
---------------
-- Decompose --
---------------
|
|
|
|
procedure Decompose
  (Source_Storage     :
     VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
   Source_Size        : VSS.Unicode.UTF8_Code_Unit_Count;
   Decomposition_Data :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Data_Offset_Array;
   Result_Data        : out
     VSS.Implementation.UTF8_Strings.UTF8_String_Data)
is
   --  Build in Result_Data the decomposition of the first Source_Size code
   --  units of Source_Storage using mapping information selected by
   --  Decomposition_Data.
   --
   --  Processing alternates between two steps:
   --
   --   * scan forward over characters that need neither decomposition nor
   --     reordering and append the whole run to the result at once;
   --
   --   * handle the single character that stopped the scan: reorder it,
   --     decompose Hangul syllable algorithmically, or append its
   --     decomposition mapping (with reordering when necessary).

   use all type VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
   use type VSS.Unicode.Code_Point;

   procedure Reorder_And_Insert
     (Result_Data : in out VSS.Implementation.UTF8_Strings.UTF8_String_Data;
      Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
      CCC         : VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
      Storage     : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
      Offset      : VSS.Unicode.UTF8_Code_Unit_Offset;
      Size        : VSS.Unicode.UTF8_Code_Unit_Count);
   --  Insert given encoded character into the string preserving canonical
   --  ordering.

   ------------------------
   -- Reorder_And_Insert --
   ------------------------

   procedure Reorder_And_Insert
     (Result_Data : in out VSS.Implementation.UTF8_Strings.UTF8_String_Data;
      Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
      CCC         : VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
      Storage     : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
      Offset      : VSS.Unicode.UTF8_Code_Unit_Offset;
      Size        : VSS.Unicode.UTF8_Code_Unit_Count)
   is
      Previous_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
      Previous_Code   : VSS.Unicode.Code_Point;
      Previous_Info   :
        VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
      Insert_Offset   : VSS.Unicode.UTF8_Code_Unit_Offset;

   begin
      Previous_Offset := Result_Size;

      loop
         --  Insert_Offset always refers to the position just after the
         --  character that is going to be examined.

         Insert_Offset := Previous_Offset;

         exit when Previous_Offset = 0;

         VSS.Implementation.UTF8_Strings.Unchecked_Backward_Decode
           (Result_Data, Previous_Offset, Previous_Code);

         Previous_Info :=
           Get_Decomposition_Information
             (Decomposition_Data, Previous_Code);

         exit when Previous_Info.CCC = CCC_NR
           or Previous_Info.CCC <= CCC;
      end loop;

      VSS.Implementation.UTF8_Strings.Mutable_Operations.Unchecked_Insert
        (Result_Data,
         Result_Size,
         Insert_Offset,
         Storage,
         Offset,
         Size,
         1);
   end Reorder_And_Insert;

   Result_Size : VSS.Unicode.UTF8_Code_Unit_Count := 0;
   Code        : VSS.Unicode.Code_Point;
   Info        :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
   Start       : VSS.Unicode.UTF8_Code_Unit_Offset;
   --  Start of the run of source characters that can be copied as is.
   Out_Start   : VSS.Unicode.UTF8_Code_Unit_Offset;
   --  Start of the character that terminated the run (or end of source).
   Offset      : VSS.Unicode.UTF8_Code_Unit_Offset := 0;
   Length      : VSS.Implementation.Strings.Character_Count;
   --  Number of characters in the run Start .. Out_Start - 1.
   Last_CCC    : VSS.Implementation.UCD_Normalization_UTF8.CCC_Values :=
     VSS.Implementation.UCD_Normalization_UTF8.CCC_NR;

begin
   loop
      --  Check whether source string is in normalization form, and attempt
      --  to lookup for maximum length of normalized data.

      Start  := Offset;
      Length := 0;

      loop
         Out_Start := Offset;

         exit when Offset >= Source_Size;

         VSS.Implementation.UTF8_Encoding.Unchecked_Decode_Forward
           (Source_Storage, Offset, Code);

         Info := Get_Decomposition_Information (Decomposition_Data, Code);

         exit when not Info.Decomposition_QC;
         --  Copy data and run normalization

         exit when Last_CCC > Info.CCC and Info.CCC /= CCC_NR;
         --  Copy data and run reordering

         Last_CCC := Info.CCC;
         Length   := Length + 1;
      end loop;

      --  Copy found normalized data if any

      if Start /= Out_Start then
         VSS.Implementation.UTF8_Strings.Mutable_Operations.Unchecked_Append
           (Result_Data,
            Result_Size,
            Source_Storage,
            Start,
            Out_Start - Start,
            Length);
      end if;

      exit when Out_Start >= Source_Size;
      --  Source text has been processed completely, exit.

      if Info.Decomposition_QC then
         --  Apply canonical ordering algorithm to the next character in
         --  the source string.

         Reorder_And_Insert
           (Result_Data,
            Result_Size,
            Info.CCC,
            Source_Storage,
            Out_Start,
            Offset - Out_Start);

      else
         --  Apply decomposition mapping

         if Code in 16#AC00# .. 16#D7A3# then
            --  Hangul syllables are decomposed algorithmically. There are
            --  11_172 precomposed syllables, thus the last one is
            --  16#AC00# + 11_172 - 1 = 16#D7A3#.

            declare
               use type VSS.Unicode.UTF8_Code_Unit;

               S_Base : constant := 16#AC00#;
               L_Base : constant := 16#1100#;
               V_Base : constant := 16#1161#;
               T_Base : constant := 16#11A7#;

               T_Count : constant := 28;
               N_Count : constant := 588;  --  V_Count * T_Count

               S_Index : constant VSS.Unicode.Code_Point := Code - S_Base;
               L_Index : constant VSS.Unicode.Code_Point :=
                 S_Index / N_Count;
               V_Index : constant VSS.Unicode.Code_Point :=
                 (S_Index mod N_Count) / T_Count;
               T_Index : constant VSS.Unicode.Code_Point :=
                 S_Index mod T_Count;
               L_Part  : constant VSS.Unicode.Code_Point :=
                 L_Base + L_Index;
               V_Part  : constant VSS.Unicode.Code_Point :=
                 V_Base + V_Index;
               T_Part  : constant VSS.Unicode.Code_Point :=
                 T_Base + T_Index;

               Aux : VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
                 (0 .. 8);
               --  Room for three characters of three code units each.

            begin
               --  First byte of encoded sequence for all characters of the
               --  decomposition is always 16#E1#, thus don't compute it and
               --  ignore corresponding bits in L_Part/V_Part/T_Part.

               Aux (0) := 16#E1#;
               Aux (1) :=
                 16#80#
                   or VSS.Unicode.UTF8_Code_Unit
                        ((L_Part / 16#40#) mod 16#40#);
               Aux (2) :=
                 16#80# or VSS.Unicode.UTF8_Code_Unit (L_Part mod 16#40#);

               Aux (3) := 16#E1#;
               Aux (4) :=
                 16#80#
                   or VSS.Unicode.UTF8_Code_Unit
                        ((V_Part / 16#40#) mod 16#40#);
               Aux (5) :=
                 16#80# or VSS.Unicode.UTF8_Code_Unit (V_Part mod 16#40#);

               if T_Index = 0 then
                  --  No trailing consonant: two characters, six code
                  --  units.

                  VSS.Implementation.UTF8_Strings.Mutable_Operations
                    .Unchecked_Append
                      (Result_Data,
                       Result_Size,
                       Aux,
                       0,
                       6,
                       2);

               else
                  Aux (6) := 16#E1#;
                  Aux (7) :=
                    16#80#
                      or VSS.Unicode.UTF8_Code_Unit
                           ((T_Part / 16#40#) mod 16#40#);
                  Aux (8) :=
                    16#80#
                      or VSS.Unicode.UTF8_Code_Unit (T_Part mod 16#40#);

                  VSS.Implementation.UTF8_Strings.Mutable_Operations
                    .Unchecked_Append
                      (Result_Data,
                       Result_Size,
                       Aux,
                       0,
                       9,
                       3);
               end if;

               Last_CCC := CCC_NR;
            end;

         elsif Info.First_CCC /= CCC_NR
           and Last_CCC /= CCC_NR
           and Last_CCC > Info.First_CCC
         then
            --  Reordering is necessary

            Append_Reordered
              (Result_Data,
               Result_Size,
               Decomposition_Data,
               Last_CCC,
               VSS.Implementation.UCD_Normalization_UTF8.UTF8_Data_Table,
               Info.Offset,
               Info.Size);

         else
            --  Decomposition mapping can be appended as is.

            VSS.Implementation.UTF8_Strings.Mutable_Operations
              .Unchecked_Append
                (Result_Data,
                 Result_Size,
                 VSS.Implementation.UCD_Normalization_UTF8.UTF8_Data_Table,
                 Info.Offset,
                 Info.Size,
                 Info.Length);
            Last_CCC := Info.Last_CCC;
         end if;
      end if;

      exit when Offset >= Source_Size;
   end loop;
end Decompose;
|
|
|
|
---------------------------
|
|
-- Decompose_And_Compose --
|
|
---------------------------
|
|
|
|
procedure Decompose_And_Compose
|
|
(Source_Storage :
|
|
VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array;
|
|
Source_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Decomposition_Data :
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Data_Offset_Array;
|
|
Result_Data : out
|
|
VSS.Implementation.UTF8_Strings.UTF8_String_Data)
|
|
is
|
|
use type
|
|
VSS.Implementation.UCD_Normalization_Common.First_Mapping_Code_Offset;
|
|
use type
|
|
VSS.Implementation.UCD_Normalization_Common.Last_Mapping_Code_Offset;
|
|
use all type VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
|
|
use all type
|
|
VSS.Implementation.UCD_Normalization_UTF8.Composition_Quick_Check;
|
|
use type VSS.Unicode.Code_Point;
|
|
|
|
S_Base : constant := 16#AC00#;
|
|
L_Base : constant := 16#1100#;
|
|
V_Base : constant := 16#1161#;
|
|
T_Base : constant := 16#11A7#;
|
|
V_Count : constant := 21;
|
|
T_Count : constant := 28;
|
|
N_Count : constant := V_Count * T_Count; -- 588;
|
|
|
|
function Backward
|
|
(Data : VSS.Implementation.UTF8_Strings.UTF8_String_Data;
|
|
From : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Offset : out VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Size : out VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Code : out VSS.Unicode.Code_Point;
|
|
Info : out
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
|
|
return Boolean;
|
|
-- Decode previous character, obtain its normalization information
|
|
-- and return True on success. Return False when given character is
|
|
-- first character of the string.
|
|
|
|
function Has_Decomposition
|
|
(Info : VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
|
|
return Boolean;
|
|
-- Return True when character has full decomposition.
|
|
|
|
procedure Apply_Decomposition
|
|
(Result_Data : in out
|
|
VSS.Implementation.UTF8_Strings.UTF8_String_Data;
|
|
Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
|
|
From_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
From_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
From_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
|
|
Skip_Compositon : out Boolean;
|
|
Starter_Offset : out VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : out VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Code : out VSS.Unicode.Code_Point;
|
|
Starter_Info : out
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
|
|
Previous_CCC : out
|
|
VSS.Implementation.UCD_Normalization_UTF8.CCC_Values);
|
|
-- Apply decomposition to the slice of the string starting from given
|
|
-- offset.
|
|
|
|
procedure Apply_Canonical_Composition
|
|
(Result_Data : in out VSS.Implementation.UTF8_Strings.UTF8_String_Data;
|
|
Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
|
|
From_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
From_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
From_Code : VSS.Unicode.Code_Point;
|
|
From_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
|
|
with Pre => From_Info.CCC = CCC_NR;
|
|
-- Apply canonical composition to the slice of the string starting
|
|
-- from the given position till the end of the string. Character at
|
|
-- the position must be starter.
|
|
|
|
procedure Lookup_Starter
|
|
(Result_Data : VSS.Implementation.UTF8_Strings.UTF8_String_Data;
|
|
Result_Size : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Need_Decomposition : out Boolean;
|
|
Skip_Composition : out Boolean;
|
|
Starter_Found : out Boolean;
|
|
Starter_Offset : out VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : out VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Code : out VSS.Unicode.Code_Point;
|
|
Starter_Info : out
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information);
|
|
-- Lookup for the last character in the string that is starter
|
|
-- or its decomposition will provide starter.
|
|
|
|
---------------------------------
|
|
-- Apply_Canonical_Composition --
|
|
---------------------------------
|
|
|
|
procedure Apply_Canonical_Composition
  (Result_Data : in out VSS.Implementation.UTF8_Strings.UTF8_String_Data;
   Result_Size : in out VSS.Unicode.UTF8_Code_Unit_Count;
   From_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
   From_Size   : VSS.Unicode.UTF8_Code_Unit_Count;
   From_Code   : VSS.Unicode.Code_Point;
   From_Info   :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
is
   --  Scan the string forward from the character that follows the one at
   --  From_Offset till Result_Size. The character at From_Offset is the
   --  initial "current starter". Each scanned character is either composed
   --  with the current starter (it is removed from the string and the
   --  starter is replaced by the composite), or left in place; in the
   --  latter case it becomes the current starter when its CCC is CCC_NR.

   Previous_CCC     :
     VSS.Implementation.UCD_Normalization_UTF8.CCC_Values;
   --  CCC of the last character that has been left in the string.

   Current_Offset   : VSS.Unicode.UTF8_Code_Unit_Offset;
   Current_Size     : VSS.Unicode.UTF8_Code_Unit_Count;
   Current_Code     : VSS.Unicode.Code_Point;
   Current_Info     :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
   Current_Consumed : Boolean;
   --  Set when current character has been composed with the starter.
   Next_Offset      : VSS.Unicode.UTF8_Code_Unit_Offset;

   Current_Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
   Current_Starter_Size   : VSS.Unicode.UTF8_Code_Unit_Count;
   Current_Starter_Code   : VSS.Unicode.Code_Point;
   Current_Starter_Info   :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;

   New_Starter_Code : VSS.Unicode.Code_Point;

   procedure Compose_With_Starter (Composite : VSS.Unicode.Code_Point);
   --  Remove current character from the string and replace current
   --  starter by the given composite character. Updates Next_Offset and
   --  information about current starter, and sets Current_Consumed.

   --------------------------
   -- Compose_With_Starter --
   --------------------------

   procedure Compose_With_Starter (Composite : VSS.Unicode.Code_Point) is
      Composite_Buffer :
        VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array (0 .. 3);
      Composite_Size   : VSS.Unicode.UTF8_Code_Unit_Count;

   begin
      --  Current character is located after the starter, so its removal
      --  doesn't change position of the starter.

      VSS.Implementation.UTF8_Strings.Mutable_Operations.Unchecked_Delete
        (Result_Data,
         Result_Size,
         Current_Offset,
         Current_Size,
         1);

      VSS.Implementation.UTF8_Encoding.Encode
        (Composite,
         Composite_Size,
         Composite_Buffer (0),
         Composite_Buffer (1),
         Composite_Buffer (2),
         Composite_Buffer (3));

      VSS.Implementation.UTF8_Strings.Mutable_Operations.Unchecked_Replace
        (Result_Data,
         Result_Size,
         Current_Starter_Offset,
         Current_Starter_Size,
         1,
         Composite_Buffer,
         Composite_Buffer'First,
         Composite_Size,
         1);

      --  Continue from the position where removed character was located,
      --  corrected by the change of the encoded size of the starter.

      Next_Offset :=
        Next_Offset + Composite_Size
          - Current_Starter_Size - Current_Size;

      Current_Starter_Size := Composite_Size;
      Current_Starter_Code := Composite;
      Current_Starter_Info :=
        Get_Decomposition_Information (Decomposition_Data, Composite);

      Current_Consumed := True;
   end Compose_With_Starter;

begin
   Previous_CCC := CCC_NR;
   Next_Offset  := From_Offset + From_Size;

   Current_Starter_Offset := From_Offset;
   Current_Starter_Size   := From_Size;
   Current_Starter_Code   := From_Code;
   Current_Starter_Info   := From_Info;

   while Next_Offset < Result_Size loop
      Current_Offset := Next_Offset;

      VSS.Implementation.UTF8_Strings.Unchecked_Decode_Forward
        (Result_Data, Next_Offset, Current_Code);

      Current_Info :=
        Get_Decomposition_Information
          (Decomposition_Data, Current_Code);

      Current_Size     := Next_Offset - Current_Offset;
      Current_Consumed := False;

      --  Current character may compose with the starter

      if Current_Starter_Info.First_Index /= 0
        and Current_Info.Last_Index /= 0
      then
         if Previous_CCC < Current_Info.CCC
           or (Previous_CCC = CCC_NR and Current_Info.CCC = CCC_NR)
         then
            --  Either current character is not blocked from the starter,
            --  or it is starter that immediately follows current starter.
            --  In both cases it can be last character of the decomposition
            --  mapping of some primary composite; thus lookup for primary
            --  composite.

            New_Starter_Code :=
              VSS.Implementation.UCD_Normalization_Common
                .Composition_Mapping
                  (Current_Info.Last_Index,
                   Current_Starter_Info.First_Index);

            if New_Starter_Code /= 0 then
               --  Starter and current character is decomposition mapping
               --  of the primary composite, remove current character and
               --  replace starter by the primary composite found.

               Compose_With_Starter (New_Starter_Code);
            end if;
         end if;

      elsif Current_Code in 16#1161# .. 16#1175#
        and Current_Starter_Code in 16#1100# .. 16#1112#
      then
         --  Hangul Syllable Composition:
         --
         --  Leading consonant + Vowel => LV_Syllable
         --
         --  Encoded size of leading consonant and of syllable are same.

         declare
            L_Index : constant VSS.Unicode.Code_Point :=
              Current_Starter_Code - L_Base;
            V_Index : constant VSS.Unicode.Code_Point :=
              Current_Code - V_Base;

         begin
            Compose_With_Starter
              (S_Base + (L_Index * N_Count + V_Index * T_Count));
         end;

      elsif Current_Code in 16#11A8# .. 16#11C2#
        and Current_Starter_Code in 16#AC00# .. 16#D7A3#
      then
         --  Hangul Syllable Composition:
         --
         --  LV_Syllable + Trailing consonant => LVT_Syllable

         if (Current_Starter_Code - S_Base) mod T_Count = 0 then
            --  Starter is LV_Syllable, can compose with
            --  current T_Jamo.

            Compose_With_Starter
              (Current_Starter_Code + (Current_Code - T_Base));
         end if;
      end if;

      if not Current_Consumed then
         Previous_CCC := Current_Info.CCC;

         --  Current character is new starter

         if Current_Info.CCC = CCC_NR then
            Current_Starter_Offset := Current_Offset;
            Current_Starter_Size   := Current_Size;
            Current_Starter_Code   := Current_Code;
            Current_Starter_Info   := Current_Info;
         end if;
      end if;
   end loop;
end Apply_Canonical_Composition;
|
|
|
|
-------------------------
|
|
-- Apply_Decomposition --
|
|
-------------------------
|
|
|
|
procedure Apply_Decomposition
  (Result_Data     : in out
     VSS.Implementation.UTF8_Strings.UTF8_String_Data;
   Result_Size     : in out VSS.Unicode.UTF8_Code_Unit_Count;
   From_Offset     : VSS.Unicode.UTF8_Code_Unit_Offset;
   From_Size       : VSS.Unicode.UTF8_Code_Unit_Count;
   From_Info       :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
   Skip_Compositon : out Boolean;
   Starter_Offset  : out VSS.Unicode.UTF8_Code_Unit_Offset;
   Starter_Size    : out VSS.Unicode.UTF8_Code_Unit_Count;
   Starter_Code    : out VSS.Unicode.Code_Point;
   Starter_Info    : out
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
   Previous_CCC    : out
     VSS.Implementation.UCD_Normalization_UTF8.CCC_Values)
is
   --  Process characters of the string from the one at From_Offset (its
   --  encoded size and mapping information are given by From_Size and
   --  From_Info) till Result_Size: characters which have starter in the
   --  decomposition are replaced by their decomposition mapping, other
   --  characters are moved backward when they violate canonical ordering.
   --
   --  On return Previous_CCC is CCC of the last processed character.
   --  Starter_Offset, Starter_Size, Starter_Code and Starter_Info are
   --  set only when character at From_Offset of the processed string has
   --  CCC_NR; in this case Skip_Compositon is True when that character
   --  has zero First_Index, otherwise Skip_Compositon is True.

   Current_Offset : VSS.Unicode.UTF8_Code_Unit_Offset := From_Offset;
   Current_Size   : VSS.Unicode.UTF8_Code_Unit_Count;
   Current_Code   : VSS.Unicode.Code_Point;
   Current_Info   :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information :=
       From_Info;
   Next_Offset    : VSS.Unicode.UTF8_Code_Unit_Offset :=
     From_Offset + From_Size;

begin
   --  By convention, this subprogram called only when there is at least
   --  one character is available.

   Skip_Compositon := True;
   Previous_CCC    := CCC_NR;

   loop
      if Current_Info.Has_Starter then
         --  XXX By convention, when full decomposition has starter
         --  character(s) the first characters of the decomposition is
         --  starter too. This should be checked by code generator.

         if Has_Decomposition (Current_Info) then
            Current_Size := Next_Offset - Current_Offset;

            VSS.Implementation.UTF8_Strings.Mutable_Operations
              .Unchecked_Replace
                (Result_Data,
                 Result_Size,
                 Current_Offset,
                 Current_Size,
                 1,
                 VSS.Implementation.UCD_Normalization_UTF8
                   .UTF8_Data_Table,
                 Current_Info.Offset,
                 Current_Info.Size,
                 Current_Info.Length);

            --  Continue after the decomposition mapping.

            Next_Offset :=
              Next_Offset + Current_Info.Size - Current_Size;
            Previous_CCC := Current_Info.Last_CCC;

         else
            raise Program_Error;
         end if;

      else
         --  Not a starter, do decomposition with canonical reordering.

         if Has_Decomposition (Current_Info) then
            raise Program_Error;

         else
            if Previous_CCC <= Current_Info.CCC then
               --  No decomposition, canonical ordering preserved.

               Previous_CCC := Current_Info.CCC;

            else
               --  Violation of canonical reordering, move character
               --  into appropriate position.

               declare
                  Previous_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
                  Previous_Code   : VSS.Unicode.Code_Point;
                  Previous_Info   :
                    VSS.Implementation.UCD_Normalization_UTF8
                      .Mapping_Information;
                  Insert_Offset   : VSS.Unicode.UTF8_Code_Unit_Offset;

               begin
                  Current_Size    := Next_Offset - Current_Offset;
                  Previous_Offset := Current_Offset;

                  loop
                     Insert_Offset := Previous_Offset;

                     exit when Previous_Offset = 0;
                     --  Beginning of the string has been reached, there
                     --  is nothing to decode; character is moved to the
                     --  first position. Same guard is used by
                     --  Reorder_And_Insert.

                     VSS.Implementation.UTF8_Strings
                       .Unchecked_Backward_Decode
                         (Result_Data, Previous_Offset, Previous_Code);

                     Previous_Info :=
                       Get_Decomposition_Information
                         (Decomposition_Data, Previous_Code);

                     exit when Previous_Info.CCC = CCC_NR
                       or Previous_Info.CCC <= Current_Info.CCC;
                  end loop;

                  --  Previous_CCC is not changed: last character of the
                  --  processed part is still the same.

                  VSS.Implementation.UTF8_Strings.Mutable_Operations
                    .Unchecked_Move_Slice
                      (Result_Data,
                       Current_Offset,
                       Current_Size,
                       Insert_Offset);
               end;
            end if;
         end if;
      end if;

      exit when Next_Offset >= Result_Size;

      Current_Offset := Next_Offset;

      VSS.Implementation.UTF8_Strings.Unchecked_Decode_Forward
        (Result_Data, Next_Offset, Current_Code);

      Current_Info :=
        Get_Decomposition_Information (Decomposition_Data, Current_Code);
   end loop;

   --  Decode again character at the start position, it may be changed by
   --  the decomposition.

   Next_Offset := From_Offset;

   VSS.Implementation.UTF8_Strings.Unchecked_Decode_Forward
     (Result_Data, Next_Offset, Current_Code);

   Current_Info :=
     Get_Decomposition_Information (Decomposition_Data, Current_Code);

   if Current_Info.CCC = CCC_NR then
      Skip_Compositon := Current_Info.First_Index = 0;
      Starter_Offset  := From_Offset;
      Starter_Size    := Next_Offset - From_Offset;
      Starter_Code    := Current_Code;
      Starter_Info    := Current_Info;
   end if;
end Apply_Decomposition;
|
|
|
|
--------------
|
|
-- Backward --
|
|
--------------
|
|
|
|
function Backward
  (Data   : VSS.Implementation.UTF8_Strings.UTF8_String_Data;
   From   : VSS.Unicode.UTF8_Code_Unit_Offset;
   Offset : out VSS.Unicode.UTF8_Code_Unit_Offset;
   Size   : out VSS.Unicode.UTF8_Code_Unit_Count;
   Code   : out VSS.Unicode.Code_Point;
   Info   : out
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
   return Boolean is
begin
   if From /= 0 then
      --  There is a character before From: decode it, Offset is moved to
      --  the first code unit of this character.

      Offset := From;

      VSS.Implementation.UTF8_Strings.Unchecked_Backward_Decode
        (Data, Offset, Code);

      Size := From - Offset;
      Info := Get_Decomposition_Information (Decomposition_Data, Code);

      return True;
   end if;

   --  Start of the string, output parameters are not set.

   return False;
end Backward;
|
|
|
|
-----------------------
|
|
-- Has_Decomposition --
|
|
-----------------------
|
|
|
|
function Has_Decomposition
  (Info : VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
   return Boolean is (Info.Size /= 0);
--  Returns True when the Size component of the given mapping information
--  is not zero, and False otherwise.
|
|
|
|
--------------------
|
|
-- Lookup_Starter --
|
|
--------------------
|
|
|
|
procedure Lookup_Starter
  (Result_Data        : VSS.Implementation.UTF8_Strings.UTF8_String_Data;
   Result_Size        : VSS.Unicode.UTF8_Code_Unit_Offset;
   Need_Decomposition : out Boolean;
   Skip_Composition   : out Boolean;
   Starter_Found      : out Boolean;
   Starter_Offset     : out VSS.Unicode.UTF8_Code_Unit_Offset;
   Starter_Size       : out VSS.Unicode.UTF8_Code_Unit_Count;
   Starter_Code       : out VSS.Unicode.Code_Point;
   Starter_Info       : out
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information)
is
   --  Scans Result_Data backward, starting at offset Result_Size, until a
   --  character whose mapping information has Has_Starter set is found,
   --  or until offset zero is reached.
   --
   --   * Need_Decomposition - True when at least one of the visited
   --                          characters (found starter included) has a
   --                          decomposition.
   --   * Skip_Composition   - True when no starter is found; otherwise
   --                          True only when First_Index of the found
   --                          starter is zero.
   --   * Starter_Found      - True when a starter is found.
   --   * Starter_Offset, Starter_Size, Starter_Code, Starter_Info -
   --     description of the found starter; assigned only when
   --     Starter_Found is True.

   Scan_Offset : VSS.Unicode.UTF8_Code_Unit_Offset := Result_Size;
   Scan_Size   : VSS.Unicode.UTF8_Code_Unit_Count;
   Scan_Code   : VSS.Unicode.Code_Point;
   Scan_Info   :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;

begin
   Starter_Found      := False;
   Skip_Composition   := True;
   Need_Decomposition := False;

   if Result_Size /= 0 then
      --  Scan_Offset is used both as the starting position and as the
      --  receiver of the new position, so each iteration moves one
      --  character backward.

      while Backward
              (Result_Data,
               Scan_Offset,
               Scan_Offset,
               Scan_Size,
               Scan_Code,
               Scan_Info)
      loop
         if Has_Decomposition (Scan_Info) then
            Need_Decomposition := True;
         end if;

         if Scan_Info.Has_Starter then
            Starter_Found    := True;
            Starter_Offset   := Scan_Offset;
            Starter_Size     := Scan_Size;
            Starter_Code     := Scan_Code;
            Starter_Info     := Scan_Info;
            Skip_Composition := Scan_Info.First_Index = 0;

            exit;
         end if;
      end loop;
   end if;
end Lookup_Starter;
|
|
|
|
Result_Size : VSS.Unicode.UTF8_Code_Unit_Count := 0;
|
|
Source_Current_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Source_Next_Offset : VSS.Unicode.UTF8_Code_Unit_Offset := 0;
|
|
Source_Copy_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Source_Length_Delta : VSS.Implementation.Strings.Character_Count;
|
|
Source_Code : VSS.Unicode.Code_Point;
|
|
Source_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information;
|
|
Last_CCC :
|
|
VSS.Implementation.UCD_Normalization_UTF8.CCC_Values :=
|
|
CCC_NR;
|
|
|
|
begin
|
|
loop
|
|
exit when Source_Next_Offset >= Source_Size;
|
|
|
|
Source_Copy_Offset := Source_Next_Offset;
|
|
Source_Length_Delta := 0;
|
|
|
|
-- Lookup source string for the longest slice already in
|
|
-- normalization form.
|
|
|
|
loop
|
|
Source_Current_Offset := Source_Next_Offset;
|
|
|
|
exit when Source_Next_Offset >= Source_Size;
|
|
|
|
VSS.Implementation.UTF8_Encoding.Unchecked_Decode_Forward
|
|
(Source_Storage, Source_Next_Offset, Source_Code);
|
|
|
|
Source_Info :=
|
|
Get_Decomposition_Information (Decomposition_Data, Source_Code);
|
|
|
|
case Source_Info.Composition_QC is
|
|
when No =>
|
|
-- Character that cannot ever occur in the respective
|
|
-- normalization form.
|
|
|
|
exit;
|
|
|
|
when Maybe =>
|
|
-- Character that may occur in the respective
|
|
-- normalization form, depending on the context.
|
|
|
|
exit;
|
|
|
|
when Yes =>
|
|
-- Character that may occur in the respective
|
|
-- normalization form.
|
|
|
|
if Last_CCC > Source_Info.CCC
|
|
and Source_Info.CCC /= CCC_NR
|
|
then
|
|
-- Violation of the canonical ordering.
|
|
|
|
exit;
|
|
end if;
|
|
end case;
|
|
|
|
Last_CCC := Source_Info.CCC;
|
|
Source_Length_Delta := Source_Length_Delta + 1;
|
|
end loop;
|
|
|
|
-- Copy found normalized data
|
|
|
|
if Source_Copy_Offset /= Source_Current_Offset then
|
|
VSS.Implementation.UTF8_Strings.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Copy_Offset,
|
|
Source_Current_Offset - Source_Copy_Offset,
|
|
Source_Length_Delta);
|
|
end if;
|
|
|
|
exit when Source_Current_Offset >= Source_Size;
|
|
|
|
case Source_Info.Composition_QC is
|
|
when No =>
|
|
-- Character that cannot ever occur in the respective
|
|
-- normalization form.
|
|
|
|
if Source_Info.Has_Starter then
|
|
pragma Assert (Source_Info.First_CCC = CCC_NR);
|
|
pragma Assert (Source_Info.Last_Index = 0);
|
|
pragma Assert (Has_Decomposition (Source_Info));
|
|
|
|
declare
|
|
Starter_Code : VSS.Unicode.Code_Point;
|
|
Next_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Skip_Composition : Boolean;
|
|
Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.Mapping_Information;
|
|
|
|
begin
|
|
Starter_Offset := Result_Size;
|
|
|
|
VSS.Implementation.UTF8_Strings.Mutable_Operations
|
|
.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.UTF8_Data_Table,
|
|
Source_Info.Offset,
|
|
Source_Info.Size,
|
|
Source_Info.Length);
|
|
Last_CCC := Source_Info.Last_CCC;
|
|
|
|
-- If last starter in full decomposition can be first
|
|
-- character of the pair of canonical decomposition of
|
|
-- the primary composite - process all following
|
|
-- characters till next starter/end of string and do
|
|
-- composition.
|
|
--
|
|
-- XXX this is not implemented yet
|
|
|
|
-- XXX There are 2 bits available in Info record, can it
|
|
-- be used to encode length of the starter in the full
|
|
-- decomposition?
|
|
|
|
Next_Offset := Starter_Offset;
|
|
|
|
VSS.Implementation.UTF8_Strings.Unchecked_Decode_Forward
|
|
(Result_Data, Next_Offset, Starter_Code);
|
|
|
|
Starter_Size := Next_Offset - Starter_Offset;
|
|
Starter_Info :=
|
|
Get_Decomposition_Information
|
|
(Decomposition_Data, Starter_Code);
|
|
Skip_Composition := False;
|
|
|
|
pragma Assert (Starter_Info.Last_Index = 0);
|
|
|
|
if not Skip_Composition then
|
|
loop
|
|
exit when Source_Next_Offset >= Source_Size;
|
|
|
|
Source_Current_Offset := Source_Next_Offset;
|
|
|
|
VSS.Implementation.UTF8_Encoding
|
|
.Unchecked_Decode_Forward
|
|
(Source_Storage,
|
|
Source_Next_Offset,
|
|
Source_Code);
|
|
|
|
Source_Info :=
|
|
Get_Decomposition_Information
|
|
(Decomposition_Data, Source_Code);
|
|
|
|
-- Append decompositions of all characters from
|
|
-- the source string till next starter.
|
|
|
|
if Source_Info.Has_Starter then
|
|
-- It is tested previously, and need to be
|
|
-- analyzed: first character in the full
|
|
-- decomposition should be starter by our
|
|
-- convention, but it may composite with the
|
|
-- previous starter. NF * C_QC may be used
|
|
-- here to do check, or Last_Index /= 0...
|
|
|
|
raise Program_Error;
|
|
end if;
|
|
|
|
if Has_Decomposition (Source_Info) then
|
|
raise Program_Error;
|
|
-- if Source_Info.Has_Starter then
|
|
-- raise Program_Error;
|
|
--
|
|
-- else
|
|
-- if Last_CCC <= Source_Info.First_CCC then
|
|
-- Unchecked_Append
|
|
-- (Result_Data,
|
|
-- Result_Size,
|
|
-- VSS.Implementation.UCD_Normalization_UTF8
|
|
-- .UTF8_Data_Table,
|
|
-- Source_Info.Offset,
|
|
-- Source_Info.Size,
|
|
-- Source_Info.Length);
|
|
-- Last_CCC := Source_Info.Last_CCC;
|
|
--
|
|
-- else
|
|
-- raise Program_Error;
|
|
-- end if;
|
|
--
|
|
-- end if;
|
|
|
|
else
|
|
-- if Source_Info.CCC = CCC_NR then
|
|
-- raise Program_Error;
|
|
--
|
|
-- else
|
|
if Last_CCC <= Source_Info.CCC then
|
|
VSS.Implementation.UTF8_Strings.
|
|
Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset
|
|
- Source_Current_Offset,
|
|
1);
|
|
Last_CCC := Source_Info.CCC;
|
|
|
|
else
|
|
raise Program_Error;
|
|
|
|
-- XXX Only single character is processed
|
|
-- here, thus Append_Reordered do
|
|
-- unnecessary action, because it can
|
|
-- process sequence of characters. So, it
|
|
-- is possible to optimize code here by
|
|
-- adding another subprogram to process
|
|
-- single character only.
|
|
|
|
-- Append_Reordered
|
|
-- (Result_Data,
|
|
-- Result_Size,
|
|
-- Decomposition_Data,
|
|
-- Last_CCC,
|
|
-- Source_Storage,
|
|
-- Source_Current_Offset,
|
|
-- Source_Next_Offset
|
|
-- - Source_Current_Offset);
|
|
end if;
|
|
end if;
|
|
end loop;
|
|
|
|
-- Do canonical composition
|
|
|
|
Apply_Canonical_Composition
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info);
|
|
end if;
|
|
end;
|
|
|
|
else
|
|
-- XXX Duplicated with Maybe/non-starter case?
|
|
|
|
declare
|
|
Need_Decomposition : Boolean;
|
|
Skip_Composition : Boolean;
|
|
Starter_Found : Boolean;
|
|
Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Code : VSS.Unicode.Code_Point;
|
|
Starter_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.Mapping_Information;
|
|
|
|
begin
|
|
-- Lookup backward till starter character will be found
|
|
|
|
Lookup_Starter
|
|
(Result_Data,
|
|
Result_Size,
|
|
Need_Decomposition,
|
|
Skip_Composition,
|
|
Starter_Found,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info);
|
|
|
|
if Need_Decomposition then
|
|
declare
|
|
Aux_Starter_Info : constant
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.Mapping_Information := Starter_Info;
|
|
-- Workaround for GNAT warning about overlapped
|
|
-- parameters.
|
|
|
|
begin
|
|
Apply_Decomposition
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Aux_Starter_Info,
|
|
Skip_Composition,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info,
|
|
Last_CCC);
|
|
end;
|
|
end if;
|
|
|
|
-- Append decompositions of all characters from the
|
|
-- source string till next starter.
|
|
|
|
loop
|
|
if Has_Decomposition (Source_Info) then
|
|
if Source_Info.First_CCC = CCC_NR then
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.UTF8_Data_Table,
|
|
Source_Info.Offset,
|
|
Source_Info.Size,
|
|
Source_Info.Length);
|
|
Last_CCC := Source_Info.Last_CCC;
|
|
|
|
exit;
|
|
|
|
elsif Last_CCC <= Source_Info.First_CCC then
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.UTF8_Data_Table,
|
|
Source_Info.Offset,
|
|
Source_Info.Size,
|
|
Source_Info.Length);
|
|
Last_CCC := Source_Info.Last_CCC;
|
|
|
|
else
|
|
raise Program_Error;
|
|
end if;
|
|
|
|
else
|
|
if Source_Info.CCC = CCC_NR then
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
Last_CCC := Source_Info.CCC;
|
|
|
|
exit;
|
|
|
|
elsif Last_CCC <= Source_Info.CCC then
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
Last_CCC := Source_Info.CCC;
|
|
|
|
else
|
|
-- XXX Only single character is processed here,
|
|
-- thus Append_Reordered do unnecessary action,
|
|
-- because it can process sequence of
|
|
-- characters. So, it is possible to optimize
|
|
-- code here by adding another subprogram to
|
|
-- process single character only.
|
|
|
|
Append_Reordered
|
|
(Result_Data,
|
|
Result_Size,
|
|
Decomposition_Data,
|
|
Last_CCC,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset);
|
|
end if;
|
|
end if;
|
|
|
|
exit when Source_Next_Offset >= Source_Size;
|
|
|
|
Source_Current_Offset := Source_Next_Offset;
|
|
|
|
VSS.Implementation.UTF8_Encoding
|
|
.Unchecked_Decode_Forward
|
|
(Source_Storage, Source_Next_Offset, Source_Code);
|
|
|
|
Source_Info :=
|
|
Get_Decomposition_Information
|
|
(Decomposition_Data, Source_Code);
|
|
end loop;
|
|
|
|
-- Do canonical composition when necessary
|
|
|
|
if not Skip_Composition then
|
|
Apply_Canonical_Composition
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info);
|
|
end if;
|
|
end;
|
|
end if;
|
|
|
|
when Maybe =>
|
|
-- Character that may occur in the respective normalization
|
|
-- form, depending on the context.
|
|
|
|
if Source_Info.Has_Starter then
|
|
if Has_Decomposition (Source_Info) then
|
|
raise Program_Error;
|
|
|
|
elsif Source_Code in 16#1161# .. 16#1175# then
|
|
pragma Assert (Source_Info.Last_Index = 0);
|
|
-- Algorithmic composition, character must not compose
|
|
-- with previous character in ordinary way.
|
|
|
|
declare
|
|
Starter_Code : VSS.Unicode.Code_Point;
|
|
Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Buffer :
|
|
VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
|
|
(0 .. 3);
|
|
L_Index : VSS.Unicode.Code_Point;
|
|
V_Index : VSS.Unicode.Code_Point;
|
|
LV_Index : VSS.Unicode.Code_Point;
|
|
Append_Source : Boolean := True;
|
|
|
|
begin
|
|
if Result_Size /= 0 then
|
|
Starter_Offset := Result_Size;
|
|
|
|
VSS.Implementation.UTF8_Strings.
|
|
Unchecked_Backward_Decode
|
|
(Result_Data, Starter_Offset, Starter_Code);
|
|
|
|
if Starter_Code in 16#1100# .. 16#1112# then
|
|
L_Index := Starter_Code - L_Base;
|
|
V_Index := Source_Code - V_Base;
|
|
LV_Index :=
|
|
L_Index * N_Count + V_Index * T_Count;
|
|
|
|
Starter_Code := S_Base + LV_Index;
|
|
|
|
VSS.Implementation.UTF8_Encoding.Encode
|
|
(Starter_Code,
|
|
Starter_Size,
|
|
Starter_Buffer (0),
|
|
Starter_Buffer (1),
|
|
Starter_Buffer (2),
|
|
Starter_Buffer (3));
|
|
|
|
-- Encoded size of all possible characters are,
|
|
-- same, so use it.
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Replace
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
1,
|
|
Starter_Buffer,
|
|
Starter_Buffer'First,
|
|
Starter_Size,
|
|
1);
|
|
|
|
Append_Source := False;
|
|
end if;
|
|
end if;
|
|
|
|
if Append_Source then
|
|
-- Composition is impossible, append character
|
|
-- to the result and reset canonical combining
|
|
-- class of the last character.
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
|
|
Last_CCC := CCC_NR;
|
|
end if;
|
|
end;
|
|
|
|
elsif Source_Code in 16#11A8# .. 16#11C2# then
|
|
pragma Assert (Source_Info.Last_Index = 0);
|
|
-- Algorithmic composition, character must not compose
|
|
-- with previous character in ordinary way.
|
|
|
|
declare
|
|
Starter_Code : VSS.Unicode.Code_Point;
|
|
Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Buffer :
|
|
VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
|
|
(0 .. 3);
|
|
S_Index : VSS.Unicode.Code_Point;
|
|
T_Index : VSS.Unicode.Code_Point;
|
|
Append_Source : Boolean := True;
|
|
|
|
begin
|
|
if Result_Size /= 0 then
|
|
Starter_Offset := Result_Size;
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Unchecked_Backward_Decode
|
|
(Result_Data, Starter_Offset, Starter_Code);
|
|
|
|
if Starter_Code in 16#AC00# .. 16#D7A3# then
|
|
S_Index := Starter_Code - S_Base;
|
|
T_Index := S_Index mod T_Count;
|
|
|
|
if T_Index = 0 then
|
|
-- Starter is LV_Syllable, can compose with
|
|
-- current T_Jamo.
|
|
|
|
T_Index := Source_Code - T_Base;
|
|
|
|
Starter_Code := Starter_Code + T_Index;
|
|
|
|
VSS.Implementation.UTF8_Encoding.Encode
|
|
(Starter_Code,
|
|
Starter_Size,
|
|
Starter_Buffer (0),
|
|
Starter_Buffer (1),
|
|
Starter_Buffer (2),
|
|
Starter_Buffer (3));
|
|
|
|
-- Encoded size of all possible characters
|
|
-- are same, so use it.
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Replace
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
1,
|
|
Starter_Buffer,
|
|
Starter_Buffer'First,
|
|
Starter_Size,
|
|
1);
|
|
|
|
Append_Source := False;
|
|
end if;
|
|
end if;
|
|
end if;
|
|
|
|
if Append_Source then
|
|
-- Composition is impossible, append character
|
|
-- to the result and reset canonical combining
|
|
-- class of the last character.
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
|
|
Last_CCC := CCC_NR;
|
|
end if;
|
|
end;
|
|
|
|
else
|
|
pragma Assert (Source_Info.Last_Index /= 0);
|
|
-- Character may compose with previous character.
|
|
|
|
declare
|
|
Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Code : VSS.Unicode.Code_Point;
|
|
Starter_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.Mapping_Information;
|
|
|
|
begin
|
|
Starter_Offset := Result_Size;
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Unchecked_Backward_Decode
|
|
(Result_Data, Starter_Offset, Starter_Code);
|
|
|
|
Starter_Info :=
|
|
Get_Decomposition_Information
|
|
(Decomposition_Data, Starter_Code);
|
|
|
|
if Starter_Info.Has_Starter then
|
|
-- XXX Don't need to decomposite previous
|
|
-- character ???
|
|
-- if Has_Decomposition (Starter_Info) then
|
|
-- raise Program_Error;
|
|
--
|
|
-- else
|
|
if Starter_Info.First_Index /= 0 then
|
|
Starter_Size := Result_Size - Starter_Offset;
|
|
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
|
|
Apply_Canonical_Composition
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info);
|
|
end if;
|
|
|
|
else
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
|
|
Last_CCC := CCC_NR;
|
|
end if;
|
|
end;
|
|
end if;
|
|
|
|
else
|
|
declare
|
|
Need_Decomposition : Boolean;
|
|
Skip_Composition : Boolean;
|
|
Starter_Found : Boolean;
|
|
Starter_Offset : VSS.Unicode.UTF8_Code_Unit_Offset;
|
|
Starter_Size : VSS.Unicode.UTF8_Code_Unit_Count;
|
|
Starter_Code : VSS.Unicode.Code_Point;
|
|
Starter_Info :
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.Mapping_Information;
|
|
|
|
begin
|
|
-- Lookup backward till starter character will be found
|
|
|
|
Lookup_Starter
|
|
(Result_Data,
|
|
Result_Size,
|
|
Need_Decomposition,
|
|
Skip_Composition,
|
|
Starter_Found,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info);
|
|
|
|
if Need_Decomposition then
|
|
declare
|
|
Aux_Starter_Info : constant
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.Mapping_Information := Starter_Info;
|
|
-- Workaround of GNAT warning about overlapping
|
|
-- parameters.
|
|
|
|
begin
|
|
Apply_Decomposition
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Aux_Starter_Info,
|
|
Skip_Composition,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info,
|
|
Last_CCC);
|
|
end;
|
|
end if;
|
|
|
|
-- Append decompositions of all characters from the
|
|
-- source string till next starter.
|
|
|
|
loop
|
|
-- if Source_Info.Has_Starter then
|
|
-- It is tested previously, and need to be
|
|
-- analyzed: first character in the full decomposition
|
|
-- should be starter by our convention, but it may
|
|
-- composite with the previous starter. NF*C_QC may
|
|
-- be used here to do check, or Last_Index /= 0...
|
|
|
|
-- raise Program_Error;
|
|
-- end if;
|
|
|
|
if Has_Decomposition (Source_Info) then
|
|
if Source_Info.First_CCC = CCC_NR then
|
|
raise Program_Error;
|
|
|
|
elsif Last_CCC <= Source_Info.First_CCC then
|
|
raise Program_Error;
|
|
|
|
else
|
|
Append_Reordered
|
|
(Result_Data,
|
|
Result_Size,
|
|
Decomposition_Data,
|
|
Last_CCC,
|
|
VSS.Implementation.UCD_Normalization_UTF8
|
|
.UTF8_Data_Table,
|
|
Source_Info.Offset,
|
|
Source_Info.Size);
|
|
end if;
|
|
|
|
else
|
|
if Source_Info.CCC = CCC_NR then
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
Last_CCC := Source_Info.CCC;
|
|
|
|
exit;
|
|
|
|
elsif Last_CCC <= Source_Info.CCC then
|
|
VSS.Implementation.UTF8_Strings
|
|
.Mutable_Operations.Unchecked_Append
|
|
(Result_Data,
|
|
Result_Size,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset,
|
|
1);
|
|
Last_CCC := Source_Info.CCC;
|
|
|
|
else
|
|
-- XXX Only single character is processed here,
|
|
-- thus Append_Reordered do unnecessary action,
|
|
-- because it can process sequence of
|
|
-- characters. So, it is possible to optimize
|
|
-- code here by adding another subprogram to
|
|
-- process single character only.
|
|
|
|
Append_Reordered
|
|
(Result_Data,
|
|
Result_Size,
|
|
Decomposition_Data,
|
|
Last_CCC,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset);
|
|
end if;
|
|
end if;
|
|
|
|
exit when Source_Next_Offset >= Source_Size;
|
|
|
|
Source_Current_Offset := Source_Next_Offset;
|
|
|
|
VSS.Implementation.UTF8_Encoding
|
|
.Unchecked_Decode_Forward
|
|
(Source_Storage, Source_Next_Offset, Source_Code);
|
|
|
|
Source_Info :=
|
|
Get_Decomposition_Information
|
|
(Decomposition_Data, Source_Code);
|
|
end loop;
|
|
|
|
-- Do canonical composition when necessary
|
|
|
|
if not Skip_Composition then
|
|
Apply_Canonical_Composition
|
|
(Result_Data,
|
|
Result_Size,
|
|
Starter_Offset,
|
|
Starter_Size,
|
|
Starter_Code,
|
|
Starter_Info);
|
|
end if;
|
|
end;
|
|
end if;
|
|
|
|
when Yes =>
|
|
if Last_CCC <= Source_Info.CCC and Source_Info.CCC = CCC_NR then
|
|
-- Precondition, should be removed.
|
|
|
|
raise Program_Error;
|
|
end if;
|
|
|
|
-- XXX Only single character is appended here! May be
|
|
-- optimized?
|
|
|
|
Append_Reordered
|
|
(Result_Data,
|
|
Result_Size,
|
|
Decomposition_Data,
|
|
Last_CCC,
|
|
Source_Storage,
|
|
Source_Current_Offset,
|
|
Source_Next_Offset - Source_Current_Offset);
|
|
end case;
|
|
end loop;
|
|
end Decompose_And_Compose;
|
|
|
|
-----------------------------------
|
|
-- Get_Decomposition_Information --
|
|
-----------------------------------
|
|
|
|
function Get_Decomposition_Information
  (Decomposition_Data :
     VSS.Implementation.UCD_Normalization_UTF8.Mapping_Data_Offset_Array;
   Code               : VSS.Unicode.Code_Point)
   return VSS.Implementation.UCD_Normalization_UTF8.Mapping_Information
is
   --  Two level table lookup: Code is split by Mapping_Group_Size into
   --  a group number and a position inside the group. Decomposition_Data
   --  provides the base offset of the group, and the sum of this base and
   --  the position selects the element of Mapping_Data_Table to return.

   package UCD renames VSS.Implementation.UCD_Normalization_UTF8;

   use type UCD.Mapping_Data_Offset;
   use type VSS.Unicode.Code_Point;

   Group_Number : constant UCD.Mapping_Group :=
     UCD.Mapping_Group (Code / UCD.Mapping_Group_Size);
   In_Group     : constant UCD.Mapping_Data_Offset :=
     UCD.Mapping_Data_Offset (Code mod UCD.Mapping_Group_Size);

begin
   return
     UCD.Mapping_Data_Table (Decomposition_Data (Group_Number) + In_Group);
end Get_Decomposition_Information;
|
|
|
|
---------------
|
|
-- Normalize --
|
|
---------------
|
|
|
|
procedure Normalize
  (Text   : VSS.Implementation.UTF8_Strings.UTF8_String_Data;
   Form   : VSS.Implementation.Normalization_Form;
   Result : out VSS.Implementation.UTF8_Strings.UTF8_String_Data)
is
   --  Initializes Result for Text.Size code units and, when Text is not
   --  empty, fills it with Text converted to the requested normalization
   --  form:
   --
   --   * forms D and KD are produced by Decompose
   --   * forms C and KC are produced by Decompose_And_Compose
   --
   --  Forms D and C use Canonical_Index; forms KD and KC use
   --  Compatibility_Index.

   package UCD renames VSS.Implementation.UCD_Normalization_UTF8;

begin
   VSS.Implementation.UTF8_Strings.Mutable_Operations.Initialize
     (Result, Text.Size);

   if Text.Size /= 0 then
      declare
         --  View of the text storage as an array of code units, overlaid
         --  at the address reported by Text.

         Storage : constant
           VSS.Implementation.UTF8_Encoding.UTF8_Code_Unit_Array
             (0 .. Text.Size)
           with Import, Address => Text.Storage_Address;

      begin
         case Form is
            when VSS.Implementation.Normalization_Form_D =>
               VSS.Implementation.UTF8_Normalization.Decompose
                 (Storage, Text.Size, UCD.Canonical_Index, Result);

            when VSS.Implementation.Normalization_Form_KD =>
               VSS.Implementation.UTF8_Normalization.Decompose
                 (Storage, Text.Size, UCD.Compatibility_Index, Result);

            when VSS.Implementation.Normalization_Form_C =>
               VSS.Implementation.UTF8_Normalization.Decompose_And_Compose
                 (Storage, Text.Size, UCD.Canonical_Index, Result);

            when VSS.Implementation.Normalization_Form_KC =>
               VSS.Implementation.UTF8_Normalization.Decompose_And_Compose
                 (Storage, Text.Size, UCD.Compatibility_Index, Result);
         end case;
      end;
   end if;
end Normalize;
|
|
|
|
end VSS.Implementation.UTF8_Normalization;
|