UTF-8 validate strings coming from spawned processes

... before creating Python string objects representing them.

This defense protects against storage_errors occurring in Python
frames when the binding attempts to create invalid string objects.

Fixes #93.
This commit is contained in:
Nicolas Setton
2023-09-08 16:02:25 +00:00
parent 807aa4f34b
commit cfe52f1fab
4 changed files with 85 additions and 5 deletions

View File

@@ -44,6 +44,7 @@ with GPS.Kernel.Timeout; use GPS.Kernel.Timeout;
with GPS.Scripts.Commands; use GPS.Scripts.Commands;
with Remote; use Remote;
with Commands; use Commands;
with UTF8_Utils; use UTF8_Utils;
package body Expect_Interface is
@@ -120,6 +121,34 @@ package body Expect_Interface is
(Data : in out Callback_Data'Class; Command : String);
-- Interactive command handler for the expect interface
function To_UTF8_And_Warn
(Kernel : Kernel_Handle;
Str : String) return String;
--  Convert Str to UTF-8 and return the result.
--  The conversion is done with Unknown_To_UTF8, meaning that Str is first
--  UTF-8 validated, and converted from the locale if it is not valid.
--  If the conversion fails, an error message is inserted in the console
--  through Kernel; the value produced by Unknown_To_UTF8 is returned
--  whether or not the conversion succeeded.
----------------------
-- To_UTF8_And_Warn --
----------------------
function To_UTF8_And_Warn
  (Kernel : Kernel_Handle;
   Str    : String) return String
is
   Converted_OK : aliased Boolean;
   --  Status flag filled in by Unknown_To_UTF8

   UTF8_Text : constant String :=
     Unknown_To_UTF8 (Str, Converted_OK'Access);
   --  Whatever Unknown_To_UTF8 produced for Str; returned in all cases
begin
   --  Nothing to report when the conversion went through
   if Converted_OK then
      return UTF8_Text;
   end if;

   --  Conversion failed: tell the user, then still hand back the value
   --  obtained from Unknown_To_UTF8.
   Insert_UTF8
     (Kernel,
      "Could not convert process output to UTF8",
      Mode => Error);

   return UTF8_Text;
end To_UTF8_And_Warn;
---------------
-- Deep_Copy --
---------------
@@ -216,9 +245,15 @@ package body Expect_Interface is
if Self.Exit_Status > 0 then
-- Error detected: Append the Exit_Output
Set_Nth_Arg
(C, 3, To_String (Self.Unmatched_Output & Self.Exit_Output));
(C, 3,
To_UTF8_And_Warn
(Self.Kernel,
To_String (Self.Unmatched_Output & Self.Exit_Output)));
else
Set_Nth_Arg (C, 3, To_String (Self.Unmatched_Output));
Set_Nth_Arg
(C, 3,
To_UTF8_And_Warn
(Self.Kernel, To_String (Self.Unmatched_Output)));
end if;
Dummy := Execute (Self.On_Exit, C);
Free (C);
@@ -245,7 +280,11 @@ package body Expect_Interface is
(Get_Script (Self.Inst), Arguments_Count => 2);
begin
Set_Nth_Arg (C, 1, Self.Inst);
Set_Nth_Arg (C, 2, To_String (Self.Unmatched_Output));
Set_Nth_Arg
(C, 2,
To_UTF8_And_Warn
(Self.Kernel,
To_String (Self.Unmatched_Output)));
Dummy := Execute (Self.Before_Kill, C);
Free (C);
end;
@@ -297,8 +336,16 @@ package body Expect_Interface is
Dummy : Boolean;
begin
Set_Nth_Arg (C, 1, Self.Inst);
Set_Nth_Arg (C, 2, S (Matches (0).First .. Matches (0).Last));
Set_Nth_Arg (C, 3, S (Index .. Matches (0).First - 1));
Set_Nth_Arg
(C, 2,
To_UTF8_And_Warn
(Self.Kernel,
S (Matches (0).First .. Matches (0).Last)));
Set_Nth_Arg
(C, 3,
To_UTF8_And_Warn
(Self.Kernel,
S (Index .. Matches (0).First - 1)));
Dummy := Execute (Self.On_Match, C);
Free (C);
end;

View File

@@ -0,0 +1,29 @@
""" Test a case that might cause a crash when processing
output from a GPS.Process spawned process: if the output
is not utf-8 valid, a storage_error might occur as Python
attempts to create a string object from this.
"""
from gs_utils.internal.utils import run_test_driver, timeout
@run_test_driver
def driver():
    """Check that non UTF-8 process output reaches on_match without a crash.

    Spawns "cat toto.txt" through GPS.Process with an on_match callback
    that echoes the received text to the Messages console, then polls the
    console until the "REACHED THIS" marker shows up. If the marker has
    not appeared after more than 20 polls, exits with a failing status.
    """

    def generic_on_match(process, match, since_last):
        # Echo the text preceding the match, then the match itself.
        GPS.Console("Messages").write(since_last.strip() + '\n' + match)

    # Launch a process that outputs a non utf-8 string,
    # with a function called on match which writes the text
    # to the console.
    GPS.Process("cat toto.txt", regexp=r".+", on_match=generic_on_match)

    waited = 0

    # The non-utf8 text should not cause a problem and the subsequent
    # text "REACHED THIS" should be present in the console.
    while "REACHED THIS" not in GPS.Console().get_text():
        waited += 1
        if waited > 20:
            # GPS.exit takes (force, status): a bare GPS.exit(1) would set
            # force and leave the exit status at 0, so a timed-out run
            # could be reported as a success. Pass the status explicitly.
            GPS.exit(force=True, status=1)
        # Give the process some time before polling the console again.
        yield timeout(100)

View File

@@ -0,0 +1 @@
title: 'non_utf8_process_output'

View File

@@ -0,0 +1,3 @@
HELLO
ééééééééééééééééééééééééééééééééééééééééééééééééééééééé
REACHED THIS