PC19-012: Keep offset in tokens

2026-02-12 12:28:12 -08:00 · 2016-12-19 12:52:30 +01:00
parent 9e2a1f48bd
commit 25f795d386
5 changed files with 14 additions and 2 deletions
--- a/langkit/templates/lexer/pkg_lexer_body_ada.mako
+++ b/langkit/templates/lexer/pkg_lexer_body_ada.mako
@@ -28,6 +28,7 @@ package body ${_self.ada_api_settings.lib_name}.Lexer is
      Text_Length              : size_t;
      Start_Line, End_Line     : Unsigned_32;
      Start_Column, End_Column : Unsigned_16;
+      Offset                   : Unsigned_32;
   end record
      with Convention => C;
   type Interface_Token_Access is access all Quex_Token_Type;
@@ -190,7 +191,8 @@ package body ${_self.ada_api_settings.lib_name}.Lexer is
                      T        => (Kind       =>
                                      Token_Kind'Enum_Val (Token.Id),
                                   Text       => Text,
-                                   Sloc_Range => Sloc_Range)));
+                                   Sloc_Range => Sloc_Range,
+                                   Offset     => Token.Offset)));

                  Last_Token_Was_Trivia := True;
               end if;
@@ -208,7 +210,8 @@ package body ${_self.ada_api_settings.lib_name}.Lexer is
           (TDH.Tokens,
            (Kind       => Token_Kind'Enum_Val (Token.Id),
             Text       => Text,
-             Sloc_Range => Sloc_Range));
+             Sloc_Range => Sloc_Range,
+             Offset     => Token.Offset));
         Prepare_For_Trivia;

      % if lexer.token_actions['WithTrivia']:
--- a/langkit/templates/lexer/pkg_lexer_spec_ada.mako
+++ b/langkit/templates/lexer/pkg_lexer_spec_ada.mako
@@ -1,5 +1,7 @@
 ## vim: filetype=makoada

+with Interfaces;          use Interfaces;
+
 with Langkit_Support.Slocs; use Langkit_Support.Slocs;
 with Langkit_Support.Text;  use Langkit_Support.Text;
 with Langkit_Support.Token_Data_Handlers;
@@ -27,6 +29,8 @@ package ${_self.ada_api_settings.lib_name}.Lexer is
      --  keywords but actual text for identifiers.

      Sloc_Range : Source_Location_Range;
+
+      Offset     : Unsigned_32;
   end record;

   package Token_Data_Handlers is new Langkit_Support.Token_Data_Handlers
--- a/langkit/templates/lexer/quex_interface_body_c.mako
+++ b/langkit/templates/lexer/quex_interface_body_c.mako
@@ -58,6 +58,7 @@ ${capi.get_name("next_token")}(Lexer* lexer, struct token* tok) {
    tok->end_line = lexer->buffer_tk.end_line;
    tok->start_column = lexer->buffer_tk._column_n;
    tok->end_column = lexer->buffer_tk.end_column;
+    tok->offset = lexer->buffer_tk.offset;

    return tok->id != 0;
 }
--- a/langkit/templates/lexer/quex_interface_header_c.mako
+++ b/langkit/templates/lexer/quex_interface_header_c.mako
@@ -17,6 +17,7 @@ struct token {
    /* Source location for this token.  */
    uint32_t start_line, end_line;
    uint16_t start_column, end_column;
+    uint32_t offset;
 };


--- a/langkit/templates/lexer/quex_lexer_spec.mako
+++ b/langkit/templates/lexer/quex_lexer_spec.mako
@@ -21,15 +21,18 @@ token_type {
        end_line   : size_t;
        end_column : uint16_t;
        last_id    : uint16_t;
+        offset     : uint32_t;
    }

    take_text {
        if( Begin != LexemeNull ) {
            self.text = Begin;
            self.len = (size_t)(End - Begin);
+            self.offset = (uint32_t)(Begin - analyzer.buffer._memory._front);
        } else {
            self.text = LexemeNull;
            self.len = 0;
+            self.offset = 0;
        }

        /* This token copied the text from the chunk into the string,