"""
This module provides the various documentation parts to be included in the
generated code.

This is useful in the context of bindings: many docstrings are very similar
there, leading to the usual maintenance problems that come with code
duplication. This module is an attempt to reduce such duplication and thus to
avoid the corresponding maintenance problems.

In order to achieve this, we consider that there are entities to document in
various places and that some entities appear in multiple contexts (for
instance in the Ada code and in all bindings). We assign these entities unique
names ("documentation entity names"), assign them a documentation chunk here
and refer to them in code generation.

Because some documentation must vary depending on the context (for instance,
the interface of entities can depend on the language binding that exposes
them), these chunks are implemented as Mako templates.

All templates can use the "lang" parameter, which contains "ada", "c" or
"python" depending on the binding for which we generate documentation.
"""
from __future__ import annotations
import inspect
import textwrap
from dataclasses import dataclass, replace
from typing import (
Any, Callable, Dict, List, Optional, Set, TYPE_CHECKING, Union, cast
)
import docutils.frontend
import docutils.nodes
import docutils.parsers
import docutils.parsers.rst
import docutils.parsers.rst.roles
import docutils.utils
from mako.template import Template
from langkit.diagnostics import (
Severity, check_source_language,
diagnostic_context, get_current_location
)
from langkit.utils import memoized
if TYPE_CHECKING:
from typing import Protocol
from langkit.compile_context import CompileCtx
from langkit.compiled_types import ASTNodeType, CompiledType
NodeNameGetter = Callable[[CompileCtx, ASTNodeType], str]
else:
# We want to support Python 3.7, and typing.Protocol was introduced in
# Python 3.8. Our only use of this, as a base class, is to type-check the
# codebase using Mypy, so we can just use object at runtime.
Protocol = object
class DocDatabase:
"""
Database for documentation entries.
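
    A minimal usage sketch (illustrative only; ``Template`` is
    ``mako.template.Template``)::

        db = DocDatabase({'foo': Template('Doc for ${lang}.')})
        db['foo'].render(lang='python')  # Marks 'foo' as used
        db.report_unused()  # Prints nothing, as all entries were used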
"""
def __init__(self, dict: Dict[str, Template]) -> None:
self._dict = dict
"""
Documentation database.
"""
self._used: Set[str] = set()
"""
        Set of documentation entry names that were actually used.
"""
def __getitem__(self, key: str) -> Template:
self._used.add(key)
return self._dict[key]
def report_unused(self) -> None:
"""
        Report on the standard output all documentation entries that have not
        been used. Either they should be used, or they should be removed.
"""
unused = set(self._dict) - self._used
if unused:
print('The following documentation entries were not used in code'
' generation:')
for k in sorted(unused):
print(' ', k)
def instantiate_templates(doc_dict: Dict[str, str]) -> DocDatabase:
"""
Turn a pure text documentation database into a Mako template one.
:param doc_dict: Documentation database to convert.
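
    A hedged example (the entry name here is made up)::

        >>> db = instantiate_templates({'x.y': 'Doc for ${lang}.'})
        >>> db['x.y'].render(lang='python')
        'Doc for python.'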
"""
return DocDatabase({key: Template(val) for key, val in doc_dict.items()})
base_langkit_docs = {
#
# Main analysis types
#
'langkit.analysis_context_type': """
This type represents a context for all source analysis. This is the
first type you need to create to use ${ctx.lib_name}. It will contain
the results of all analysis, and is the main holder for all the data.
        You can create several analysis contexts if you need to, which enables
        you, for example, to:

        * analyze several different projects at the same time;
        * analyze different parts of the same project in parallel.
In the current design, contexts always keep all of their analysis units
allocated. If you need to get this memory released, the only option at
your disposal is to destroy your analysis context instance.
% if lang == 'c':
This structure is partially opaque: some fields are exposed to allow
direct access, for performance concerns.
% endif
""",
'langkit.analysis_unit_type': """
This type represents the analysis of a single file.
% if lang != 'python':
This type has strong-reference semantics and is ref-counted.
Furthermore, a reference to a unit contains an implicit reference to
the context that owns it. This means that keeping a reference to a unit
        will keep the context and all the units it contains allocated.
% endif
% if lang == 'c':
This structure is partially opaque: some fields are exposed to allow
direct access, for performance concerns.
% endif
""",
'langkit.node_type': """
Data type for all nodes. Nodes are assembled to make up a tree. See
the node primitives below to inspect such trees.
% if lang != 'python':
Unlike for contexts and units, this type has weak-reference semantics:
        keeping a reference to a node has no effect on the decision to keep
        the unit that owns it allocated. This means that once all references
        to the context and units related to a node are dropped, the context
        and its units are deallocated and the node becomes a stale reference:
        most operations on it will raise a ``Stale_Reference_Error``.

        Note that since reparsing an analysis unit deallocates all the nodes
        it contains, this operation makes all references to these nodes stale
        as well.
% endif
""",
'langkit.node_kind_type': """
Kind of AST nodes in parse trees.
""",
'langkit.symbol_type': """
Reference to a symbol. Symbols are owned by analysis contexts, so they
must not outlive them. This type exists only in the C API, and roughly
wraps the corresponding Ada type (an array fat pointer).
""",
'langkit.env_rebindings_type': """
Data type for env rebindings. For internal use only.
""",
'langkit.sloc_type': """
Location in a source file. Line and column numbers are one-based.
""",
'langkit.sloc_range_type': """
Location of a span of text in a source file.
""",
'langkit.token_kind': """
Kind for this token.
""",
'langkit.token_reference_type': """
Reference to a token in an analysis unit.
""",
'langkit.text_type': """
String encoded in UTF-32 (native endianness).
""",
'langkit.text_type.chars': """
Address for the content of the string.
""",
'langkit.text_type.length': """
Size of the string (in characters).
""",
'langkit.big_integer_type': """
Arbitrarily large integer.
""",
'langkit.diagnostic_type': """
Diagnostic for an analysis unit: cannot open the source file, parsing
error, ...
""",
'langkit.exception_kind_type': """
Enumerated type describing all possible exceptions that need to be
handled in the C bindings.
""",
'langkit.exception_type': """
Holder for native exceptions-related information. Memory management
for this and all the fields is handled by the library: one just has to
make sure not to keep references to it.
.. TODO: For the moment, this structure contains already formatted
information, but depending on possible future Ada runtime
improvements, this might change.
""",
'langkit.exception_type.kind': """
The kind of this exception.
""",
'langkit.exception_type.information': """
Message and context information associated with this exception.
""",
'langkit.invalid_unit_name_error': """
Raised when an invalid unit name is provided.
""",
'langkit.native_exception': """
Exception raised in language bindings when the underlying C API reports
an unexpected error that occurred in the library.
This kind of exception is raised for internal errors: they should never
happen in normal situations and if they are raised at some point, it
means the library state is potentially corrupted.
Nevertheless, the library does its best not to crash the program,
materializing internal errors using this kind of exception.
""",
'langkit.precondition_failure': """
Exception raised when an API is called while its preconditions are not
satisfied.
""",
'langkit.property_error': """
Exception that is raised when an error occurs while evaluating any
${'function' if lang == 'ada' else 'AST node method'}
whose name starts with
        ``${'P_' if lang == 'ada' else 'p_'}``. This is the only exception
        that such functions can raise.
""",
'langkit.invalid_symbol_error': """
        Exception raised when an invalid symbol is passed to a subprogram.
""",
'langkit.stale_reference_error': """
Exception raised while trying to access data that was deallocated. This
happens when one tries to use a node whose unit has been reparsed, for
instance.
""",
'langkit.unknown_charset': """
Raised by lexing functions (``${ctx.lib_name}.Lexer``) when the input
charset is not supported.
""",
'langkit.invalid_input': """
Raised by lexing functions (``${ctx.lib_name}.Lexer``) when the input
contains an invalid byte sequence.
""",
'langkit.syntax_error': """
Subprograms may raise this when they try to parse invalid syntax.
% if lang == "ocaml":
Also raised if a field in a parsing node is null due to a syntax
error.
% else:
Note that this does *not* concern analysis unit getters, which
create diagnostic vectors for such errors.
% endif
""",
'langkit.file_read_error': """
Subprograms may raise this when they cannot open a source file. Note
that this does *not* concern analysis unit getters, which create
diagnostic vectors for such errors.
""",
'langkit.introspection.bad_type_error': """
Raised when introspection functions (``${ctx.lib_name}.Introspection``)
are provided mismatching types/values.
""",
'langkit.introspection.out_of_bounds_error': """
Raised when introspection functions (``${ctx.lib_name}.Introspection``)
are passed an out of bounds index.
""",
'langkit.rewriting.template_format_error': """
Exception raised when a template has an invalid syntax, such as badly
formatted placeholders.
""",
'langkit.rewriting.template_args_error': """
Exception raised when the provided arguments for a template don't match
what the template expects.
""",
'langkit.rewriting.template_instantiation_error': """
Exception raised when the instantiation of a template cannot be parsed.
""",
#
# Analysis primitives
#
'langkit.create_context': """
Create a new analysis context.
``Charset`` will be used as a default charset to decode input sources
in analysis units. Please see ``GNATCOLL.Iconv`` for several supported
charsets. Be careful: passing an unsupported charset is not guaranteed
to raise an error here. If no charset is provided,
``"${ctx.default_charset}"`` is the default.
.. TODO: Passing an unsupported charset here is not guaranteed to
raise an error right here, but this would be really helpful for
users.
When ``With_Trivia`` is true, the parsed analysis units will contain
trivias.
        If provided, ``File_Reader`` will be used to fetch the contents of
        source files instead of the default, which is to just read them from
        the filesystem and decode them using the regular charset rules. Note
        that if
provided, all parsing APIs that provide a buffer are forbidden, and any
use of the rewriting API with the returned context is rejected.
If provided, ``Unit_Provider`` will be used to query the file name
that corresponds to a unit reference during semantic analysis. If
it is ``${null}``, the default one is used instead.
If provided, ``Event_Handler`` will be notified when various events
happen.
``Tab_Stop`` is a positive number to describe the effect of tabulation
characters on the column number in source files.
""",
'langkit.allocate_context': """
Allocate a new analysis context.
""",
'langkit.initialize_context': """
Initialize an analysis context. Must be called right after
``Allocate_Context`` on its result.
        Having separate primitives for allocation/initialization allows
        library bindings to have a context wrapper (created between the two
        calls) ready for the callbacks that happen during context
        initialization (for instance "unit parsed" events).
""",
'langkit.context_incref': """
Increase the reference count to an analysis context.
% if lang == 'c':
Return the reference for convenience.
% endif
""",
'langkit.context_decref': """
Decrease the reference count to an analysis context. Destruction
happens when the ref-count reaches 0.
""",
'langkit.context_hash': """
Return a hash for this context, to be used in hash tables.
""",
'langkit.context_symbol': """
If the given string is a valid symbol, yield it as a symbol and return
true. Otherwise, return false.
""",
'langkit.context_discard_errors_in_populate_lexical_env': """
Debug helper. Set whether ``Property_Error`` exceptions raised in
``Populate_Lexical_Env`` should be discarded. They are by default.
""",
'langkit.context_set_logic_resolution_timeout': """
If ``Timeout`` is greater than zero, set a timeout for the resolution
of logic equations. The unit is the number of steps in ANY/ALL
relations. If ``Timeout`` is zero, disable the timeout. By default,
the timeout is ``100 000`` steps.
""",
'langkit.get_unit_from_file': """
Create a new analysis unit for ``Filename`` or return the existing one
if any. If ``Reparse`` is true and the analysis unit already exists,
reparse it from ``Filename``.
``Rule`` controls which grammar rule is used to parse the unit.
Use ``Charset`` in order to decode the source. If ``Charset`` is empty
then use the context's default charset.
If any failure occurs, such as file opening, decoding, lexing or
parsing failure, return an analysis unit anyway: errors are described
as diagnostics of the returned analysis unit.
% if lang == 'ada':
It is invalid to pass ``True`` to ``Reparse`` if a rewriting context is
active.
% endif
""",
'langkit.get_unit_from_buffer': """
Create a new analysis unit for ``Filename`` or return the existing one
if any. Whether the analysis unit already exists or not, (re)parse it
from the source code in ``Buffer``.
``Rule`` controls which grammar rule is used to parse the unit.
Use ``Charset`` in order to decode the source. If ``Charset`` is empty
then use the context's default charset.
If any failure occurs, such as file opening, decoding, lexing or
parsing failure, return an analysis unit anyway: errors are described
as diagnostics of the returned analysis unit.
% if lang == 'ada':
Calling this is invalid if a rewriting context is active.
% endif
""",
'langkit.get_unit_from_provider': """
Create a new analysis unit for ``Name``/``Kind`` or return the existing
one if any. If ``Reparse`` is true and the analysis unit already
exists, reparse it from the on-disk source file.
The ``Name`` and ``Kind`` arguments are forwarded directly to query the
context's unit provider and get the filename for the returned unit.
% if lang == 'python':
``Name`` must be a string, while ``Kind`` must be an
``AnalysisUnitKind`` enumeration value.
% endif
See the documentation of the relevant unit provider for their exact
semantics.
Use ``Charset`` in order to decode the source. If ``Charset`` is empty
then use the context's default charset.
        If the unit name cannot be turned into a file name,
% if lang == 'ada':
raise an ``Invalid_Unit_Name_Error`` exception.
% elif lang == 'python':
raise an ``InvalidUnitNameError`` exception.
% else:
return ``${null}``.
% endif
If any other failure occurs, such as file opening, decoding, lexing or
parsing failure, return an analysis unit anyway: errors are described
as diagnostics of the returned analysis unit.
% if lang == 'ada':
It is invalid to pass ``True`` to ``Reparse`` if a rewriting context is
active.
% endif
""",
'langkit.unit_context': """
Return the context that owns this unit.
""",
'langkit.unit_hash': """
Return a hash for this unit, to be used in hash tables.
""",
'langkit.unit_reparse_file': """
Reparse an analysis unit from the associated file.
Use ``Charset`` in order to decode the source. If ``Charset`` is empty
then use the context's default charset.
If any failure occurs, such as decoding, lexing or parsing failure,
        diagnostics are emitted to explain what happened.
""",
'langkit.unit_reparse_buffer': """
Reparse an analysis unit from a buffer.
Use ``Charset`` in order to decode the source. If ``Charset`` is empty
then use the context's default charset.
If any failure occurs, such as decoding, lexing or parsing failure,
        diagnostics are emitted to explain what happened.
""",
'langkit.unit_reparse_generic': """
Reparse an analysis unit from a buffer, if provided, or from the
original file otherwise. If ``Charset`` is empty or ``${null}``, use
        the last charset successfully used for this unit, otherwise use it to
decode the content of the source file.
If any failure occurs, such as decoding, lexing or parsing failure,
        diagnostics are emitted to explain what happened.
""",
'langkit.unit_root': """
Return the root node for this unit, or ``${null}`` if there is none.
""",
'langkit.unit_first_token': """
Return a reference to the first token scanned in this unit.
""",
'langkit.unit_last_token': """
Return a reference to the last token scanned in this unit.
""",
'langkit.unit_token_count': """
Return the number of tokens in this unit.
""",
'langkit.unit_trivia_count': """
Return the number of trivias in this unit. This is 0 for units that
were parsed with trivia analysis disabled.
""",
'langkit.unit_text': """
Return the source buffer associated to this unit.
""",
'langkit.unit_lookup_token': """
Look for a token in this unit that contains the given source location.
If this falls before the first token, return the first token. If this
falls between two tokens, return the token that appears before. If this
falls after the last token, return the last token. If there is no token
in this unit, return no token.
""",
'langkit.unit_dump_lexical_env': """
Debug helper: output the lexical envs for the given analysis unit.
""",
'langkit.unit_filename': """
Return the filename this unit is associated to.
% if lang == 'c':
The returned string is dynamically allocated and the caller must
free it when done with it.
% endif
""",
'langkit.unit_diagnostic_count': """
Return the number of diagnostics associated to this unit.
""",
'langkit.unit_diagnostic': """
Get the Nth diagnostic in this unit and store it into
``*diagnostic_p``. Return zero on failure (when N is too big).
""",
'langkit.unit_has_diagnostics': """
Return whether this unit has associated diagnostics.
""",
'langkit.unit_diagnostics': """
Return an array that contains the diagnostics associated to this unit.
""",
'langkit.unit_populate_lexical_env': """
Create lexical environments for this analysis unit, according to the
specifications given in the language spec.
If not done before, it will be automatically called during semantic
analysis. Calling it before enables one to control where the latency
occurs.
Depending on whether errors are discarded (see
``Discard_Errors_In_Populate_Lexical_Env``),
% if lang == 'c':
return ``0`` on failure and ``1`` on success.
% else:
raise a ``Property_Error`` on failure.
% endif
""",
#
# General AST node primitives
#
'langkit.node_kind': """
Return the kind of this node.
""",
'langkit.kind_name': """
Helper for textual dump: return the kind name for this node.
% if lang == 'c':
The returned string is a copy and thus must be free'd by the caller.
% endif
""",
'langkit.node_unit': """
Return the analysis unit that owns this node.
""",
'langkit.node_text': """
Return the source buffer slice corresponding to the text that spans
between the first and the last tokens of this node.
Note that this returns the empty string for synthetic nodes.
""",
'langkit.node_sloc_range': """
Return the spanning source location range for this node.
Note that this returns the sloc of the parent for synthetic nodes.
""",
'langkit.lookup_in_node': """
        Return the bottom-most node in ``Node`` and its children which
contains ``Sloc``, or ``${null}`` if there is none.
""",
'langkit.node_children_count': """
Return the number of children in this node.
""",
'langkit.node_child': """
        Return the Nth child in this node's fields and store it into
``*child_p``. Return zero on failure (when ``N`` is too big).
""",
'langkit.node_is_null': """
Return whether this node is a null node reference.
""",
'langkit.node_is_equivalent': """
Return whether the two nodes are equivalent.
""",
'langkit.node_hash': """
Return a hash for the given node.
""",
'langkit.node_is_token_node': """
Return whether this node is a node that contains only a single token.
""",
'langkit.node_is_synthetic': """
Return whether this node is synthetic.
""",
'langkit.node_image': """
Return a representation of this node as a string.
""",
'langkit.entity_image': """
Return a representation of this entity as a string.
""",
'langkit.token_text': """
Return the text of the given token.
""",
'langkit.token_sloc_range': """
Return the source location range of the given token.
""",
'langkit.text_to_locale_string': """
Encode some text using the current locale. The result is dynamically
allocated: it is up to the caller to free it when done with it.
This is a development helper to make it quick and easy to print token
and diagnostic text: it ignores errors (when the locale does not
support some characters). Production code should use real conversion
routines such as libiconv's in order to deal with UTF-32 texts.
""",
'langkit.free': """
Free dynamically allocated memory.
This is a helper to free objects from dynamic languages.
""",
'langkit.destroy_text': """
If this text object owns the buffer it references, free this buffer.
        Note that even though this accepts a pointer to a text object, it
        does not deallocate the text object itself, but rather the buffer it
references.
""",
'langkit.symbol_text': """
Return the text associated to this symbol.
""",
'langkit.create_big_integer': """
Create a big integer from its string representation (in base 10).
""",
'langkit.big_integer_text': """
Return the string representation (in base 10) of this big integer.
""",
'langkit.big_integer_decref': """
Decrease the reference count for this big integer.
""",
'langkit.get_versions': """
Allocate strings to represent the library version number and build date
and put them in Version/Build_Date. Callers are expected to call free()
        on the returned strings once done.
""",
'langkit.string_type': """
Type to contain Unicode text data.
""",
'langkit.create_string': """
        Create a string value from its content (UTF-32 with native
        endianness).
Note that the CONTENT buffer argument is copied: the returned value
does not contain a reference to it.
""",
'langkit.string_dec_ref': """
Decrease the reference count for this string.
""",
#
# Iterators
#
'langkit.iterator_type': """
% if lang == 'python':
Base class for Ada iterator bindings.
        % endif
        An iterator provides a means to retrieve values one at a time.
% if lang == 'ada':
Resource management for iterators is automatic.
% endif
Currently, each iterator is bound to the analysis context used to
        create it. Iterators are invalidated as soon as any unit of that
        context is reparsed. Due to the nature of iterators (lazy
computations), this invalidation is necessary to avoid use of
inconsistent state, such as an iterator trying to use analysis context
data that is stale.
""",
'langkit.iterator_next': """
% if lang == 'c':
Set the next value from the iterator in the given element pointer.
Return ``1`` if successful, otherwise ``0``.
% elif lang == 'ada':
Set the next value from the iterator in the given out argument.
Return True if successful, otherwise False.
% elif lang == 'python':
        Return the next value from the iterator. Raise ``StopIteration`` if
        there are no more elements to retrieve.
% endif
This raises a ``Stale_Reference_Error`` exception if the iterator is
invalidated.
""",
#
# File readers
#
'langkit.file_reader_type': """
Interface to override how source files are fetched and decoded.
""",
'langkit.create_file_reader': """
Create a file reader. When done with it, the result must be passed to
``${capi.get_name('dec_ref_file_reader')}``.
Pass as ``data`` a pointer to hold your private data: it will be passed
to all callbacks below.
``destroy`` is a callback that is called by
``${capi.get_name('dec_ref_file_reader')}`` to leave a chance to free
resources that ``data`` may hold.
``read`` is a callback. For a given filename/charset and whether to
read the BOM (Byte Order Mark), it tries to fetch the contents of the
source file, returned in ``Contents``. If there is an error, it must
return it in ``Diagnostic`` instead.
""",
'langkit.file_reader_read': """
Read the content of the source at the given filename, decoding it using
the given charset and decoding the byte order mark if ``Read_BOM`` is
true.
If there is an error during this process, append an error message to
Diagnostics. In that case, Contents is considered uninitialized.
Otherwise, allocate a Text_Type buffer, fill it and initialize Contents
to refer to it.
""",
'langkit.file_reader_inc_ref': """
Create an ownership share for this file reader.
""",
'langkit.file_reader_dec_ref': """
Release an ownership share for this file reader. This destroys the file
reader if there are no shares left.
% if lang == 'ada':
Return whether there are no ownership shares left.
% endif
""",
'langkit.file_reader_destroy_type': """
Callback type for functions that are called when destroying a file
reader.
""",
'langkit.file_reader_read_type': """
Callback type for functions that are called to fetch the decoded source
buffer for a requested filename.
""",
#
# Event handlers
#
'langkit.create_event_handler': """
Create an event handler. When done with it, the result must be passed
to ``${capi.get_name('dec_ref_event_handler')}``.
Pass as ``data`` a pointer to hold your private data: it will be passed
to all callbacks below.
``destroy`` is a callback that is called by
``${capi.get_name('dec_ref_event_handler')}`` to leave a chance to
free resources that ``data`` may hold.
``unit_requested`` is a callback that will be called when a unit is
requested.
.. warning:: Please note that the unit requested callback can be called
            *many* times for the same unit, so in all likelihood, those events
should be filtered if they're used to forward diagnostics to the
user.
``unit_parsed`` is a callback that will be called when a unit is
parsed.
""",
'langkit.event_handler_type': """
Interface to handle events sent by the analysis context.
""",
'langkit.event_handler_unit_requested_callback': """
Callback that will be called when a unit is requested from the context
``Context``.
``Name`` is the name of the requested unit.
``From`` is the unit from which the unit was requested.
``Found`` indicates whether the requested unit was found or not.
``Is_Not_Found_Error`` indicates whether the fact that the unit was not
found is an error or not.
.. warning:: The interface of this callback is probably subject to
change, so should be treated as experimental.
""",
'langkit.event_handler_unit_parsed_callback': """
Callback that will be called when any unit is parsed from the context
``Context``.
``Unit`` is the resulting unit.
``Reparsed`` indicates whether the unit was reparsed, or whether it was
the first parse.
""",
'langkit.event_handler_destroy_callback': """
Callback type for functions that are called when destroying an event
handler.
""",
'langkit.event_handler_inc_ref': """
Create an ownership share for this event handler.
""",
'langkit.event_handler_dec_ref': """
Release an ownership share for this event handler. This destroys the
event handler if there are no shares left.
% if lang == 'ada':
Return whether there are no ownership shares left.
% endif
""",
#
# Unit providers
#
'langkit.unit_provider_type': """
Interface to fetch analysis units from a name and a unit kind.
The unit provider mechanism provides an abstraction which assumes that
to any couple (unit name, unit kind) we can associate at most one
source file. This means that several couples can be associated to the
        same source file, but on the other hand, only one source file can
be associated to a couple.
        This is used to make the semantic analysis able to switch from one
        analysis unit to another.
See the documentation of each unit provider for the exact semantics of
the unit name/kind information.
""",
'langkit.unit_provider_get_unit_filename': """
Return the filename corresponding to the given unit name/unit kind.
% if lang == 'ada':
Raise a ``Property_Error``
% else:
Return ``${null}``
% endif
if the given unit name is not valid.
""",
'langkit.unit_provider_get_unit_from_name': """
Fetch and return the analysis unit referenced by the given unit name.
% if lang == 'ada':
Raise a ``Property_Error``
% else:
Return ``${null}``
% endif
if the given unit name is not valid.
""",
'langkit.unit_provider_inc_ref': """
Create an ownership share for this unit provider.
""",
'langkit.unit_provider_dec_ref': """
Release an ownership share for this unit provider. This destroys the
unit provider if there are no shares left.
% if lang == 'ada':
Return whether there are no ownership shares left.
% endif
""",
'langkit.create_unit_provider': """
Create a unit provider. When done with it, the result must be passed to
``${capi.get_name('destroy_unit_provider')}``.
Pass as ``data`` a pointer to hold your private data: it will be passed
to all callbacks below.
``destroy`` is a callback that is called by
``${capi.get_name('destroy_unit_provider')}`` to leave a chance to free
resources that ``data`` may hold.
``get_unit_from_node`` is a callback. It turns an analysis unit
reference represented as a node into an analysis unit. It should return
``${null}`` if the node is not a valid unit name representation.
``get_unit_from_name`` is a callback similar to ``get_unit_from_node``
except it takes an analysis unit reference represented as a string.
""",
'langkit.unit_provider_destroy_type': """
        Callback type for functions that are called when destroying a unit
        provider.
""",
'langkit.unit_provider_get_unit_filename_type': """
        Callback type for functions that are called to turn a unit reference
        encoded as a unit name into a file name.
""",
'langkit.unit_provider_get_unit_from_name_type': """
Callback type for functions that are called to turn a unit reference
encoded as a unit name into an analysis unit.
""",
#
# Misc
#
'langkit.get_last_exception': """
Return exception information for the last error that happened in the
current thread. Will be automatically allocated on error and free'd on
the next error.
""",
'langkit.exception_name': """
Return the name of the given exception kind. Callers are responsible
for free'ing the result.
""",
'langkit.synthetic_nodes': """
Set of nodes that are synthetic.
Parsers cannot create synthetic nodes, so these correspond to no source
text. These nodes are created dynamically for convenience during
semantic analysis.
""",
'langkit.token_kind_name': """
Return a human-readable name for a token kind.
% if lang == 'c':
The returned string is dynamically allocated and the caller must free
it when done with it.
If the given kind is invalid, return ``NULL`` and set the last
exception accordingly.
% endif
""",
'langkit.token_next': """
Return a reference to the next token in the corresponding analysis
unit.
""",
'langkit.token_previous': """
Return a reference to the previous token in the corresponding analysis
unit.
""",
'langkit.token_range_until': """
Return ${'an iterator on' if lang == 'python' else ''} the list of
tokens that spans between
% if lang == 'python':
``self`` and ``other``
% else:
the two input tokens
% endif
(included). This returns an empty list if the first token appears after
the other one in the source code.
% if lang == 'python':
Raise a ``ValueError`` if both tokens come from different analysis
units.
% endif
""",
'langkit.token_is_equivalent': """
Return whether ``L`` and ``R`` are structurally equivalent tokens. This
means that their position in the stream won't be taken into account,
only the kind and text of the token.
""",
'langkit.token_range_text': """
Compute the source buffer slice corresponding to the text that spans
between the ``First`` and ``Last`` tokens (both included). This yields
an empty slice if ``Last`` actually appears before ``First``.
% if lang == 'c':
Put the result in ``RESULT``.
% endif
% if lang == 'ada':
This raises a ``Constraint_Error``
% elif lang == 'c':
This returns ``0``
% elif lang == 'python':
This raises a ``ValueError``
% endif
if ``First`` and ``Last`` don't belong to the same analysis unit.
% if lang == 'c':
Return ``1`` if successful.
% endif
""",
'langkit.token_is_trivia': """
Return whether this token is a trivia. If it's not, it's a regular
token.
""",
'langkit.token_index': """
% if lang == 'ada':
One-based
% else:
Zero-based
% endif
index for this token/trivia. Tokens and trivias get their own index
space.
""",
#
    # Rewriting
#
'langkit.rewriting.rewriting_handle_type': """
        Handle for an analysis context rewriting session.
""",
'langkit.rewriting.unit_rewriting_handle_type': """
Handle for the process of rewriting an analysis unit. Such handles are
owned by a Rewriting_Handle instance.
""",
'langkit.rewriting.node_rewriting_handle_type': """
Handle for the process of rewriting an AST node. Such handles are owned
by a Rewriting_Handle instance.
""",
'langkit.rewriting.context_handle': """
Return the rewriting handle associated to Context, or
No_Rewriting_Handle if Context is not being rewritten.
""",
'langkit.rewriting.handle_context': """
        Return the analysis context associated to Handle.
""",
'langkit.rewriting.start_rewriting': """
Start a rewriting session for Context.
This handle will keep track of all changes to do on Context's analysis
units. Once the set of changes is complete, call the Apply procedure to
actually update Context. This makes it possible to inspect the "old"
Context state while creating the list of changes.
There can be only one rewriting session per analysis context, so this
will raise an Existing_Rewriting_Handle_Error exception if Context
already has a living rewriting session.
""",
'langkit.rewriting.abort_rewriting': """
        Discard all modifications registered in Handle and close Handle.
""",
'langkit.rewriting.apply': """
Apply all modifications to Handle's analysis context. If that worked,
close Handle and return (Success => True). Otherwise, reparsing did not
work, so keep Handle and its Context unchanged and return details about
the error that happened.
""",
'langkit.rewriting.unit_handles': """
Return the list of unit rewriting handles in the given context handle
for units that the Apply primitive will modify.
""",
'langkit.rewriting.unit_handle': """
        Return the rewriting handle corresponding to Unit.
""",
'langkit.rewriting.handle_unit': """
        Return the unit corresponding to Handle.
""",
'langkit.rewriting.root': """
Return the node handle corresponding to the root of the unit which
Handle designates.
""",
'langkit.rewriting.set_root': """
Set the root node for the unit Handle to Root. This unties the previous
root handle. If Root is not No_Node_Rewriting_Handle, this also ties
Root to Handle.
Root must not already be tied to another analysis unit handle.
""",
'langkit.rewriting.unit_unparse': """
Return the text associated to the given unit.
""",
'langkit.rewriting.node_handle': """
Return the rewriting handle corresponding to Node.
The owning unit of Node must be free of diagnostics.
""",
'langkit.rewriting.handle_node': """
Return the node which the given rewriting Handle relates to. This can
be the null entity if this handle designates a new node.
""",
'langkit.rewriting.node_context': """
        Return a handle for the rewriting context to which Handle belongs.
""",
'langkit.rewriting.unparse': """
        Turn the rewritten node that Handle designates into text. This is the
text that is used in Apply in order to re-create an analysis unit.
""",
'langkit.rewriting.kind': """
        Return the kind corresponding to Handle's node.
""",
'langkit.rewriting.tied': """
Return whether this node handle is tied to an analysis unit. If it is
not, it can be passed as the Child parameter to Set_Child.
""",
'langkit.rewriting.parent': """
Return a handle for the node that is the parent of Handle's node. This
is ``No_Rewriting_Handle`` for a node that is not tied to any tree yet.
""",
'langkit.rewriting.children_count': """
        Return the number of children the node represented by Handle has.
""",
'langkit.rewriting.child': """
Return a handle corresponding to the Index'th child of the node that
Handle represents. Index is 1-based.
""",
'langkit.rewriting.set_child': """
        If Child is ``No_Rewriting_Node``, untie Handle's ``Index``'th child
        from its tree, so it can be attached to another one. Otherwise, Child
        must have no parent as it will be tied to ``Handle``'s tree.
""",
'langkit.rewriting.text': """
Return the text associated to the given token node.
""",
'langkit.rewriting.set_text': """
Override text associated to the given token node.
""",
'langkit.rewriting.replace': """
If Handle is the root of an analysis unit, untie it and set New_Node as
its new root. Otherwise, replace Handle with New_Node in Handle's
parent node.
Note that:
* Handle must be tied to an existing analysis unit handle.
* New_Node must not already be tied to another analysis unit handle.
""",
'langkit.rewriting.insert_child': """
Assuming Handle refers to a list node, insert the given Child node to
be in the children list at the given index.
The given Child node must not be tied to any analysis unit.
""",
'langkit.rewriting.append_child': """
Assuming Handle refers to a list node, append the given Child node to
the children list.
The given Child node must not be tied to any analysis unit.
""",
'langkit.rewriting.remove_child': """
Assuming Handle refers to a list node, remove the child at the given
Index from the children list.
""",
'langkit.rewriting.clone': """
Create a clone of the Handle node tree. The result is not tied to any
analysis unit tree.
""",
'langkit.rewriting.create_node': """
Create a new node of the given Kind, with empty text (for token nodes)
or children (for regular nodes).
""",
'langkit.rewriting.create_token_node': """
        Create a new token node with the given Kind and Text.
""",
'langkit.rewriting.create_regular_node': """
Create a new regular node of the given Kind and assign it the given
Children.
        Except for lists, which can have any number of children, the size of
        Children must match the number of children associated to the given
        Kind. In addition, none of the given children may be tied.
""",
'langkit.rewriting.create_from_template': """
Create a tree of new nodes from the given Template string, replacing
placeholders with nodes in Arguments and parsed according to the given
grammar Rule.
""",
#
# Python-specific
#
'langkit.python.AnalysisUnit.TokenIterator': """
Iterator over the tokens in an analysis unit.
""",
'langkit.python.AnalysisUnit.iter_tokens': """
Iterator over the tokens in an analysis unit.
""",
'langkit.python.AnalysisUnit.diagnostics': """
Diagnostics for this unit.
""",
'langkit.python.Token.__eq__': """
Return whether the two tokens refer to the same token in the same unit.
        Note that this does not actually compare the token data.
""",
'langkit.python.Token.__lt__': """
Consider that None comes before all tokens. Then, sort by unit, token
index, and trivia index.
""",
'langkit.python.Token.to_data': """
Return a dict representation of this Token.
""",
'langkit.python.FileReader.__init__': """
This constructor is an implementation detail, and is not meant to be
used directly.
""",
'langkit.python.UnitProvider.__init__': """
This constructor is an implementation detail, and is not meant to be
used directly.
""",
'langkit.python.root_node.__bool__': """
        Always return True so that checking a node against None can be done as
simply as::
if node:
...
""",
'langkit.python.root_node.__iter__': """
Return an iterator on the children of this node.
""",
'langkit.python.root_node.__len__': """
Return the number of ${pyapi.root_astnode_name} children this node has.
""",
'langkit.python.root_node.__getitem__': """
Return the Nth ${pyapi.root_astnode_name} child this node has.
This handles negative indexes the same way Python lists do. Raise an
IndexError if "key" is out of range.
""",
'langkit.python.root_node.iter_fields': """
Iterate through all the fields this node contains.
Return an iterator that yields (name, value) couples for all abstract
fields in this node. If "self" is a list, field names will be
"item_{n}" with "n" being the index.
""",
'langkit.python.root_node.dump_str': """
Dump the sub-tree to a string in a human-readable format.
""",
'langkit.python.root_node.dump': """
Dump the sub-tree in a human-readable format on the given file.
:param str indent: Prefix printed on each line during the dump.
:param file file: File in which the dump must occur.
""",
'langkit.python.root_node.findall': """
Helper for finditer that will return all results as a list. See
finditer's documentation for more details.
""",
'langkit.python.root_node.find': """
Helper for finditer that will return only the first result. See
finditer's documentation for more details.
""",
'langkit.python.root_node.finditer': """
Find every node corresponding to the passed predicates.
:param ast_type_or_pred: If supplied with a subclass of
${pyapi.root_astnode_name}, will constrain the resulting collection
to only the instances of this type or any subclass. If supplied
with a predicate, it will apply the predicate on every node and
keep only the ones for which it returns True. If supplied with a
list of subclasses of ${pyapi.root_astnode_name}, it will match all
instances of any of them.
:param kwargs: Allows the user to filter on attributes of the node. For
every key value association, if the node has an attribute of name
key that has the specified value, then the child is kept.
""",
'langkit.python.root_node.parent_chain': """
Return the parent chain of self. Self will be the first element,
followed by the first parent, then this parent's parent, etc.
""",
'langkit.python.root_node.tokens': """
Return an iterator on the range of tokens that self encompasses.
""",
'langkit.python.root_node.to_data': """
Return a nested python data-structure, constituted only of standard
data types (dicts, lists, strings, ints, etc), and representing the
portion of the AST corresponding to this node.
""",
'langkit.python.root_node.to_json': """
Return a JSON representation of this node.
""",
'langkit.python.root_node.is_a': """
Shortcut for isinstance(self, types).
:rtype: bool
""",
'langkit.python.root_node.cast': """
Fluent interface style method. Return ``self``, raise an error if self
is not of type ``typ``.
:type typ: () -> T
:rtype: T
""",
}
null_names = {
'ada': 'null',
'c': 'NULL',
'python': 'None',
'ocaml': 'None',
}
todo_markers = {
'ada': '???',
'c': 'TODO:',
'python': 'TODO:',
'ocaml': 'TODO:',
}
class Formatter(Protocol):
def __call__(self,
text: str,
column: int,
width: int = 79) -> str: ...
def get_line(node: Any) -> Optional[int]:
"""
Utility function to get the closest line for a given rst node (since
not all nodes have line information).
"""
if node is None:
return None
if node.line is not None:
return node.line
else:
return get_line(node.parent)
class PassthroughNode(docutils.nodes.Element):
"""
This node implements passthrough behavior, so that we re-emit the
corresponding text. We use it for roles that we want to propagate to
    the generated documentation.
"""
def __init__(self, name: str, text: str):
self.name = name
self.text = text
self.children = []
@staticmethod
def role_fn(
name: Any, rawtext: Any, text: Any, lineno: Any,
inliner: Any, options: Any = {}, content: Any = []
) -> Any:
"""
Role function to create a ``PassthroughNode``.
"""
return [PassthroughNode(name, text)], []
class LangkitTypeRef(docutils.nodes.reference):
"""
    Langkit-specific node for a reference to a Langkit CompiledType. Meant to
    be replaced by our visitor with a type reference node that makes sense in
    the language for which we generate documentation or, if there is no such
    role for that language, with a simple text reference.
"""
@memoized
def get_type(self) -> Optional[CompiledType]:
"""
Return the langkit type this node references.
"""
from langkit.compiled_types import resolve_type
c = self['compiled_type']
if c:
return resolve_type(c)
return None
@staticmethod
def role_fn(
name: Any, rawtext: Any, text: Any, lineno: Any,
inliner: Any, options: Any = {}, content: Any = []
) -> Any:
"""
Role function to create a ``LangkitTypeRef`` node.
"""
from langkit.compiled_types import T
ct = getattr(T, text, None)
node = LangkitTypeRef(rawtext, text, compiled_type=ct, **options)
return [node], []
docutils.parsers.rst.roles.register_local_role(
"typeref", LangkitTypeRef.role_fn
)
#
# Global data used by docutils visitors
#
TAGNAMES_WITH_SURROUNDINGS = {
"literal": "``",
"emphasis": "*",
"strong": "**",
}
SUPPORTED_ADMONITIONS = [
"attention", "caution", "danger", "error", "hint", "important", "note",
"tip", "warning", "admonition"
]
EXPLICITLY_FORBIDDEN_TAGS = ['title_reference']
SUPPORTED_TAGS = [
    "#text", "comment", "field", "paragraph", "list_item", "literal_block",
    "enumerated_list", "field_name", "document", "bullet_list",
    "system_message", "problematic", "warning", "field_list",
    "field_body", "block_quote", "reference", "target"
] + SUPPORTED_ADMONITIONS + list(TAGNAMES_WITH_SURROUNDINGS.keys())
SKIP_CHILDREN = ["field_name", "literal_block"]
class RstCommentChecker(docutils.nodes.GenericNodeVisitor):
"""
Visitor that will be run on docstrings to check that they're correct,
e.g. that they respect the subset of ReST that we're supposed to use,
and the restrictions that we impose on docstrings.
"""
def default_visit(self, node: Any) -> None:
# Forward error messages from the parser itself
        if node.tagname == "system_message":
self.check(node[0], False, node[0].astext())
# Forbid title references, because they're useless in docstrings,
        # and they're a commonly occurring error in our docstrings.
elif node.tagname == 'title_reference':
self.check(
node, False,
"title_reference nodes are forbidden in docstrings. You "
"probably meant to use double backquotes.",
)
# Warn for all node types that are not explicitly supported
elif node.tagname not in SUPPORTED_TAGS:
self.check(
node, False,
f"Unsupported Rst tag: {node.tagname}. Will be excluded "
"from output."
)
# Skip children of nodes that need to be skipped, so that we don't
# mistakenly encounter an unsupported node that we would have skipped
# anyway.
if node.tagname in SKIP_CHILDREN:
raise docutils.nodes.SkipChildren()
    def unknown_visit(self, node: docutils.nodes.Node) -> None:
if isinstance(node, LangkitTypeRef):
ct = node.get_type()
self.check(node, ct is not None, "Wrong type reference")
raise docutils.nodes.SkipChildren()
    def unknown_departure(self, node: docutils.nodes.Node) -> None:
pass
def check(self, node: Any, condition: bool, message: str) -> None:
"""
        Utility method to run a Langkit language-level check with a proper
        sloc inside the rst docstring, if possible.
"""
loc = get_current_location()
if loc is not None:
node_line = get_line(node)
if node_line is not None:
loc = replace(loc, line=loc.line + node_line + 1)
with diagnostic_context(loc):
check_source_language(
condition,
message,
severity=Severity.warning,
ok_for_codegen=True
)
@staticmethod
def check_doc(doc: Optional[str]) -> None:
"""
Shortcut to run this visitor on a given (potentially ``None``)
docstring.
"""
if doc:
rst_doc = rst_document(doc)
visitor = RstCommentChecker(rst_doc)
rst_doc.walk(visitor)
class RstCommentFormatter(docutils.nodes.GenericNodeVisitor):
"""
Docutils ``NodeVisitor``, meant to output a formatted rst docstring, with
text properly wrapped for the given indentation/prefix.
"""
@dataclass
class BlockContext:
        node: docutils.nodes.Node
"""
The node for the block.
"""
initial_prefix: str
"""
The prefix string for the first line of the block.
"""
subsequent_prefix: str
"""
The prefix string for the subsequent lines of the block.
"""
parts: List[str]
"""
The list of text parts that make up the block, and that will be
populated in the visit function.
"""
def __init__(
self,
document: docutils.nodes.document,
prefix: str,
get_node_name: NodeNameGetter,
type_role_name: str = '',
width: int = 79
):
"""
Construct a new ``RstCommentFormatter`` visitor.
:param document: The document this visitor will iterate on.
:param prefix: The string prefix with which we want to prefix every
line of the resulting output. Typically constituted of the
whitespace for the desired indentation, plus the prefix for the
comment style of the output language.
:param get_node_name: Callable that will return the formatted name of a
langkit node type, in the desired style for the output language.
:param type_role_name: String that represents the name of the role for
type references in the doc for the output language.
:param width: Maximum width to which to wrap the output.
"""
super().__init__(document)
# Instantiation data, used to parametrize the output
self.width = width
self.prefix = prefix
self.get_node_name = get_node_name
self.type_role_name = type_role_name
# Context variables
self.surrounding = ""
"""
        For text parts that need to be surrounded (like inline literals),
        this will be set to the appropriate surrounding text.
"""
self.in_enumerated_list = False
self.enumerated_list_item_no = 1
# State variables for the visitor
self.parts: List[str] = []
"""
List of toplevel parts to be concatenated at the end of the visit.
"""
self.block_context_stack: List[RstCommentFormatter.BlockContext] = []
"""
        Stack of block contexts: one entry per nested block, containing what
        is needed to format that block.
"""
@property
def current_parts(self) -> List[str]:
"""
Shortcut property to return the list of current parts for the topmost
entry on the block context stack.
"""
return self.block_context_stack[-1].parts
def append_part(self, part: str) -> None:
"""
Append a part to the list of toplevel parts.
"""
if part:
self.parts.append(part)
def append_context(
self,
node: docutils.nodes.Node,
initial_prefix: str = '',
subsequent_prefix: str = ''
) -> None:
"""
Append a new block context to the block context stack.
"""
if self.block_context_stack:
self.append_part(self.wrap(''.join(self.current_parts)))
self.current_parts.clear()
self.block_context_stack.append(
RstCommentFormatter.BlockContext(
node, initial_prefix, subsequent_prefix, []
)
)
@property
def text(self) -> str:
"""
Get the constructed docstring's text out of this visitor.
"""
lines = f"\n{self.prefix}\n".join(self.parts).splitlines()
return "\n".join(l.rstrip() for l in lines)
@property
def initial_indent(self) -> str:
"""
Helper property to get the indent text for the first line of a
formatted block in the current context.
"""
if self.block_context_stack:
return ''.join(
t.subsequent_prefix for t in self.block_context_stack[:-1]
) + self.block_context_stack[-1].initial_prefix
else:
return ''
@property
def subsequent_indent(self) -> str:
"""
Helper property to get the indent text for the subsequent lines of a
formatted block in the current context.
"""
return ''.join(t.subsequent_prefix for t in self.block_context_stack)
def wrap(self, text: str) -> str:
"""
Helper method to wrap text with the desired settings.
"""
return "\n".join(textwrap.wrap(
text, self.width,
initial_indent=self.prefix + self.initial_indent,
subsequent_indent=self.prefix + self.subsequent_indent
))
    def unknown_visit(self, node: docutils.nodes.Node) -> None:
"""
Visit function for langkit specific nodes.
"""
from langkit.compile_context import get_context
from langkit.compiled_types import ASTNodeType
if isinstance(node, LangkitTypeRef):
ct = node.get_type()
if not ct:
return
# TODO: For the moment ``:typeref:`` will only work for AST node
# types.
assert isinstance(ct, ASTNodeType)
type_name = self.get_node_name(get_context(), ct)
if self.type_role_name:
self.current_parts.append(
f"{self.type_role_name}`{type_name}`"
)
else:
self.current_parts.append(f"``{type_name}``")
raise docutils.nodes.SkipChildren()
elif isinstance(node, PassthroughNode):
self.current_parts.append(f":{node.name}:`{node.text}`")
    def unknown_departure(self, node: docutils.nodes.Node) -> None:
pass
    def default_visit(self, node: docutils.nodes.Node) -> None:
"""
Visit function for generic docutils/sphinx nodes.
"""
if node.tagname == "#text":
# Text nodes are added to parts, and will be later built via the
# builder.
self.current_parts.append(
f"{self.surrounding}{node.astext()}{self.surrounding}"
)
elif node.tagname == "comment":
# Comments are kept, but maybe we should get rid of them?
self.append_context(node, ".. ", " ")
elif node.tagname == "field":
self.append_context(node, f":{node[0].astext()}: ", ' ')
elif node.tagname == "paragraph":
# If a paragraph is not part of a larger block that has a
# builder, then add a simple builder for the paragraph that will
# simply wrap.
if not self.block_context_stack:
self.append_context(node, '', '')
elif node.tagname == "list_item":
if self.in_enumerated_list:
initial_indent = f'{self.enumerated_list_item_no}. '
self.enumerated_list_item_no += 1
else:
initial_indent = '* '
subsequent_indent = ' ' * len(initial_indent)
self.append_context(node, initial_indent, subsequent_indent)
elif node.tagname in SUPPORTED_ADMONITIONS:
self.append_context(node, f".. {node.tagname}:: ", ' ')
elif node.tagname in TAGNAMES_WITH_SURROUNDINGS.keys():
# If we have a node that will "surround" the text inside with
# some special character, set the ``surrounding`` variable,
# which will be used when we get to the inside text.
# NOTE: Inline tags with surrounding characters are handled via a
            # separate mechanism from builders, but we might be able to have
# a stack of builders and only use builders, if we do some
# adjustments.
self.surrounding = TAGNAMES_WITH_SURROUNDINGS[node.tagname]
elif node.tagname == "literal_block":
# Literal blocks are code blocks. For those we want to bypass
# the mechanism we use for every other block that will wrap the
# resulting text, and instead preserve the original formatting.
classes = set(node["classes"])
classes = classes - {"code"}
try:
lang = classes.pop()
except KeyError:
lang = ""
self.append_part(
f"{self.prefix}{self.subsequent_indent}.. code:: {lang}"
.rstrip()
)
self.append_part("\n".join(
f"{self.prefix}{self.subsequent_indent} {l}"
for l in node.astext().splitlines()
))
elif node.tagname == "enumerated_list":
# TODO: Add support for nested enumerated lists
self.in_enumerated_list = True
self.enumerated_list_item_no = 1
if node.tagname not in SUPPORTED_TAGS or node.tagname in SKIP_CHILDREN:
# Skip nodes that are not supported: we know that we have warned
# the user previously, now we can just ignore the content. It
# will be stripped from the output.
raise docutils.nodes.SkipChildren()
def default_departure(self, node: Any) -> None:
"""
Departure (post children traversal) visit function for generic
docutils/sphinx nodes.
"""
if node.tagname == "enumerated_list":
self.in_enumerated_list = False
elif node.tagname in [
"field", "list_item", "paragraph", "comment"
] + SUPPORTED_ADMONITIONS:
if self.block_context_stack[-1].node == node:
self.append_part(self.wrap(''.join(self.current_parts)))
# Reset data
self.block_context_stack.pop()
elif node.tagname in TAGNAMES_WITH_SURROUNDINGS:
self.surrounding = ""
default_settings = docutils.frontend.OptionParser(
components=(docutils.parsers.rst.Parser,)
).get_default_values()
# Don't emit any report on stdout/stderr
default_settings.report_level = 4
@memoized
def rst_document(text: str) -> docutils.nodes.document:
"""
From a given docstring, return a docutils document.
.. note:: This might have large strings as inputs, and as such is maybe
not optimal. We will probably be able to get rid of that by directly
storing the docutils document in entities at some stage, but this is a
good first step solution.
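
    Illustrative sketch of the result::

        doc = rst_document('Some ``literal`` text.')
        doc.astext()  # -> 'Some literal text.'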
"""
document = docutils.utils.new_document("<input>", default_settings)
parser = docutils.parsers.rst.Parser()
parser.parse(text, document)
return document
def make_formatter(
prefix: str = '',
suffix: str = '',
line_prefix: str = '',
get_node_name: NodeNameGetter = lambda c, n: n.name.lower,
type_role_name: str = ''
) -> Formatter:
"""
    Create a formatter function which, given a text that contains a list of
    paragraphs, returns that text formatted correctly: paragraphs are
    wrapped, each line gets the given ``line_prefix``, and the whole result
    gets the given ``prefix`` and ``suffix``.
If the ``:typeref:`` Langkit role is used in the docstring,
``get_node_name`` will be used to translate the name to the proper name in
the given language, and ``type_role_name`` will be used as the name for the
type reference role in the given language.
    The first line of the output text will not be indented, since that is
    what we need in templates.
The resulting function has the following parameters:
* ``text``, which is the original text of the docstring.
* ``column``, which is the starting column at which the resulting docstring
must be indented.
* ``width``, which is an optional parameter which defaults to ``79``,
specifying the maximum width the text must be wrapped to.
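
    A hedged usage sketch (the exact wrapping depends on the settings
    above)::

        fmt = make_formatter(line_prefix='-- ')
        print(fmt('Some documentation.', column=3))
        # Expected shape of the output:
        # -- Some documentation.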
"""
def formatter(text: str, column: int, width: int = 79) -> str:
text = inspect.cleandoc(text)
indent = ' ' * column
pfx = indent + line_prefix
document = rst_document(text)
visitor = RstCommentFormatter(
document, prefix=pfx, get_node_name=get_node_name,
type_role_name=type_role_name
)
document.walkabout(visitor)
return "\n".join([prefix, visitor.text, indent + suffix]).strip()
return formatter
class DocPrinter(Protocol):
def __call__(self,
entity: Union[str, CompiledType],
column: int = 0,
lang: str = '',
**kwargs: Any) -> str: ...
def create_doc_printer(
lang: str,
formatter: Formatter,
) -> DocPrinter:
"""
Return a function that prints documentation.
:param lang: The default language for which we generate documentation.
:param formatter: Function that formats text into source code
documentation. See the ``format_*`` functions above.
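
    Illustrative use of a created printer, assuming a compile context is
    available (``py_doc`` is defined at the end of this module)::

        py_doc('langkit.node_kind', column=4)
        # -> the 'langkit.node_kind' entry rendered for Python and
        #    reformatted as a docstring indented by 4 columns.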
"""
    def func(entity: Union[str, CompiledType],
column: int = 0,
lang: str = lang,
**kwargs: Any) -> str:
"""
:param entity: Name for the entity to document, or entity to document.
:param column: Indentation level for the result.
:param lang: Language for the documentation.
:param kwargs: Parameters to be passed to the specific formatter.
"""
from langkit.compile_context import get_context
from langkit.compiled_types import T
ctx = get_context()
doc: str
if isinstance(entity, str):
doc = ctx.documentations[entity].render(
ctx=get_context(),
capi=ctx.c_api_settings,
pyapi=ctx.python_api_settings,
lang=lang,
null=null_names[lang],
TODO=todo_markers[lang],
T=T,
)
elif entity.doc:
doc = entity.doc
else:
doc = ""
return formatter(doc, column, **kwargs)
func.__name__ = '{}_doc'.format(lang)
return func
# The following are functions which return a docstring as formatted text for
# the given language. See ``make_formatter``'s documentation for the arguments.
format_text = make_formatter()
format_ada = make_formatter(
line_prefix='-- ',
get_node_name=lambda ctx, node: node.entity.api_name,
type_role_name=':ada:ref:'
)
format_c = make_formatter(
prefix='/*', line_prefix=' * ', suffix=' */',
# In the C header, there is only one node type, so use kind enumerators
# instead.
get_node_name=(lambda ctx, node:
ctx.c_api_settings.get_name(node.kwless_raw_name)),
)
format_python = make_formatter(
prefix='"""', suffix='"""',
get_node_name=(lambda ctx, node:
ctx.python_api_settings.type_public_name(node)),
type_role_name=':py:class:'
)
format_ocaml = make_formatter(
prefix='(**', line_prefix=' * ', suffix=' *)',
get_node_name=(lambda ctx, node:
ctx.ocaml_api_settings
.type_public_name(node.entity))
)
# The following are functions which return formatted source code documentation
# for an entity. Their arguments are:
#
# * An entity (string or compiled_types.CompiledType subclass) from which the
#   documentation is retrieved.
#
# * A column number (zero if not provided) used to indent the generated
# documentation.
#
# * Arbitrary keyword arguments to pass to the documentation Mako templates.
ada_doc = create_doc_printer(
'ada', cast(Formatter, format_ada),
)
c_doc = create_doc_printer(
'c', cast(Formatter, format_c),
)
py_doc = create_doc_printer(
'python', cast(Formatter, format_python),
)
ocaml_doc = create_doc_printer(
'ocaml', cast(Formatter, format_ocaml),
)
def ada_c_doc(entity: Union[str, CompiledType], column: int = 0) -> str:
"""
Shortcut to render documentation for a C entity with an Ada doc syntax.
:type entity: str|compiled_types.CompiledType
:type column: int
"""
return ada_doc(entity, column, lang='c')