Files
ada_language_server/scripts/generate_tool_help_db.py
Anthony Leonardo Gracio 811b0604ce Handle Clean, Format, Check and Emulator packages
For tool switches completion.

Adapt the Python script to handle more switch formats
(e.g., switches with both a short and a long version, like '-v,
--verbose').

For eng/ide/ada_language_server#1741
2025-12-19 09:06:30 +00:00

526 lines
17 KiB
Python
Executable File

#!/usr/bin/env python3
"""
NOTE: this script has been developed with AI assistance.
Generate an Ada package with embedded tool switches database.
This script runs various tools with their help options and extracts the
switches and their associated documentation into a structured JSON database,
which is then embedded into an Ada package specification as string
constants.
"""
import argparse
import json
import re
import subprocess
import sys
from typing import Dict, List, Optional
class HelpParser:
    """Common interface shared by all tool-help parsers.

    Concrete parsers override ``parse``; the implementation here only
    reports that the override is missing.
    """

    def parse(self, help_text: str) -> Dict[str, str]:
        """Turn raw help output into a switch -> documentation mapping.

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping switch names to their documentation

        Raises:
            NotImplementedError: Always, when called on this base class.
        """
        message = "Subclasses must implement parse()"
        raise NotImplementedError(message)
class GnatHelpParser(HelpParser):
    """Parser for GNAT compiler help output."""

    # A switch line is indented by 1 to 5 spaces and starts with a dash.
    # Each switch token may contain commas but must not END with one:
    # otherwise a greedy "\S+" swallows the separator of "-d, --debug" and
    # the long form ends up in the documentation text instead of the name.
    # A single trailing comma is still tolerated (",?") so that lines which
    # were recognised before this restriction keep being recognised.
    _SWITCH_LINE = re.compile(
        r"^ {1,5}(-+\S*[^\s,](?:,\s*-+\S*[^\s,])*,?)\s+(.*)$"
    )

    # Lines such as "Something:" are treated as section headers, not docs.
    _SECTION_HEADER = re.compile(r"^[A-Z][a-z].*:$")

    def parse(self, help_text: str) -> Dict[str, str]:
        """Parse GNAT help output format.

        A switch entry starts on a line indented by 1 to 5 spaces whose
        first non-blank character is a dash. The switch name (or a
        comma-separated list of names such as ``-d, --debug``) is followed
        by whitespace and the beginning of its documentation. Following
        non-blank lines that are not themselves switch lines are appended
        to the documentation of the current switch.

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping each switch (or comma-separated switch list)
            to its documentation joined on single spaces
        """
        switches: Dict[str, str] = {}
        current_switch: Optional[str] = None
        current_doc: List[str] = []

        for line in help_text.split("\n"):
            match = self._SWITCH_LINE.match(line)
            if match:
                # A new switch begins: store the one collected so far.
                if current_switch:
                    switches[current_switch] = " ".join(current_doc).strip()
                current_switch = match.group(1)
                current_doc = [match.group(2)] if match.group(2).strip() else []
                continue

            if not current_switch:
                # Text before the first switch is ignored.
                continue

            stripped = line.strip()
            # Blank lines carry no documentation; lines starting with " ."
            # are mode value descriptions and are skipped as well.
            if not stripped or line.startswith(" ."):
                continue
            # Only add if the line doesn't look like a new section header.
            if not self._SECTION_HEADER.match(stripped):
                current_doc.append(stripped)

        # Don't forget the last switch
        if current_switch:
            switches[current_switch] = " ".join(current_doc).strip()
        return switches
class GenericHelpParser(HelpParser):
    """Fallback parser for conventional ``--help`` layouts."""

    def parse(self, help_text: str) -> Dict[str, str]:
        """Extract switches from generic help output.

        A switch entry is a line with at most four leading whitespace
        characters, then one switch or a comma-separated list of switches,
        then at least two whitespace characters and the documentation.
        Non-blank lines starting with six spaces extend the documentation of
        the current switch, and a blank line closes the current switch.

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping each switch (or comma-separated switch list)
            to its documentation joined on single spaces
        """
        switch_line = re.compile(r"^\s{0,4}(-+\S+(?:,\s*-+\S+)*)\s{2,}(.*)$")
        continuation_prefix = " " * 6

        result = {}
        name = None
        doc_parts = []

        def store():
            # Record the entry currently being collected.
            result[name] = " ".join(doc_parts).strip()

        for raw in help_text.split("\n"):
            text = raw.strip()
            found = switch_line.match(raw)

            if found is not None:
                # New entry: flush the previous one first.
                if name:
                    store()
                name, first_doc = found.groups()
                doc_parts = [first_doc]
                continue

            if name and text and raw.startswith(continuation_prefix):
                # Deeply indented text continues the current description.
                doc_parts.append(text)
                continue

            if text == "":
                # A blank line terminates whatever entry is in progress.
                if name and doc_parts:
                    store()
                name = None
                doc_parts = []

        # Flush the trailing entry, if any.
        if name and doc_parts:
            store()
        return result
class GprbuildHelpParser(HelpParser):
    """Parser for help output laid out like gprbuild's or gprclean's."""

    def parse(self, help_text: str) -> Dict[str, str]:
        """Extract switches from gprbuild/gprclean style help output.

        Switch lines start with one or two spaces followed by a dash. Two
        shapes are recognised, in this order:

        * a switch followed by a space-separated argument (a word starting
          with a lowercase letter or ``<``) and then documentation, e.g.
          ``--db dir Docs``; the argument becomes part of the switch name;
        * a switch, or comma-separated list of switches, each optionally
          carrying ``=value`` / ``[=value]``, with the documentation either
          on the same line or on the following indented lines.

        Documentation that starts with a dash has that dash and the
        whitespace after it removed (used by tools like gnatcheck).

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping switch names to their documentation
        """
        arg_switch_re = re.compile(r"^ {1,2}(-+[^\s]+)\s+([a-z<][^\s]*)\s+(.+)$")
        plain_switch_re = re.compile(
            r"^ {1,2}((?:-+[^\s,]+(?:=\S+|\[=\S+\])?(?:,\s*-+[^\s,]+"
            r"(?:=\S+|\[=\S+\])?)*,?))(?:\s+(.*))?$"
        )

        found = {}
        name = None
        pieces = []

        def commit():
            # Record the entry currently being collected.
            found[name] = " ".join(pieces).strip()

        for raw in help_text.split("\n"):
            with_arg = arg_switch_re.match(raw)
            # The argument form takes precedence over the plain form.
            plain = None if with_arg else plain_switch_re.match(raw)

            if with_arg or plain:
                # A new entry starts; the previous one is stored even when
                # it collected no documentation.
                if name:
                    commit()
                if with_arg:
                    flag, arg, text = with_arg.groups()
                    name = f"{flag} {arg}"
                else:
                    name, text = plain.groups()
                text = (text or "").strip()
                pieces = [text] if text else []
                continue

            if not name:
                # Nothing is being collected: ignore the line.
                continue

            body = raw.strip()
            if body and raw.startswith(" "):
                # Indented, non-blank text continues the description.
                # NOTE(review): the comment that accompanied this check spoke
                # of "at least 3 spaces", while the check itself only needs
                # one leading space - confirm which is intended.
                pieces.append(body)
            elif not body or not raw.startswith(" -"):
                # A blank line, or other unindented text such as a section
                # header, closes the entry; it is kept only if documented.
                if pieces:
                    commit()
                name = None
                pieces = []

        # The trailing entry is kept only when it has documentation.
        if name and pieces:
            commit()

        # Drop the leading "- " decoration some tools put before the text.
        for key, text in list(found.items()):
            if text.startswith("-"):
                found[key] = re.sub(r"^-\s*", "", text)
        return found
def run_tool_help(tool_command: str) -> Optional[str]:
    """Run a tool with its help option and capture output.

    The command is split on whitespace and executed without a shell, with a
    30 second timeout. Standard output and standard error are concatenated
    because many tools print their help on stderr.

    Args:
        tool_command: The full command to run (e.g., "gnat --help-ada")

    Returns:
        The help output as a string, or None if the command is blank, could
        not be started, timed out, or produced only whitespace
    """
    argv = tool_command.split()
    if not argv:
        # subprocess.run([]) would fail with an unhelpful IndexError.
        print(f"Error running '{tool_command}': empty command", file=sys.stderr)
        return None

    try:
        result = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            # Help text in an unexpected encoding must not abort the run.
            errors="replace",
            timeout=30,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        # OSError covers a missing executable (FileNotFoundError) as well as
        # non-executable files and similar launch failures. The exit status
        # is deliberately not checked: tools often exit non-zero on --help.
        print(f"Error running '{tool_command}': {e}", file=sys.stderr)
        return None

    # Many tools output help to stderr, so combine both
    output = result.stdout + result.stderr
    return output if output.strip() else None
def select_parser(tool_name: str) -> HelpParser:
    """Pick the parser class matching a tool and instantiate it.

    The lookup is case-insensitive. Tools that are not listed explicitly
    fall back to the generic parser.

    Args:
        tool_name: Name of the tool (extracted from command)

    Returns:
        A new HelpParser instance suited to the tool's help layout
    """
    parser_by_tool = {
        **dict.fromkeys(("gnat", "gnatprove"), GnatHelpParser),
        **dict.fromkeys(
            (
                "gprbuild",
                "gprclean",
                "gprinstall",
                "gnatcheck",
                "arm-eabi-gnatemu",
            ),
            GprbuildHelpParser,
        ),
    }
    parser_class = parser_by_tool.get(tool_name.lower(), GenericHelpParser)
    return parser_class()
def extract_tool_name(tool_command: str) -> str:
    """Extract the tool name from a command string.

    Args:
        tool_command: Full command like "gnat --help-ada" or
            "/path/to/tool --help"

    Returns:
        Just the tool name, e.g., "gnat"; an empty string when the command
        is empty or contains only whitespace
    """
    import os

    # Only the first whitespace-separated word (the executable) matters.
    words = tool_command.split(maxsplit=1)
    if not words:
        # A blank command has no executable; avoid an IndexError.
        return ""
    return os.path.basename(words[0])
def normalize_switch_name(switch_name: str) -> str:
    """Reduce a comma-separated list of switch spellings to a single one.

    The first spelling that starts with ``--`` wins (e.g. both
    "--width, -w" and "-v, --verbose" yield the long form). When none of
    the spellings is a long one, the first spelling is used. A name without
    a comma is returned untouched.

    Args:
        switch_name: The switch name, possibly with multiple versions

    Returns:
        The normalized switch name (preferring long version)
    """
    if "," not in switch_name:
        return switch_name

    variants = [variant.strip() for variant in switch_name.split(",")]
    # First long spelling if there is one, otherwise the leading spelling.
    return next((v for v in variants if v.startswith("--")), variants[0])
def escape_ada_string(s: str) -> str:
    """Make a string safe to place between the quotes of an Ada literal.

    Args:
        s: The string to escape

    Returns:
        The input with every double quote doubled, which is how Ada
        represents a quote inside a string literal
    """
    # Cutting at each quote and re-joining with two quotes doubles them all.
    return '""'.join(s.split('"'))
def split_string_for_ada(json_str: str, max_length: int = 1000) -> List[str]:
    """Split a JSON string into chunks suitable for Ada string constants.

    Each chunk preferably ends right after a ``,``, ``}`` or ``]`` found in
    the last 100 characters of the allowed window. When no such character
    exists the chunk is cut at ``max_length``; that cut is moved by one
    character if it would otherwise fall in the middle of a doubled quote
    (``""``), since each chunk is emitted as its own Ada string literal and
    a lone quote at a chunk edge would break that literal. Concatenating
    the returned chunks always yields the original string.

    Args:
        json_str: The JSON string to split (expected to be already escaped
            for Ada, i.e. with its double quotes doubled)
        max_length: Maximum length of each chunk (to avoid Ada line
            length limits). Only with ``max_length=1`` may a chunk be one
            character longer, to keep a doubled quote together.

    Returns:
        List of string chunks

    Raises:
        ValueError: If max_length is smaller than 1 (no progress possible).
    """
    if max_length < 1:
        raise ValueError("max_length must be a positive integer")

    lookback = 100
    total = len(json_str)
    chunks: List[str] = []
    start = 0
    while start < total:
        end = min(start + max_length, total)
        if end < total:
            # Search backwards, inside the window, for a character after
            # which the chunk can end. The chunk's first character is
            # excluded so that a chunk never shrinks to a single separator.
            lowest = max(start, end - lookback)
            for pos in range(end - 1, lowest, -1):
                if json_str[pos] in ",}]":
                    end = pos + 1
                    break
            else:
                # Hard cut. An odd number of quotes right before the cut
                # means the cut separates the two halves of a doubled quote.
                quote_run = 0
                while end - quote_run > 0 and json_str[end - quote_run - 1] == '"':
                    quote_run += 1
                if quote_run % 2:
                    # Prefer a shorter chunk; grow it only if shrinking
                    # would leave it empty.
                    end = end - 1 if end - 1 > start else end + 1
        chunks.append(json_str[start:end])
        start = end
    return chunks
def generate_ada_package(database: dict, output_dir: str):
    """Write the database as JSON embedded in an Ada package specification.

    The database is serialised to compact JSON, escaped for Ada, cut into
    chunks of at most about 2000 characters and emitted as constants
    ``Db1``, ``Db2``, ... plus a ``Db`` constant concatenating them all.
    The result is written to ``lsp-gpr_completions-tools-database.ads``
    inside ``output_dir`` and a short summary is printed.

    Args:
        database: The tool database dictionary
        output_dir: Directory where to write the Ada package spec
    """
    import os

    compact_json = json.dumps(database, ensure_ascii=False, separators=(",", ":"))
    pieces = split_string_for_ada(escape_ada_string(compact_json), max_length=2000)

    # Package header.
    spec_lines = [
        "-- Automatically generated, do not edit.",
        "",
        "pragma Style_Checks (Off);",
        "",
        "package LSP.GPR_Completions.Tools.Database is",
        "",
    ]

    # One constant per chunk, each followed by a blank line.
    constant_names = []
    for index, piece in enumerate(pieces, 1):
        constant_names.append(f"Db{index}")
        spec_lines += [f' Db{index} : constant String := "{piece}";', ""]

    # The constant gluing all chunks back together, then the package end.
    spec_lines += [
        f" Db : constant String := {' & '.join(constant_names)};",
        "",
        "end LSP.GPR_Completions.Tools.Database;",
    ]

    target = os.path.join(output_dir, "lsp-gpr_completions-tools-database.ads")
    with open(target, "w", encoding="utf-8") as spec_file:
        spec_file.write("\n".join(spec_lines))

    print(f"\nAda package written to: {target}")
    print(f"Total string chunks: {len(pieces)}")
    print(f"Total JSON size: {len(compact_json)} characters")
def generate_database(tool_configs: List[str], output_dir: str, pretty: bool = True):
    """Collect switches from each tool's help and emit the Ada package.

    Every command is run, its help output is parsed with the parser chosen
    for the tool, and the switch names are normalized. Tools that produce
    no help output, or from which no switch can be parsed, are reported on
    stderr and left out of the database.

    Args:
        tool_configs: List of tool commands (e.g., ["gnat --help-ada"])
        output_dir: Directory where to write the Ada package
        pretty: Unused (kept for compatibility)
    """
    database = {}

    for tool_command in tool_configs:
        print(f"Processing: {tool_command}")

        # The executable's base name is the key of the database entry.
        tool_name = extract_tool_name(tool_command)

        help_text = run_tool_help(tool_command)
        if not help_text:
            print(
                f" Warning: No help output received for '{tool_command}'",
                file=sys.stderr,
            )
            continue

        raw_switches = select_parser(tool_name).parse(help_text)
        if not raw_switches:
            print(
                f" Warning: No switches parsed from '{tool_command}'",
                file=sys.stderr,
            )
            continue

        # Prefer long spellings; a later switch that normalizes to the same
        # name overwrites the earlier one.
        normalized_switches = {}
        for raw_name, doc in raw_switches.items():
            normalized_switches[normalize_switch_name(raw_name)] = doc
        print(f" Found {len(normalized_switches)} switches")

        database[tool_name] = {
            "command": tool_command,
            "switches": normalized_switches,
        }

    generate_ada_package(database, output_dir)
    print(f"Total tools processed: {len(database)}")
def main():
    """Main entry point.

    Parses the command line and generates the database for the requested
    tool commands. The default tool list is used when ``--default`` is given
    or when no tool command is passed.
    """
    # Declared before the argument parser so that the --default help text is
    # derived from the list and cannot drift from what is actually run.
    default_tools = [
        "gnat --help-ada",
        "gnatprove --help",
        "gprbuild --help",
    ]

    parser = argparse.ArgumentParser(
        description="Generate Ada package with embedded tool switches database",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s -o source/gpr/generated "gnat --help-ada"
%(prog)s -o source/gpr/generated "gnat --help-ada" "gnatprove --help"
%(prog)s --default -o source/gpr/generated
""",
    )
    parser.add_argument(
        "tools",
        nargs="*",
        help='Tool commands to process (e.g., "gnat --help-ada")',
    )
    parser.add_argument(
        "-o",
        "--output",
        default="source/gpr/generated",
        help="Output directory for Ada package (default: source/gpr/generated)",
    )
    parser.add_argument(
        "--compact",
        action="store_true",
        help="Unused (kept for compatibility)",
    )
    parser.add_argument(
        "--default",
        action="store_true",
        help=f"Process default GNAT tools ({', '.join(default_tools)})",
    )
    args = parser.parse_args()

    # Use default tools if --default is specified or no tools provided
    if args.default or not args.tools:
        tools_to_process = default_tools
    else:
        tools_to_process = args.tools

    # "pretty" is accepted but unused by generate_database.
    generate_database(tools_to_process, args.output, pretty=not args.compact)
if __name__ == "__main__":
main()