#!/usr/bin/env python3
"""
NOTE: this script has been developed with AI assistance.

Generate an Ada package with embedded tool switches database.

This script runs various tools with their help options and extracts
the switches and their associated documentation into a structured JSON
database, which is then embedded into an Ada package specification as
string constants.
"""

import argparse
import json
import os
import re
import subprocess
import sys
from typing import Dict, List, Optional


class HelpParser:
    """Base class for parsing tool help output."""

    def parse(self, help_text: str) -> Dict[str, str]:
        """Parse help text and return a dictionary of switches to docs.

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping switch names to their documentation

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError("Subclasses must implement parse()")


class GnatHelpParser(HelpParser):
    """Parser for GNAT compiler help output."""

    # A switch line: 1-5 leading spaces, one or more comma-separated
    # switches, whitespace, then the (possibly empty) documentation.
    _SWITCH_RE = re.compile(r"^ {1,5}(-+\S+(?:,\s*-+\S+)*)\s+(.*)$")
    # Text such as "Some section:" is a header, not documentation.
    _SECTION_HEADER_RE = re.compile(r"^[A-Z][a-z].*:$")

    def parse(self, help_text: str) -> Dict[str, str]:
        """Parse GNAT help output format.

        GNAT help has lines like:
            -switch   Documentation text
                      that may span multiple lines
        or:
            -switch        Documentation text
            --long-switch  More documentation

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping each switch (as written in the help) to
            its documentation joined on single spaces.
        """
        switches: Dict[str, str] = {}
        current_switch = None
        current_doc: List[str] = []

        for line in help_text.split("\n"):
            match = self._SWITCH_RE.match(line)
            if match:
                # A new switch starts: flush the one being collected.
                if current_switch:
                    switches[current_switch] = " ".join(current_doc).strip()
                current_switch = match.group(1)
                current_doc = [match.group(2)] if match.group(2).strip() else []
            elif current_switch:
                stripped = line.strip()
                # Continuation line, unless it is blank, a mode value
                # description (" ." prefix) or a section header.
                if (
                    stripped
                    and not line.startswith(" .")
                    and not self._SECTION_HEADER_RE.match(stripped)
                ):
                    current_doc.append(stripped)

        # Don't forget the last switch
        if current_switch:
            switches[current_switch] = " ".join(current_doc).strip()

        return switches


class GenericHelpParser(HelpParser):
    """Generic parser for standard help output formats."""

    # Up to 4 leading blanks, the switch list, then at least two blanks
    # separating it from the documentation.
    _SWITCH_RE = re.compile(r"^\s{0,4}(-+\S+(?:,\s*-+\S+)*)\s{2,}(.*)$")
    # Continuation lines must be indented at least this much.
    _CONTINUATION_INDENT = " " * 6

    def parse(self, help_text: str) -> Dict[str, str]:
        """Parse generic help output.

        Tries to identify lines that look like:
            -switch, --long-switch  Documentation
        or:
            -switch  Documentation

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping each switch list (as written in the
            help) to its documentation joined on single spaces.
        """
        switches: Dict[str, str] = {}
        current_switch = None
        current_doc: List[str] = []

        for line in help_text.split("\n"):
            stripped = line.strip()
            match = self._SWITCH_RE.match(line)
            if match:
                # Save previous switch if any
                if current_switch:
                    switches[current_switch] = " ".join(current_doc).strip()
                # The key may hold several comma-separated switches.
                current_switch = match.group(1)
                current_doc = [match.group(2)]
            elif (
                current_switch
                and stripped
                and line.startswith(self._CONTINUATION_INDENT)
            ):
                # Continuation line with significant indentation
                current_doc.append(stripped)
            elif not stripped:
                # An empty line ends the current switch
                if current_switch and current_doc:
                    switches[current_switch] = " ".join(current_doc).strip()
                current_switch = None
                current_doc = []

        # Don't forget the last switch
        if current_switch and current_doc:
            switches[current_switch] = " ".join(current_doc).strip()

        return switches


class GprbuildHelpParser(HelpParser):
    """Parser for gprbuild and gprclean style help output."""

    # Switch followed by a space-separated argument, like "--db dir".
    _SWITCH_WITH_ARG_RE = re.compile(
        r"^ {1,2}(-+[^\s]+)\s+([a-z<][^\s]*)\s+(.+)$"
    )
    # Regular switch with optional equals/bracket modifiers and
    # comma-separated alternatives, with or without doc on the line.
    _SWITCH_RE = re.compile(
        r"^ {1,2}((?:-+[^\s,]+(?:=\S+|\[=\S+\])?(?:,\s*-+[^\s,]+"
        r"(?:=\S+|\[=\S+\])?)*,?))(?:\s+(.*))?$"
    )
    # Leading "- " separator used by tools like gnatcheck.
    _LEADING_DASH_RE = re.compile(r"^-\s*")
    # Switch lines use 1-2 leading spaces, so a continuation line needs
    # at least 3 to be told apart from headers and other text.
    _CONTINUATION_INDENT = " " * 3

    def parse(self, help_text: str) -> Dict[str, str]:
        """Parse gprbuild/gprclean help output format.

        Help output has lines like:
            -switch        Documentation text
           -switch         Documentation text (gprclean uses 1 space)
            --long-switch  Documentation text
                           that may span multiple lines
            --db dir       Docs with space-separated argument

        Args:
            help_text: The raw help output from the tool

        Returns:
            Dictionary mapping each switch (including its
            space-separated argument, if any) to its documentation,
            with any leading "- " separator removed.
        """
        switches: Dict[str, str] = {}
        current_switch = None
        current_doc: List[str] = []

        for line in help_text.split("\n"):
            # The argument form takes precedence over the plain form.
            match_with_arg = self._SWITCH_WITH_ARG_RE.match(line)
            match_simple = (
                None if match_with_arg else self._SWITCH_RE.match(line)
            )

            if match_with_arg or match_simple:
                # Save previous switch if any
                if current_switch:
                    switches[current_switch] = " ".join(current_doc).strip()

                if match_with_arg:
                    switch_name, arg_name, doc_text = match_with_arg.groups()
                    # Include argument in switch name
                    current_switch = f"{switch_name} {arg_name}"
                else:
                    current_switch = match_simple.group(1)
                    doc_text = match_simple.group(2) or ""

                doc_text = doc_text.strip()
                current_doc = [doc_text] if doc_text else []
            elif current_switch:
                stripped = line.strip()
                if stripped and line.startswith(self._CONTINUATION_INDENT):
                    # Continuation line (indented more than switch lines)
                    current_doc.append(stripped)
                elif not stripped or not line.startswith(" -"):
                    # Empty line, section header or other text: the
                    # current switch ends here (dropped if undocumented).
                    if current_doc:
                        switches[current_switch] = " ".join(current_doc).strip()
                    current_switch = None
                    current_doc = []

        # Don't forget the last switch
        if current_switch and current_doc:
            switches[current_switch] = " ".join(current_doc).strip()

        # Clean up docs: remove leading "-" followed by spaces
        return {
            switch: self._LEADING_DASH_RE.sub("", doc)
            for switch, doc in switches.items()
        }


def run_tool_help(tool_command: str) -> Optional[str]:
    """Run a tool with its help option and capture output.

    Args:
        tool_command: The full command to run (e.g., "gnat --help-ada").
            It is split on whitespace; no shell is involved.

    Returns:
        The combined stdout + stderr as a string, or None if the
        command is empty, could not be started, timed out, or printed
        nothing but whitespace.
    """
    argv = tool_command.split()
    if not argv:
        print(f"Error running '{tool_command}': empty command", file=sys.stderr)
        return None

    try:
        result = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            # Help text may contain bytes outside the locale encoding;
            # keep going rather than failing on decode.
            errors="replace",
            timeout=30,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        # OSError covers a missing or non-executable tool.
        print(f"Error running '{tool_command}': {e}", file=sys.stderr)
        return None

    # Many tools output help to stderr, so combine both
    output = result.stdout + result.stderr
    return output if output.strip() else None


def select_parser(tool_name: str) -> HelpParser:
    """Select the appropriate parser for a given tool.

    Args:
        tool_name: Name of the tool (extracted from command);
            matching is case-insensitive.

    Returns:
        An appropriate HelpParser instance
    """
    tool_name_lower = tool_name.lower()

    if tool_name_lower in ("gnat", "gnatprove"):
        return GnatHelpParser()
    if tool_name_lower in (
        "gprbuild",
        "gprclean",
        "gprinstall",
        "gnatcheck",
        "arm-eabi-gnatemu",
    ):
        return GprbuildHelpParser()
    return GenericHelpParser()


def extract_tool_name(tool_command: str) -> str:
    """Extract the tool name from a command string.

    Args:
        tool_command: Full command like "gnat --help-ada" or
            "/path/to/tool --help"

    Returns:
        Just the tool name, e.g., "gnat"; an empty string when the
        command is empty or only whitespace.
    """
    words = tool_command.split()
    if not words:
        return ""
    return os.path.basename(words[0])


def normalize_switch_name(switch_name: str) -> str:
    """Normalize switch names to prefer long over short versions.

    When a switch has both long and short versions separated by comma
    (e.g., "--width, -w" or "-v, --verbose"), extract only the long
    version. If no long version exists, use the part before the comma.

    Args:
        switch_name: The switch name, possibly with multiple versions

    Returns:
        The normalized switch name (preferring long version)
    """
    # No comma means there is a single version: nothing to choose.
    if "," not in switch_name:
        return switch_name

    parts = [part.strip() for part in switch_name.split(",")]

    # First long version (starts with --), else the first part.
    return next((p for p in parts if p.startswith("--")), parts[0])


def escape_ada_string(s: str) -> str:
    """Escape a string for use in an Ada string literal.

    Args:
        s: The string to escape

    Returns:
        The escaped string suitable for Ada string literals
    """
    # In Ada, quotes are doubled to escape them
    return s.replace('"', '""')


def split_string_for_ada(json_str: str, max_length: int = 1000) -> List[str]:
    """Split a JSON string into chunks suitable for Ada string constants.

    Chunks preferably end right after a comma or a closing brace or
    bracket found within the last 100 characters of the window;
    otherwise the string is cut at exactly max_length. Concatenating
    the chunks always gives back json_str.

    The input should NOT be Ada-escaped yet: cutting an escaped string
    could separate the two halves of a doubled quote. Escape each
    returned chunk instead.

    Args:
        json_str: The JSON string to split
        max_length: Maximum length of each chunk (to avoid Ada line
            length limits); must be at least 1

    Returns:
        List of non-empty string chunks, none longer than max_length

    Raises:
        ValueError: If max_length is less than 1.
    """
    if max_length < 1:
        # A non-positive window would never make progress.
        raise ValueError("max_length must be at least 1")

    chunks = []
    total = len(json_str)
    i = 0

    while i < total:
        end = min(i + max_length, total)

        if end < total:
            # Look back for a good break point, keeping the break
            # character inside the chunk and the chunk within bounds.
            for j in range(end - 1, max(i, end - 101), -1):
                if json_str[j] in ",}]":
                    end = j + 1
                    break

        chunks.append(json_str[i:end])
        i = end

    return chunks


def generate_ada_package(database: dict, output_dir: str) -> None:
    """Generate an Ada package spec with the JSON database embedded.

    The database is serialized as compact JSON, cut into chunks, and
    each chunk is written as one Ada string constant (Db1, Db2, ...);
    a final constant Db concatenates them all.

    Args:
        database: The tool database dictionary
        output_dir: Existing directory where to write the Ada package
            spec
    """
    # Convert database to compact JSON string
    json_str = json.dumps(database, ensure_ascii=False, separators=(",", ":"))

    # Split first, escape afterwards: splitting already-escaped text
    # could separate a doubled quote and produce invalid literals.
    chunks = [
        escape_ada_string(chunk)
        for chunk in split_string_for_ada(json_str, max_length=2000)
    ]

    # Generate Ada package spec
    ada_code = [
        "--  Automatically generated, do not edit.",
        "",
        "pragma Style_Checks (Off);",
        "",
        "package LSP.GPR_Completions.Tools.Database is",
        "",
    ]

    # One string constant per chunk
    for i, chunk in enumerate(chunks, 1):
        ada_code.append(f' Db{i} : constant String := "{chunk}";')
        ada_code.append("")

    # The main concatenated constant
    db_parts = " & ".join(f"Db{i}" for i in range(1, len(chunks) + 1))
    ada_code.append(f" Db : constant String := {db_parts};")
    ada_code.append("")
    ada_code.append("end LSP.GPR_Completions.Tools.Database;")

    # Write to file
    output_file = os.path.join(
        output_dir, "lsp-gpr_completions-tools-database.ads"
    )
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(ada_code))

    print(f"\nAda package written to: {output_file}")
    print(f"Total string chunks: {len(chunks)}")
    print(f"Total JSON size: {len(json_str)} characters")


def generate_database(
    tool_configs: List[str], output_dir: str, pretty: bool = True
) -> None:
    """Generate the Ada package database from tool help outputs.

    Tools that produce no help output or no parsable switches are
    reported on stderr and left out of the database.

    Args:
        tool_configs: List of tool commands (e.g., ["gnat --help-ada"])
        output_dir: Directory where to write the Ada package
        pretty: Unused (kept for compatibility)
    """
    database = {}

    for tool_command in tool_configs:
        print(f"Processing: {tool_command}")

        # Extract tool name for the database key
        tool_name = extract_tool_name(tool_command)

        # Run the tool and get help output
        help_text = run_tool_help(tool_command)
        if not help_text:
            print(
                f" Warning: No help output received for '{tool_command}'",
                file=sys.stderr,
            )
            continue

        # Select appropriate parser and parse the help text
        switches = select_parser(tool_name).parse(help_text)
        if not switches:
            print(
                f" Warning: No switches parsed from '{tool_command}'",
                file=sys.stderr,
            )
            continue

        # Normalize switch names to prefer long versions
        normalized_switches = {
            normalize_switch_name(switch): doc
            for switch, doc in switches.items()
        }

        print(f" Found {len(normalized_switches)} switches")

        # Store in database (use tool name as key)
        database[tool_name] = {
            "command": tool_command,
            "switches": normalized_switches,
        }

    # Generate Ada package
    generate_ada_package(database, output_dir)
    print(f"Total tools processed: {len(database)}")


def main() -> None:
    """Main entry point."""
    parser = argparse.ArgumentParser(
        description="Generate Ada package with embedded tool switches database",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s -o source/gpr/generated "gnat --help-ada"
  %(prog)s -o source/gpr/generated "gnat --help-ada" "gnatprove --help"
  %(prog)s --default -o source/gpr/generated
        """,
    )

    parser.add_argument(
        "tools",
        nargs="*",
        help='Tool commands to process (e.g., "gnat --help-ada")',
    )
    parser.add_argument(
        "-o",
        "--output",
        default="source/gpr/generated",
        help="Output directory for Ada package "
        "(default: source/gpr/generated)",
    )
    parser.add_argument(
        "--compact",
        action="store_true",
        help="Unused (kept for compatibility)",
    )
    parser.add_argument(
        "--default",
        action="store_true",
        help="Process default GNAT tools "
        "(gnat --help-ada, gnatprove --help, gprbuild --help)",
    )

    args = parser.parse_args()

    # Use default tools if --default is specified or no tools provided
    if args.default or not args.tools:
        tools_to_process = [
            "gnat --help-ada",
            "gnatprove --help",
            "gprbuild --help",
        ]
    else:
        tools_to_process = args.tools

    generate_database(tools_to_process, args.output, pretty=not args.compact)


if __name__ == "__main__":
    main()