#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Usage: python3 toc.py files.md...
Replace a "## TOC" heading in a Markdown file with a table of contents,
generated from the other headings in the file. Supports multiple files.
Headings must start with "##" signs to be detected.
Lines inside fenced code blocks are never treated as headings.
"""

import sys
import re
from collections import namedtuple

# Text of the heading that replaces the "## TOC" placeholder.
toc_name = 'Contents'
# Placeholder lines (after stripping trailing whitespace) that mark the TOC.
valid_toc_headings = {'## TOC', '##TOC'}

TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])
punctuation_regexp = re.compile(r'[^\w\- ]+')

# A code fence: up to three spaces of indentation, then a run of at least
# three backticks or tildes. Group 1 is the run, group 2 the rest of the line.
_fence_regexp = re.compile(r'^ {0,3}(`{3,}|~{3,})(.*)$')


def name_to_anchor(name):
    """Return the anchor GitHub generates for the heading text *name*.

    The result does not include the "-N" suffix GitHub adds to repeated
    headings; get_toc_items() takes care of that.
    """
    # GitHub's algorithm for generating anchors from headings
    # https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
    anchor = name.strip().lower()                # lowercase
    anchor = punctuation_regexp.sub('', anchor)  # remove punctuation
    anchor = anchor.replace(' ', '-')            # replace spaces with dash
    return anchor


def get_toc_index(lines):
    """Return the index of the first TOC placeholder line, or None."""
    for i, line in enumerate(lines):
        if line.rstrip() in valid_toc_headings:
            return i
    return None


def _iter_unfenced(lines):
    """Yield (index, line) for every line outside fenced code blocks.

    The fence lines themselves are not yielded either.
    """
    open_fence = None  # the run of backticks/tildes that opened the block
    for i, line in enumerate(lines):
        match = _fence_regexp.match(line.rstrip('\r\n'))
        if open_fence is None:
            # A backtick fence may not have backticks in its info string;
            # such a line is inline code, not the start of a block.
            if match and not (match.group(1)[0] == '`'
                              and '`' in match.group(2)):
                open_fence = match.group(1)
                continue
            yield i, line
        elif (match
              and match.group(1)[0] == open_fence[0]
              and len(match.group(1)) >= len(open_fence)
              and not match.group(2).strip()):
            # A closing fence uses the same character, is at least as long
            # as the opening one, and has nothing after it.
            open_fence = None


def get_toc_items(lines, toc_index):
    """Yield a TocItem for each "##" heading after line *toc_index*.

    *lines* is the whole file as a list of lines. Headings inside fenced
    code blocks and headings with no text are skipped. ``level`` is 0 for
    "##", 1 for "###", and so on. Repeated headings get the "-1", "-2", ...
    anchor suffixes GitHub gives them.
    """
    seen_anchors = {}
    for i, line in _iter_unfenced(lines):
        if i == toc_index:
            # This line is rendered as the "## <toc_name>" heading, so it
            # takes part in GitHub's duplicate-anchor numbering.
            name = toc_name
        elif line.startswith('#'):
            # Single-"#" headings are counted for anchor numbering even
            # though they never appear in the table of contents.
            name = line.lstrip('#').strip()
        else:
            continue
        if not name:
            continue
        base = name_to_anchor(name)
        count = seen_anchors.get(base, 0)
        seen_anchors[base] = count + 1
        if i <= toc_index or not line.startswith('##'):
            continue
        anchor = '%s-%d' % (base, count) if count else base
        level = len(line) - len(line.lstrip('#')) - len('##')
        yield TocItem(name, anchor, level)


def toc_string(toc_items):
    """Return the Markdown text of the table of contents.

    The text starts with the "## <toc_name>" heading and ends with a
    newline. Entries are indented two spaces per level, which is what a
    "- " list needs for the deeper entry to render as nested.
    """
    lines = ['## %s' % toc_name, '']
    for name, anchor, level in toc_items:
        padding = '  ' * level
        lines.append('%s- [%s](#%s)' % (padding, name, anchor))
    return '\n'.join(lines) + '\n'


def add_toc(filename):
    """Replace the TOC placeholder in *filename* with a table of contents.

    Returns None if the file has no TOC placeholder, False if there are no
    headings to list (the file is left untouched in both cases), and True
    once the file has been rewritten. The file's line endings are kept.
    """
    # newline='' turns off newline translation so that the file is written
    # back with the line endings it was read with.
    with open(filename, 'r', encoding='utf-8', newline='') as f:
        lines = f.readlines()
    toc_index = get_toc_index(lines)
    if toc_index is None:
        return None  # no TOC heading
    toc_items = list(get_toc_items(lines, toc_index))
    if not toc_items:
        return False  # no content headings
    # Write the TOC with the same line ending as the line it replaces.
    placeholder = lines[toc_index]
    if placeholder.endswith('\r\n'):
        eol = '\r\n'
    elif placeholder.endswith('\r'):
        eol = '\r'
    else:
        eol = '\n'
    lines[toc_index] = toc_string(toc_items).replace('\n', eol)
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        f.writelines(lines)
    return True  # OK


def main():
    """Add a table of contents to every file named on the command line.

    Exits with status 1 when no file is given or when a file could not be
    read or written; warnings do not affect the exit status.
    """
    filenames = sys.argv[1:]
    if not filenames:
        print('*** ERROR: No filenames specified')
        print(__doc__)
        sys.exit(1)
    failed = False
    for filename in filenames:
        print(filename)
        try:
            result = add_toc(filename)
        except (OSError, UnicodeError) as err:
            # Report the problem and carry on with the remaining files.
            print('*** ERROR: %s' % err)
            failed = True
            continue
        if result is None:
            print('*** WARNING: No "## TOC" heading found')
        elif result is False:
            print('*** WARNING: No content headings found')
        else:
            print('OK')
    if failed:
        sys.exit(1)


if __name__ == '__main__':
    main()