2018-09-20 11:32:53 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
"""
|
|
|
|
Usage: python3 toc.py [-n] files.md...
|
2021-09-02 18:48:17 -04:00
|
|
|
|
2018-09-20 11:32:53 -04:00
|
|
|
Replace a "## TOC" heading in a Markdown file with a table of contents,
|
|
|
|
generated from the other headings in the file. Supports multiple files.
|
|
|
|
Headings must start with "##" signs to be detected.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import re
|
|
|
|
from collections import namedtuple
|
|
|
|
|
|
|
|
# Title text used for the generated table-of-contents heading.
toc_name = 'Contents'
# Placeholder lines (compared after stripping trailing whitespace) that are
# replaced by the generated table of contents.
valid_toc_headings = {'## TOC', '##TOC'}

# One table-of-contents entry: the heading text, its link anchor, and its
# nesting level (number of '#' characters beyond the first two).
TocItem = namedtuple('TocItem', ['name', 'anchor', 'level'])
# Matches runs of characters that are not word characters, hyphens or spaces.
punctuation_regexp = re.compile(r'[^\w\- ]+')
# Additional characters that name_to_anchor() strips from anchors.
specialchar_regexp = re.compile(r'[⅔]+')
|
2018-09-20 11:32:53 -04:00
|
|
|
|
|
|
|
def name_to_anchor(name):
    """Return the link anchor for the heading text *name*.

    Follows GitHub's algorithm for generating anchors from headings:
    https://github.com/jch/html-pipeline/blob/master/lib/html/pipeline/toc_filter.rb
    """
    # Trim surrounding whitespace and lowercase.
    lowered = name.strip().lower()
    # Drop everything matched by the punctuation pattern.
    without_punctuation = punctuation_regexp.sub('', lowered)
    # Every single space becomes a dash (consecutive spaces give
    # consecutive dashes).
    dashed = '-'.join(without_punctuation.split(' '))
    # Finally remove the miscellaneous special characters.
    return specialchar_regexp.sub('', dashed)
|
|
|
|
|
|
|
|
def get_toc_index(lines):
    """Return the index of the first TOC placeholder line, or None.

    A line is the placeholder when, with trailing whitespace removed, it is
    one of the strings in ``valid_toc_headings``.
    """
    placeholder_positions = (
        position
        for position, text in enumerate(lines)
        if text.rstrip() in valid_toc_headings
    )
    # Only the first match matters; None signals "no placeholder found".
    return next(placeholder_positions, None)
|
|
|
|
|
|
|
|
def get_toc_items(lines, toc_index):
    """Yield a TocItem for every '##'-or-deeper heading after the TOC line.

    lines      -- the lines of the Markdown file
    toc_index  -- index of the TOC placeholder line; only headings on later
                  lines are yielded

    Compared with a plain prefix scan, two cases are handled explicitly:

    * Lines inside fenced code blocks (``` or ~~~) are ignored, so a
      '## comment' in a code sample does not become a TOC entry.
    * Repeated headings get distinct anchors.  GitHub appends '-1', '-2',
      ... to the anchors of the second, third, ... heading with the same
      text, so the same suffixes are generated here.  Every '#' heading
      line in the file takes part in that count (including level-1
      headings and headings before the TOC); the TOC placeholder line
      itself is not counted.
    """
    anchor_counts = {}
    open_fence = None  # the fence marker while inside a fenced code block
    for i, line in enumerate(lines):
        marker = line.lstrip()[:3]
        if marker in ('```', '~~~'):
            if open_fence is None:
                open_fence = marker
            elif marker == open_fence:
                open_fence = None
            continue
        if open_fence is not None or i == toc_index:
            continue
        if not line.startswith('#'):
            continue
        name = line.lstrip('#')
        # Both strings still carry the same trailing text, so the length
        # difference is exactly the number of leading '#' characters.
        hashes = len(line) - len(name)
        name = name.strip()
        anchor = name_to_anchor(name)
        seen_before = anchor_counts.get(anchor, 0)
        anchor_counts[anchor] = seen_before + 1
        if seen_before:
            anchor = '%s-%d' % (anchor, seen_before)
        if i > toc_index and hashes >= 2:
            yield TocItem(name, anchor, hashes - len('##'))
|
|
|
|
|
|
|
|
def toc_string(toc_items, title=None):
    """Render the table of contents as Markdown text.

    toc_items -- iterable of (name, anchor, level) entries
    title     -- heading text for the TOC; defaults to the module-level
                 ``toc_name`` when omitted

    Returns a string holding a '## <title>' heading, a blank line, and one
    '- [name](#anchor)' bullet per entry, terminated by a newline.
    """
    if title is None:
        title = toc_name
    lines = ['## %s' % title, '']
    for name, anchor, level in toc_items:
        # Two spaces per level: a nested bullet must be indented at least
        # to the content column of its parent ('- ' is two columns wide),
        # otherwise Markdown renders it as a sibling instead of a child.
        padding = '  ' * level
        lines.append('%s- [%s](#%s)' % (padding, name, anchor))
    return '\n'.join(lines) + '\n'
|
|
|
|
|
|
|
|
def add_toc(filename):
    """Replace the TOC placeholder in *filename* with a generated TOC.

    The file is read as UTF-8 and, when a TOC can be built, rewritten in
    place with the placeholder line swapped for the generated text.

    Returns:
        None  -- the file has no TOC placeholder heading (file untouched)
        False -- no content headings were found (file untouched)
        True  -- the file was rewritten
    """
    with open(filename, 'r', encoding='utf-8') as source:
        lines = source.readlines()

    toc_index = get_toc_index(lines)
    if toc_index is None:
        return None  # no TOC heading

    toc_items = list(get_toc_items(lines, toc_index))
    if not toc_items:
        return False  # no content headings

    with open(filename, 'w', encoding='utf-8') as target:
        # Everything before the placeholder, the generated TOC in its
        # place, then everything after it.
        target.writelines(lines[:toc_index])
        target.write(toc_string(toc_items))
        target.writelines(lines[toc_index + 1:])
    return True  # OK
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: add a TOC to each file named in sys.argv.

    For every filename, prints the name followed by 'OK' or a warning that
    reflects the value returned by add_toc().  When no filenames are given,
    prints an error and the module docstring, then exits with status 1.
    """
    filenames = sys.argv[1:]
    if not filenames:
        print('*** ERROR: No filenames specified')
        print(__doc__)
        # sys.exit() instead of the bare exit() helper: exit() is injected
        # by the site module for interactive use and is not guaranteed to
        # exist (e.g. under "python -S").
        sys.exit(1)
    for filename in filenames:
        print(filename)
        result = add_toc(filename)
        if result is None:
            print('*** WARNING: No "## TOC" heading found')
        elif result is False:
            print('*** WARNING: No content headings found')
        else:
            print('OK')
|
|
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
|