-
Notifications
You must be signed in to change notification settings - Fork 844
/
Copy pathextract_toc.py
66 lines (54 loc) · 2.03 KB
/
extract_toc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
"""
Extract Table of Content
========================
A Pelican plugin to extract table of contents (ToC) from `article.content` and
place it in its own `article.toc` variable for use in templates.
"""
from os import path
from bs4 import BeautifulSoup
from pelican import signals, readers, contents
import logging
logger = logging.getLogger(__name__)
def extract_toc(content):
if isinstance(content, contents.Static):
return
soup = BeautifulSoup(content._content, 'html.parser')
filename = content.source_path
extension = path.splitext(filename)[1][1:]
toc = None
# default Markdown reader
if not toc and readers.MarkdownReader.enabled and extension in readers.MarkdownReader.file_extensions:
toc = soup.find('div', class_='toc')
if toc:
toc.extract()
if len(toc.find_next('ul').find_all('li')) == 0:
toc = None
# default reStructuredText reader
if not toc and readers.RstReader.enabled and extension in readers.RstReader.file_extensions:
toc = soup.find('div', class_='contents topic')
if toc:
toc.extract()
tag = BeautifulSoup(str(toc), 'html.parser')
tag.div['class'] = 'toc'
tag.div['id'] = ''
p = tag.find('p', class_='topic-title first')
if p:
p.extract()
toc = tag
# Pandoc reader (markdown and other formats)
if 'pandoc_reader' in content.settings['PLUGINS']:
try:
from pandoc_reader import PandocReader
except ImportError:
PandocReader = False
if not toc and PandocReader and PandocReader.enabled and extension in PandocReader.file_extensions:
toc = soup.find('nav', id='TOC')
if toc:
toc.extract()
content._content = soup.decode()
content.toc = toc.decode()
if content.toc.startswith('<html>'):
content.toc = content.toc[12:-14]
def register():
signals.content_object_init.connect(extract_toc)