Исходный код calibre.ebooks.oeb.polish.toc

#!/usr/bin/env python


__license__   = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from collections import Counter, OrderedDict
from functools import partial
from operator import itemgetter
from urllib.parse import urlparse

from lxml import etree
from lxml.builder import ElementMaker

from calibre import __version__
from calibre.ebooks.oeb.base import EPUB_NS, NCX, NCX_NS, OEB_DOCS, XHTML, XHTML_NS, XML, XML_NS, XPath, serialize, uuid_id, xml2text
from calibre.ebooks.oeb.polish.errors import MalformedMarkup
from calibre.ebooks.oeb.polish.opf import get_book_language, set_guide_item
from calibre.ebooks.oeb.polish.pretty import pretty_html_tree, pretty_xml_tree
from calibre.ebooks.oeb.polish.utils import extract, guess_type
from calibre.translations.dynamic import translate
from calibre.utils.localization import canonicalize_lang, get_lang, lang_as_iso639_1
from calibre.utils.resources import get_path as P

ns = etree.FunctionNamespace('calibre_xpath_extensions')
ns.prefix = 'calibre'
ns['lower-case'] = lambda c, x: x.lower() if hasattr(x, 'lower') else x


class TOC:

    toc_title = None

    def __init__(self, title=None, dest=None, frag=None):
        self.title, self.dest, self.frag = title, dest, frag
        self.dest_exists = self.dest_error = None
        if self.title:
            self.title = self.title.strip()
        self.parent = None
        self.children = []
        self.page_list = []

    def add(self, title, dest, frag=None):
        c = TOC(title, dest, frag)
        self.children.append(c)
        c.parent = self
        return c

    def remove(self, child):
        self.children.remove(child)
        child.parent = None

    def remove_from_parent(self):
        if self.parent is None:
            return
        idx = self.parent.children.index(self)
        for child in reversed(self.children):
            child.parent = self.parent
            self.parent.children.insert(idx, child)
        self.parent.children.remove(self)
        self.parent = None

    def __iter__(self):
        yield from self.children

    def __len__(self):
        return len(self.children)

    def iterdescendants(self, level=None):
        gc_level = None if level is None else level + 1
        for child in self:
            if level is None:
                yield child
            else:
                yield level, child
            yield from child.iterdescendants(level=gc_level)

    def remove_duplicates(self, only_text=True):
        seen = set()
        remove = []
        for child in self:
            key = child.title if only_text else (child.title, child.dest, (child.frag or None))
            if key in seen:
                remove.append(child)
            else:
                seen.add(key)
                child.remove_duplicates()
        for child in remove:
            self.remove(child)

    @property
    def depth(self):
        '''The maximum depth of the navigation tree rooted at this node.'''
        try:
            return max(node.depth for node in self) + 1
        except ValueError:
            return 1

    @property
    def last_child(self):
        return self.children[-1] if self.children else None

    def get_lines(self, lvl=0):
        frag = ('#'+self.frag) if self.frag else ''
        ans = [('\t'*lvl) + f'TOC: {self.title} --> {self.dest}{frag}']
        for child in self:
            ans.extend(child.get_lines(lvl+1))
        return ans

    def __str__(self):
        return '\n'.join(self.get_lines())

    def to_dict(self, node_counter=None):
        ans = {
            'title':self.title, 'dest':self.dest, 'frag':self.frag,
            'children':[c.to_dict(node_counter) for c in self.children]
        }
        if self.dest_exists is not None:
            ans['dest_exists'] = self.dest_exists
        if self.dest_error is not None:
            ans['dest_error'] = self.dest_error
        if node_counter is not None:
            ans['id'] = next(node_counter)
        return ans

    @property
    def as_dict(self):
        return self.to_dict()


def child_xpath(tag, name):
    return tag.xpath(f'./*[calibre:lower-case(local-name()) = "{name}"]')


def add_from_navpoint(container, navpoint, parent, ncx_name):
    dest = frag = text = None
    nl = child_xpath(navpoint, 'navlabel')
    if nl:
        nl = nl[0]
        text = ''
        for txt in child_xpath(nl, 'text'):
            text += etree.tostring(txt, method='text',
                    encoding='unicode', with_tail=False)
    content = child_xpath(navpoint, 'content')
    if content:
        content = content[0]
        href = content.get('src', None)
        if href:
            dest = container.href_to_name(href, base=ncx_name)
            frag = urlparse(href).fragment or None
    return parent.add(text or None, dest or None, frag or None)


def process_ncx_node(container, node, toc_parent, ncx_name):
    for navpoint in node.xpath('./*[calibre:lower-case(local-name()) = "navpoint"]'):
        child = add_from_navpoint(container, navpoint, toc_parent, ncx_name)
        if child is not None:
            process_ncx_node(container, navpoint, child, ncx_name)


def parse_ncx(container, ncx_name):
    root = container.parsed(ncx_name)
    toc_root = TOC()
    navmaps = root.xpath('//*[calibre:lower-case(local-name()) = "navmap"]')
    if navmaps:
        process_ncx_node(container, navmaps[0], toc_root, ncx_name)
    toc_root.lang = toc_root.uid = None
    for attr, val in root.attrib.items():
        if attr.endswith('lang'):
            toc_root.lang = str(val)
            break
    for uid in root.xpath('//*[calibre:lower-case(local-name()) = "meta" and @name="dtb:uid"]/@content'):
        if uid:
            toc_root.uid = str(uid)
            break
    for pl in root.xpath('//*[calibre:lower-case(local-name()) = "pagelist"]'):
        for pt in pl.xpath('descendant::*[calibre:lower-case(local-name()) = "pagetarget"]'):
            pagenum = pt.get('value')
            if pagenum:
                href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
                if href:
                    dest = container.href_to_name(href[0], base=ncx_name)
                    frag = urlparse(href[0]).fragment or None
                    toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
    return toc_root


def add_from_li(container, li, parent, nav_name):
    dest = frag = text = None
    for x in li.iterchildren(XHTML('a'), XHTML('span')):
        text = etree.tostring(x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(x.xpath('descendant-or-self::*/@title')).strip()
        href = x.get('href')
        if href:
            dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
            frag = urlparse(href).fragment or None
        break
    return parent.add(text or None, dest or None, frag or None)


def first_child(parent, tagname):
    try:
        return next(parent.iterchildren(tagname))
    except StopIteration:
        return None


def process_nav_node(container, node, toc_parent, nav_name):
    for li in node.iterchildren(XHTML('li')):
        child = add_from_li(container, li, toc_parent, nav_name)
        ol = first_child(li, XHTML('ol'))
        if child is not None and ol is not None:
            process_nav_node(container, ol, child, nav_name)


def parse_nav(container, nav_name):
    root = container.parsed(nav_name)
    toc_root = TOC()
    toc_root.lang = toc_root.uid = None
    seen_toc = seen_pagelist = False
    et = f'{{{EPUB_NS}}}type'
    for nav in XPath('descendant::h:nav[@epub:type]')(root):
        nt = nav.get(et)
        if nt == 'toc' and not seen_toc:
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                seen_toc = True
                process_nav_node(container, ol, toc_root, nav_name)
                for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
                    text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title')
                    if text:
                        toc_root.toc_title = text
                        break
        elif nt == 'page-list' and not seen_pagelist:
            ol = first_child(nav, XHTML('ol'))
            if ol is not None and not seen_pagelist:
                seen_pagelist = True
                for li in ol.iterchildren(XHTML('li')):
                    for a in li.iterchildren(XHTML('a')):
                        href = a.get('href')
                        if href:
                            text = (etree.tostring(a, method='text', encoding='unicode', with_tail=False) or a.get('title')).strip()
                            if text:
                                dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
                                frag = urlparse(href).fragment or None
                                toc_root.page_list.append({'dest': dest, 'pagenum': text, 'frag': frag})
    return toc_root


def verify_toc_destinations(container, toc):
    anchor_map = {}
    anchor_xpath = XPath('//*/@id|//h:a/@name')
    for item in toc.iterdescendants():
        name = item.dest
        if not name:
            item.dest_exists = False
            item.dest_error = _('No file named %s exists')%name
            continue
        try:
            root = container.parsed(name)
        except KeyError:
            item.dest_exists = False
            item.dest_error = _('No file named %s exists')%name
            continue
        if not hasattr(root, 'xpath'):
            item.dest_exists = False
            item.dest_error = _('No HTML file named %s exists')%name
            continue
        if not item.frag:
            item.dest_exists = True
            continue
        if name not in anchor_map:
            anchor_map[name] = frozenset(anchor_xpath(root))
        item.dest_exists = item.frag in anchor_map[name]
        if not item.dest_exists:
            item.dest_error = _(
                'The anchor %(a)s does not exist in file %(f)s')%dict(
                a=item.frag, f=name)


def find_existing_ncx_toc(container):
    toc = container.opf_xpath('//opf:spine/@toc')
    if toc:
        toc = container.manifest_id_map.get(toc[0], None)
    if not toc:
        ncx = guess_type('a.ncx')
        toc = container.manifest_type_map.get(ncx, [None])[0]
    return toc or None


def find_existing_nav_toc(container):
    for name in container.manifest_items_with_property('nav'):
        return name


def mark_as_nav(container, name):
    if container.opf_version_parsed.major > 2:
        container.apply_unique_properties(name, 'nav')


def get_x_toc(container, find_toc, parse_toc, verify_destinations=True):
    def empty_toc():
        ans = TOC()
        ans.lang = ans.uid = None
        return ans
    toc = find_toc(container)
    ans = empty_toc() if toc is None or not container.has_name(toc) else parse_toc(container, toc)
    ans.toc_file_name = toc if toc and container.has_name(toc) else None
    if verify_destinations:
        verify_toc_destinations(container, ans)
    return ans


def get_toc(container, verify_destinations=True):
    ver = container.opf_version_parsed
    if ver.major < 3:
        return get_x_toc(container, find_existing_ncx_toc, parse_ncx, verify_destinations=verify_destinations)
    else:
        ans = get_x_toc(container, find_existing_nav_toc, parse_nav, verify_destinations=verify_destinations)
        if len(ans) == 0:
            ans = get_x_toc(container, find_existing_ncx_toc, parse_ncx, verify_destinations=verify_destinations)
        return ans


def get_guide_landmarks(container):
    for ref in container.opf_xpath('./opf:guide/opf:reference'):
        href, title, rtype = ref.get('href'), ref.get('title'), ref.get('type')
        href, frag = href.partition('#')[::2]
        name = container.href_to_name(href, container.opf_name)
        if container.has_name(name):
            yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}


def get_nav_landmarks(container):
    nav = find_existing_nav_toc(container)
    if nav and container.has_name(nav):
        root = container.parsed(nav)
        et = f'{{{EPUB_NS}}}type'
        for elem in root.iterdescendants(XHTML('nav')):
            if elem.get(et) == 'landmarks':
                for li in elem.iterdescendants(XHTML('li')):
                    for a in li.iterdescendants(XHTML('a')):
                        href, rtype = a.get('href'), a.get(et)
                        if href:
                            title = etree.tostring(a, method='text', encoding='unicode', with_tail=False).strip()
                            href, frag = href.partition('#')[::2]
                            name = container.href_to_name(href, nav)
                            if container.has_name(name):
                                yield {'dest':name, 'frag':frag, 'title':title or '', 'type':rtype or ''}
                            break


def get_landmarks(container):
    ver = container.opf_version_parsed
    if ver.major < 3:
        return list(get_guide_landmarks(container))
    ans = list(get_nav_landmarks(container))
    if len(ans) == 0:
        ans = list(get_guide_landmarks(container))
    return ans


def ensure_id(elem, all_ids):
    elem_id = elem.get('id')
    if elem_id:
        return False, elem_id
    if elem.tag == XHTML('a'):
        anchor = elem.get('name', None)
        if anchor:
            elem.set('id', anchor)
            return False, anchor
    c = 0
    while True:
        c += 1
        q = f'toc_{c}'
        if q not in all_ids:
            elem.set('id', q)
            all_ids.add(q)
            break
    return True, elem.get('id')


def elem_to_toc_text(elem, prefer_title=False):
    text = xml2text(elem).strip()
    if prefer_title:
        text = elem.get('title', '').strip() or text
    if not text:
        text = elem.get('title', '')
    if not text:
        text = elem.get('alt', '')
    text = re.sub(r'\s+', ' ', text.strip())
    text = text[:1000].strip()
    if not text:
        text = _('(Untitled)')
    return text


def item_at_top(elem):
    try:
        body = XPath('//h:body')(elem.getroottree().getroot())[0]
    except (TypeError, IndexError, KeyError, AttributeError):
        return False
    tree = body.getroottree()
    path = tree.getpath(elem)
    for el in body.iterdescendants(etree.Element):
        epath = tree.getpath(el)
        if epath == path:
            break
        try:
            if el.tag.endswith('}img') or (el.text and el.text.strip()):
                return False
        except Exception:
            return False
        if not path.startswith(epath):
            # Only check tail of non-parent elements
            if el.tail and el.tail.strip():
                return False
    return True



[документация]
def from_xpaths(container, xpaths, prefer_title=False):
    '''
    Generate a Table of Contents from a list of XPath expressions. Each
    expression in the list corresponds to a level of the generate ToC. For
    example: :code:`['//h:h1', '//h:h2', '//h:h3']` will generate a three level
    Table of Contents from the ``<h1>``, ``<h2>`` and ``<h3>`` tags.
    '''
    tocroot = TOC()
    xpaths = [XPath(xp) for xp in xpaths]

    # Find those levels that have no elements in all spine items
    maps = OrderedDict()
    empty_levels = {i+1 for i, xp in enumerate(xpaths)}
    for spinepath in container.spine_items:
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        level_item_map = maps[name] = {i+1:frozenset(xp(root)) for i, xp in enumerate(xpaths)}
        for lvl, elems in level_item_map.items():
            if elems:
                empty_levels.discard(lvl)
    # Remove empty levels from all level_maps
    if empty_levels:
        for name, lmap in tuple(maps.items()):
            lmap = {lvl:items for lvl, items in lmap.items() if lvl not in empty_levels}
            lmap = sorted(lmap.items(), key=itemgetter(0))
            lmap = {i+1:items for i, (l, items) in enumerate(lmap)}
            maps[name] = lmap

    node_level_map = {tocroot: 0}

    def parent_for_level(child_level):
        limit = child_level - 1

        def process_node(node):
            child = node.last_child
            if child is None:
                return node
            lvl = node_level_map[child]
            return node if lvl > limit else child if lvl == limit else process_node(child)

        return process_node(tocroot)

    for name, level_item_map in maps.items():
        root = container.parsed(name)
        item_level_map = {e:i for i, elems in level_item_map.items() for e in elems}
        item_dirtied = False
        all_ids = set(root.xpath('//*/@id'))

        for item in root.iterdescendants(etree.Element):
            lvl = item_level_map.get(item, None)
            if lvl is None:
                continue
            text = elem_to_toc_text(item, prefer_title)
            parent = parent_for_level(lvl)
            if item_at_top(item):
                dirtied, elem_id = False, None
            else:
                dirtied, elem_id = ensure_id(item, all_ids)
            item_dirtied = dirtied or item_dirtied
            toc = parent.add(text, name, elem_id)
            node_level_map[toc] = lvl
            toc.dest_exists = True

        if item_dirtied:
            container.commit_item(name, keep_parsed=True)

    return tocroot




[документация]
def from_links(container):
    '''
    Generate a Table of Contents from links in the book.
    '''
    toc = TOC()
    link_path = XPath('//h:a[@href]')
    seen_titles, seen_dests = set(), set()
    for name, is_linear in container.spine_names:
        root = container.parsed(name)
        for a in link_path(root):
            href = a.get('href')
            if not href or not href.strip():
                continue
            frag = None
            if href.startswith('#'):
                dest = name
                frag = href[1:]
            else:
                href, _, frag = href.partition('#')
                dest = container.href_to_name(href, base=name)
            frag = frag or None
            if (dest, frag) in seen_dests:
                continue
            seen_dests.add((dest, frag))
            text = elem_to_toc_text(a)
            if text in seen_titles:
                continue
            seen_titles.add(text)
            toc.add(text, dest, frag=frag)
    verify_toc_destinations(container, toc)
    for child in toc:
        if not child.dest_exists:
            toc.remove(child)
    return toc



def find_text(node):
    LIMIT = 200
    pat = re.compile(r'\s+')
    for child in node:
        if isinstance(child, etree._Element):
            text = xml2text(child).strip()
            text = pat.sub(' ', text)
            if len(text) < 1:
                continue
            if len(text) > LIMIT:
                # Look for less text in a child of this node, recursively
                ntext = find_text(child)
                return ntext or (text[:LIMIT] + '...')
            else:
                return text



[документация]
def from_files(container):
    '''
    Generate a Table of Contents from files in the book.
    '''
    toc = TOC()
    for i, spinepath in enumerate(container.spine_items):
        name = container.abspath_to_name(spinepath)
        root = container.parsed(name)
        body = XPath('//h:body')(root)
        if not body:
            continue
        text = find_text(body[0])
        if not text:
            text = name.rpartition('/')[-1]
            if i == 0 and text.rpartition('.')[0].lower() in {'titlepage', 'cover'}:
                text = _('Cover')
        toc.add(text, name)
    return toc



def node_from_loc(root, locs, totals=None):
    node = root.xpath('//*[local-name()="body"]')[0]
    for i, loc in enumerate(locs):
        children = tuple(node.iterchildren(etree.Element))
        if totals is not None and totals[i] != len(children):
            raise MalformedMarkup()
        node = children[loc]
    return node


def add_id(container, name, loc, totals=None):
    root = container.parsed(name)
    try:
        node = node_from_loc(root, loc, totals=totals)
    except MalformedMarkup:
        # The webkit HTML parser and the container parser have yielded
        # different node counts, this can happen if the file is valid XML
        # but contains constructs like nested <p> tags. So force parse it
        # with the HTML 5 parser and try again.
        raw = container.raw_data(name)
        root = container.parse_xhtml(raw, fname=name, force_html5_parse=True)
        try:
            node = node_from_loc(root, loc, totals=totals)
        except MalformedMarkup:
            raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
                                    ' before editing.') % name)
        container.replace(name, root)

    if not node.get('id'):
        ensure_id(node, set(root.xpath('//*/@id')))
    container.commit_item(name, keep_parsed=True)
    return node.get('id')


def create_ncx(toc, to_href, btitle, lang, uid):
    lang = lang.replace('_', '-')
    ncx = etree.Element(NCX('ncx'),
        attrib={'version': '2005-1', XML('lang'): lang},
        nsmap={None: NCX_NS})
    head = etree.SubElement(ncx, NCX('head'))
    etree.SubElement(head, NCX('meta'),
        name='dtb:uid', content=str(uid))
    etree.SubElement(head, NCX('meta'),
        name='dtb:depth', content=str(toc.depth))
    generator = ''.join(['calibre (', __version__, ')'])
    etree.SubElement(head, NCX('meta'),
        name='dtb:generator', content=generator)
    etree.SubElement(head, NCX('meta'), name='dtb:totalPageCount', content='0')
    etree.SubElement(head, NCX('meta'), name='dtb:maxPageNumber', content='0')
    title = etree.SubElement(ncx, NCX('docTitle'))
    text = etree.SubElement(title, NCX('text'))
    text.text = btitle
    navmap = etree.SubElement(ncx, NCX('navMap'))
    spat = re.compile(r'\s+')

    play_order = Counter()

    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
            play_order['c'] += 1
            point = etree.SubElement(xml_parent, NCX('navPoint'), id=f"num_{play_order['c']}",
                            playOrder=str(play_order['c']))
            label = etree.SubElement(point, NCX('navLabel'))
            title = child.title
            if title:
                title = spat.sub(' ', title)
            etree.SubElement(label, NCX('text')).text = title
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#'+child.frag
                etree.SubElement(point, NCX('content'), src=href)
            process_node(point, child)

    process_node(navmap, toc)
    return ncx


def commit_ncx_toc(container, toc, lang=None, uid=None):
    tocname = find_existing_ncx_toc(container)
    if tocname is None:
        item = container.generate_item('toc.ncx', id_prefix='toc')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        ncx_id = item.get('id')
        [s.set('toc', ncx_id) for s in container.opf_xpath('//opf:spine')]
    if not lang:
        lang = get_lang()
        for l in container.opf_xpath('//dc:language'):
            l = canonicalize_lang(xml2text(l).strip())
            if l:
                lang = l
                lang = lang_as_iso639_1(l) or l
                break
    lang = lang_as_iso639_1(lang) or lang
    if not uid:
        uid = uuid_id()
        eid = container.opf.get('unique-identifier', None)
        if eid:
            m = container.opf_xpath(f'//*[@id="{eid}"]')
            if m:
                uid = xml2text(m[0])

    title = _('Table of Contents')
    m = container.opf_xpath('//dc:title')
    if m:
        x = xml2text(m[0]).strip()
        title = x or title

    to_href = partial(container.name_to_href, base=tocname)
    root = create_ncx(toc, to_href, title, lang, uid)
    container.replace(tocname, root)
    container.pretty_print.add(tocname)


def ensure_single_nav_of_type(root, ntype='toc'):
    et = f'{{{EPUB_NS}}}type'
    navs = [n for n in root.iterdescendants(XHTML('nav')) if n.get(et) == ntype]
    for x in navs[1:]:
        extract(x)
    if navs:
        nav = navs[0]
        tail = nav.tail
        attrib = dict(nav.attrib)
        nav.clear()
        nav.attrib.update(attrib)
        nav.tail = tail
    else:
        nav = root.makeelement(XHTML('nav'))
        first_child(root, XHTML('body')).append(nav)
    nav.set(f'{{{EPUB_NS}}}type', ntype)
    return nav


def ensure_container_has_nav(container, lang=None, previous_nav=None):
    tocname = find_existing_nav_toc(container)
    if previous_nav is not None:
        nav_name = container.href_to_name(previous_nav[0])
        if nav_name and container.exists(nav_name):
            tocname = nav_name
            container.apply_unique_properties(tocname, 'nav')
    if tocname is None:
        name = previous_nav[0] if previous_nav is not None else 'nav.xhtml'
        item = container.generate_item(name, id_prefix='nav')
        item.set('properties', 'nav')
        tocname = container.href_to_name(item.get('href'), base=container.opf_name)
        if previous_nav is not None:
            root = previous_nav[1]
        else:
            root = container.parse_xhtml(P('templates/new_nav.html', data=True).decode('utf-8'))
        container.replace(tocname, root)
    else:
        root = container.parsed(tocname)
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        root.set('lang', lang)
        root.set(f'{{{XML_NS}}}lang', lang)
    return tocname, root


def collapse_li(parent):
    for li in parent.iterdescendants(XHTML('li')):
        if len(li) == 1:
            li.text = None
            li[0].tail = None


def create_nav_li(container, ol, entry, tocname):
    li = ol.makeelement(XHTML('li'))
    ol.append(li)
    a = li.makeelement(XHTML('a'))
    li.append(a)
    href = container.name_to_href(entry['dest'], tocname)
    if entry['frag']:
        href += '#' + entry['frag']
    a.set('href', href)
    return a


def set_landmarks(container, root, tocname, landmarks):
    nav = ensure_single_nav_of_type(root, 'landmarks')
    nav.set('hidden', '')
    ol = nav.makeelement(XHTML('ol'))
    nav.append(ol)
    for entry in landmarks:
        if entry['type'] and container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS:
            a = create_nav_li(container, ol, entry, tocname)
            a.set(f'{{{EPUB_NS}}}type', entry['type'])
            a.text = entry['title'] or None
    pretty_xml_tree(nav)
    collapse_li(nav)


def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None):
    tocname, root = ensure_container_has_nav(container, lang=lang, previous_nav=previous_nav)
    nav = ensure_single_nav_of_type(root, 'toc')
    if toc.toc_title:
        nav.append(nav.makeelement(XHTML('h1')))
        nav[-1].text = toc.toc_title

    rnode = nav.makeelement(XHTML('ol'))
    nav.append(rnode)
    to_href = partial(container.name_to_href, base=tocname)
    spat = re.compile(r'\s+')

    def process_node(xml_parent, toc_parent):
        for child in toc_parent:
            li = xml_parent.makeelement(XHTML('li'))
            xml_parent.append(li)
            title = child.title or ''
            title = spat.sub(' ', title).strip()
            a = li.makeelement(XHTML('a' if child.dest else 'span'))
            a.text = title
            li.append(a)
            if child.dest:
                href = to_href(child.dest)
                if child.frag:
                    href += '#'+child.frag
                a.set('href', href)
            if len(child):
                ol = li.makeelement(XHTML('ol'))
                li.append(ol)
                process_node(ol, child)
    process_node(rnode, toc)
    pretty_xml_tree(nav)

    collapse_li(nav)
    nav.tail = '\n'

    if toc.page_list:
        nav = ensure_single_nav_of_type(root, 'page-list')
        nav.set('hidden', '')
        ol = nav.makeelement(XHTML('ol'))
        nav.append(ol)
        for entry in toc.page_list:
            if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS:
                a = create_nav_li(container, ol, entry, tocname)
                a.text = str(entry['pagenum'])
        pretty_xml_tree(nav)
        collapse_li(nav)
    container.replace(tocname, root)


def commit_toc(container, toc, lang=None, uid=None):
    commit_ncx_toc(container, toc, lang=lang, uid=uid)
    if container.opf_version_parsed.major > 2:
        commit_nav_toc(container, toc, lang=lang)


def remove_names_from_toc(container, names):
    changed = []
    names = frozenset(names)
    for find_toc, parse_toc, commit_toc in (
            (find_existing_ncx_toc, parse_ncx, commit_ncx_toc),
            (find_existing_nav_toc, parse_nav, commit_nav_toc),
    ):
        toc = get_x_toc(container, find_toc, parse_toc, verify_destinations=False)
        if len(toc) > 0:
            remove = []
            for node in toc.iterdescendants():
                if node.dest in names:
                    remove.append(node)
            if remove:
                for node in reversed(remove):
                    node.remove_from_parent()
                commit_toc(container, toc)
                changed.append(find_toc(container))
    return changed


def find_inline_toc(container):
    for name, linear in container.spine_names:
        if container.parsed(name).xpath('//*[local-name()="body" and @id="calibre_generated_inline_toc"]'):
            return name


def toc_to_html(toc, container, toc_name, title, lang=None):

    def process_node(html_parent, toc, level=1, indent='  ', style_level=2):
        li = html_parent.makeelement(XHTML('li'))
        li.tail = '\n'+ (indent*level)
        html_parent.append(li)
        name, frag = toc.dest, toc.frag
        href = '#'
        if name:
            href = container.name_to_href(name, toc_name)
            if frag:
                href += '#' + frag
        a = li.makeelement(XHTML('a'), href=href)
        a.text = toc.title
        li.append(a)
        if len(toc) > 0:
            parent = li.makeelement(XHTML('ul'))
            parent.set('class', f'level{style_level}')
            li.append(parent)
            a.tail = '\n\n' + (indent*(level+2))
            parent.text = '\n'+(indent*(level+3))
            parent.tail = '\n\n' + (indent*(level+1))
            for child in toc:
                process_node(parent, child, level+3, style_level=style_level + 1)
            parent[-1].tail = '\n' + (indent*(level+2))

    E = ElementMaker(namespace=XHTML_NS, nsmap={None:XHTML_NS})
    html = E.html(
        E.head(
            E.title(title),
            E.style(P('templates/inline_toc_styles.css', data=True).decode('utf-8'), type='text/css'),
        ),
        E.body(
            E.h2(title),
            E.ul(),
            id='calibre_generated_inline_toc',
        )
    )

    ul = html[1][1]
    ul.set('class', 'level1')
    for child in toc:
        process_node(ul, child)
    if lang:
        html.set('lang', lang)
    pretty_html_tree(container, html)
    return html



[документация]
def create_inline_toc(container, title=None):
    '''
    Create an inline (HTML) Table of Contents from an existing NCX Table of Contents.

    :param title: The title for this table of contents.
    '''
    lang = get_book_language(container)
    default_title = 'Table of Contents'
    if lang:
        lang = lang_as_iso639_1(lang) or lang
        default_title = translate(lang, default_title)
    title = title or default_title
    toc = get_toc(container)
    if len(toc) == 0:
        return None
    toc_name = find_inline_toc(container)

    name = toc_name
    html = toc_to_html(toc, container, name, title, lang)
    raw = serialize(html, 'text/html')
    if name is None:
        name, c = 'toc.xhtml', 0
        while container.has_name(name):
            c += 1
            name = f'toc{c}.xhtml'
        container.add_file(name, raw, spine_index=0)
    else:
        with container.open(name, 'wb') as f:
            f.write(raw)
    set_guide_item(container, 'toc', title, name, frag='calibre_generated_inline_toc')
    return name