Added a Table of Contents feature to XMLtoPDFBook

Thursday, December 5, 2013

Added a Table of Contents feature to XMLtoPDFBook

By Vasudev Ram

XMLtoPDFBook is a publishing tool I created that lets you create simple PDF ebooks from text content in XML files.

I blogged about XMLtoPDFBook earlier, here:

Create PDF books with XMLtoPDFBook

and here:

XMLtoPDFBook now supports chapter numbers and names

Today I added some support for a Table of Contents feature to XMLtoPDFBook. Here is the updated program:

# XMLtoPDFBook2.py

# A program to convert a book in XML text format to a PDF book.
# Uses xtopdf and ReportLab.

# Author: Vasudev Ram - http://www.dancingbison.com
# Version: v0.2

#--------------------------------------------------------------------

# imports

import sys
import os
import string
import time

from PDFWriter import PDFWriter

try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

#--------------------------------------------------------------------

# global variables

# Command-line argument list. It is None until main() assigns sys.argv to
# it; XMLtoPDFBook() then reads the XML and PDF filenames from it.
sysargv = None

#--------------------------------------------------------------------

def debug(message):
    """Write message, followed by a newline, to standard error."""
    stderr_line = message + "\n"
    sys.stderr.write(stderr_line)

#--------------------------------------------------------------------

def get_xml_filename(sysargv):
    """Return the input XML filename, i.e. the item at index 1 of sysargv."""
    xml_arg_index = 1
    return sysargv[xml_arg_index]

#--------------------------------------------------------------------

def get_pdf_filename(sysargv):
    """Return the output PDF filename, i.e. the item at index 2 of sysargv."""
    pdf_arg_index = 2
    return sysargv[pdf_arg_index]

#--------------------------------------------------------------------

def XMLtoPDFBook():
    """Convert the XML book named on the command line into a PDF book.

    The input XML filename and the output PDF filename are taken from the
    module-level sysargv list (set by main()).  The root element of the
    XML file must be 'book' and every child of the root must be a
    'chapter' element.  A chapter's optional 'name' attribute is used as
    its title; a missing name is treated as an empty string.

    The PDF is written with a "Table of Contents" section listing the
    numbered chapter names, followed by each chapter's text written line
    by line under a "Chapter N: name" header.

    Exits the process with status 1 if the XML file cannot be parsed, if
    the root tag is not 'book', or if a child of the root is not a
    'chapter'.
    """

    debug("Entered XMLtoPDFBook()")

    global sysargv

    # Get command-line arguments.
    xml_filename = get_xml_filename(sysargv)
    debug("xml_filename: " + xml_filename)
    pdf_filename = get_pdf_filename(sysargv)
    debug("pdf_filename: " + pdf_filename)

    # Parse the XML file.  Only the parse itself is inside the try, so an
    # unrelated failure is not misreported as a parse error.
    try:
        tree = ET.ElementTree(file=xml_filename)
    except Exception as e:
        # Report through debug() so the message ends with a newline, and
        # include the exception text so the user can see what went wrong.
        debug("Error: caught exception in ET.ElementTree(file): " + str(e))
        sys.exit(1)
    debug("tree = " + repr(tree))

    # Get the tree root.
    root = tree.getroot()
    debug("root.tag = " + root.tag)
    if root.tag != "book":
        debug("Error: Root tag is not 'book'")
        sys.exit(1)

    # Initialize the table of contents list.
    toc = []
    # Initialize the chapters list.
    chapters = []

    # Traverse the tree, extracting needed data into variables.
    debug("-" * 60)
    for chapter in root:
        if chapter.tag != "chapter":
            debug("Error: root_child tag is not 'chapter'")
            sys.exit(1)
        # Element.text is None when the element has no leading text (for
        # example an empty <chapter/>); store "" so the later split()
        # does not fail on None.
        chapters.append(chapter.text or "")
        toc.append(chapter.attrib.get("name", ""))
        debug("-" * 60)

    # Create and set some fields of a PDFWriter.
    pw = PDFWriter(pdf_filename)
    pw.setFont("Courier", 12)
    pw.setFooter("Generated by XMLtoPDFBook. Copyright 2013 Vasudev Ram")

    # Write the TOC.
    pw.setHeader("Table of Contents")
    debug("Chapter names")
    for chapter_num, chapter_name in enumerate(toc, 1):
        debug(chapter_name)
        pw.writeLine(str(chapter_num) + ": " + chapter_name)
    pw.savePage()

    # Write the chapters.  toc and chapters were filled in lockstep, so
    # zip() pairs each chapter's name with its text.
    for chapter_num, (chapter_name, chapter_text) in enumerate(
            zip(toc, chapters), 1):
        pw.setHeader("Chapter " + str(chapter_num) + ": " + chapter_name)
        for line in chapter_text.split("\n"):
            pw.writeLine(line)
        pw.savePage()

    pw.close()

    debug("Exiting XMLtoPDFBook()")

def main():
    """Check the command line, then convert the XML book to PDF.

    Stores sys.argv in the module-level sysargv, which XMLtoPDFBook()
    reads.  Unless exactly two arguments (XML filename and PDF filename)
    follow the program name, writes a usage message to standard error and
    exits with status 1.
    """

    debug("Entered main()")

    global sysargv
    sysargv = sys.argv

    # Check for right number of arguments.
    if len(sysargv) != 3:
        # Tell the user what was expected instead of exiting silently.
        # Guard against an empty argv so the usage message itself
        # cannot raise IndexError.
        if sysargv:
            prog_name = sysargv[0]
        else:
            prog_name = "XMLtoPDFBook2.py"
        debug("Usage: python " + prog_name + " xml_filename pdf_filename")
        sys.exit(1)

    XMLtoPDFBook()

    debug("Exiting main()")

#--------------------------------------------------------------------

# Script entry point: run main() and turn any uncaught exception into an
# error message on standard error plus exit status 1.
if __name__ == "__main__":
    try:
        main()
    # "except ... as e" is accepted by Python 2.6+ and Python 3; the older
    # "except Exception, e" form is a syntax error on Python 3.
    except Exception as e:
        # Separate the prefix from the exception text and end the line.
        sys.stderr.write("Error: caught Exception: " + str(e) + "\n")
        sys.exit(1)

#--------------------------------------------------------------------

You can run it as follows:

python XMLtoPDFBook2.py vi_quickstart2.xml vi_quickstart2.pdf

where I've used my vi quickstart tutorial, first written for Linux For You magazine, as the input XML file.

Here is a screenshot of the first page of the resulting PDF ebook - the Table of Contents: