Thursday, December 5, 2013

Added a Table of Contents feature to XMLtoPDFBook

By Vasudev Ram

XMLtoPDFBook is a publishing tool I created that lets you create simple PDF ebooks from text content in XML files.

I had blogged about XMLtoPDFBook earlier, here:

Create PDF books with XMLtoPDFBook

and here:

XMLtoPDFBook now supports chapter numbers and names

Today I added some support for a Table of Contents feature to XMLtoPDFBook. Here is the updated program:

# A program to convert a book in XML text format to a PDF book.
# Uses xtopdf and ReportLab.

# Author: Vasudev Ram
# Version: v0.2


# imports

import sys
import os
import string
import time

from PDFWriter import PDFWriter

# Prefer the C implementation of ElementTree where the interpreter still
# ships it; fall back to the standard xml.etree.ElementTree module otherwise.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET


# global variables

# Command-line argument list. main() assigns sys.argv to it, and
# XMLtoPDFBook() reads the XML and PDF filenames from it.
sysargv = None


def debug(message):
    """Write *message*, followed by a newline, to standard error."""
    line = message + "\n"
    sys.stderr.write(line)


def get_xml_filename(sysargv):
    """Return the input XML filename: the item at index 1 of *sysargv*."""
    xml_filename = sysargv[1]
    return xml_filename


def get_pdf_filename(sysargv):
    """Return the output PDF filename: the item at index 2 of *sysargv*."""
    pdf_filename = sysargv[2]
    return pdf_filename


def XMLtoPDFBook():
    """Convert the XML book named on the command line into a PDF book.

    The input XML filename and the output PDF filename are read from the
    module-level ``sysargv`` list (assigned in main()). The XML root
    element must be <book> and every child of the root must be a
    <chapter> element; a chapter's optional ``name`` attribute is its
    title and the element's text is its content.

    The PDF begins with a "Table of Contents" section listing the
    numbered chapter names, followed by each chapter under a
    "Chapter N: name" header.

    Exits the process with status 1 if the XML file cannot be parsed or
    does not have the expected structure.
    """

    debug("Entered XMLtoPDFBook()")

    global sysargv

    # Get command-line arguments.
    xml_filename = get_xml_filename(sysargv)
    debug("xml_filename: " + xml_filename)
    pdf_filename = get_pdf_filename(sysargv)
    debug("pdf_filename: " + pdf_filename)

    # Parse the XML file. Without a tree nothing below can work, so stop
    # here instead of failing later on an unbound name.
    try:
        tree = ET.ElementTree(file=xml_filename)
    except Exception:
        debug("Error: caught exception in ET.ElementTree(file)")
        sys.exit(1)
    debug("tree = " + repr(tree))

    # Get the tree root.
    root = tree.getroot()
    debug("root.tag = " + root.tag)
    if root.tag != "book":
        debug("Error: Root tag is not 'book'")
        sys.exit(1)

    # Initialize the table of contents list.
    toc = []
    # Initialize the chapters list.
    chapters = []

    # Traverse the tree, extracting needed data into variables.
    debug("-" * 60)
    for chapter in root:
        if chapter.tag != "chapter":
            debug("Error: root_child tag is not 'chapter'")
            sys.exit(1)
        # A chapter without a name attribute gets an empty title.
        toc.append(chapter.attrib.get('name', ""))
        # An empty <chapter/> element has text None; store "" so the
        # split("\n") below still works.
        chapters.append(chapter.text or "")
        debug("-" * 60)

    # Create and set some fields of a PDFWriter.
    pw = PDFWriter(pdf_filename)
    pw.setFont("Courier", 12)
    pw.setFooter("Generated by XMLtoPDFBook. Copyright 2013 Vasudev Ram")

    # NOTE(review): savePage() and close() below are PDFWriter methods
    # from xtopdf; confirm against the installed PDFWriter version.

    # Write the TOC.
    pw.setHeader("Table of Contents")
    debug("Chapter names")
    for chapter_num, chapter_name in enumerate(toc, 1):
        pw.writeLine(str(chapter_num) + ": " + chapter_name)
    # End the TOC page so the first chapter starts on a fresh page.
    pw.savePage()

    # Write the chapters.
    numbered_chapters = enumerate(zip(toc, chapters), 1)
    for chapter_num, (chapter_name, chapter_text) in numbered_chapters:
        pw.setHeader("Chapter " + str(chapter_num) + ": " + chapter_name)
        for line in chapter_text.split("\n"):
            pw.writeLine(line)
        # End the chapter's last page before the next header is set.
        pw.savePage()

    # Finish the PDF file.
    pw.close()

    debug("Exiting XMLtoPDFBook()")

def main():
    """Check the command line, then build the PDF book.

    Stores sys.argv in the module-level ``sysargv`` so XMLtoPDFBook()
    can read the filenames from it. Exactly two arguments are required
    (the input XML filename and the output PDF filename); otherwise a
    usage message is written to standard error and the process exits
    with status 1.
    """

    debug("Entered main()")

    global sysargv
    sysargv = sys.argv

    # Check for right number of arguments.
    if len(sysargv) != 3:
        debug("Usage: python " + sysargv[0] + " xml_filename pdf_filename")
        sys.exit(1)

    XMLtoPDFBook()

    debug("Exiting main()")


if __name__ == "__main__":
    # Top-level boundary: report any unexpected error on standard error
    # and exit with a non-zero status. sys.exit() calls made inside main()
    # raise SystemExit, which is not an Exception subclass, so they pass
    # through this handler untouched.
    try:
        main()
    except Exception as e:
        sys.stderr.write("Error: caught Exception: " + str(e) + "\n")
        sys.exit(1)


You can run it as follows:
python XMLtoPDFBook.py vi_quickstart2.xml vi_quickstart2.pdf
where I've used my vi quickstart tutorial, first written for Linux For You magazine, as the input XML file.

Here is a screenshot of the first page of the resulting PDF ebook - the Table of Contents:

And here is a screenshot of Chapter 3 of the book:

I've pushed the code (as file XMLtoPDFBook.py) to my xtopdf project on Bitbucket.


- Vasudev Ram - Dancing Bison Enterprises

Contact Page

No comments: