XMLtoPDFBook is a publishing tool I created that lets you create simple PDF ebooks from text content in XML files.
I had blogged about XMLtoPDFBook earlier, here:
Create PDF books with XMLtoPDFBook
and here:
XMLtoPDFBook now supports chapter numbers and names
Today I added some support for a Table of Contents feature to XMLtoPDFBook. Here is the updated program:
# XMLtoPDFBook2.py
# A program to convert a book in XML text format to a PDF book.
# Uses xtopdf and ReportLab.
# Author: Vasudev Ram - http://www.dancingbison.com
# Version: v0.2
#--------------------------------------------------------------------
# imports
import sys
import os
import string
import time
from PDFWriter import PDFWriter
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
#--------------------------------------------------------------------
# global variables
# Command-line argument list shared between functions: main() assigns
# sys.argv to it and XMLtoPDFBook() reads the filenames from it.
sysargv = None
#--------------------------------------------------------------------
def debug(message):
    """Write *message* followed by a newline to standard error.

    message: the string to write; it is concatenated with a newline, so
    it must be a string.
    """
    sys.stderr.write(message + "\n")
#--------------------------------------------------------------------
def get_xml_filename(sysargv):
    """Return the input XML filename from the argument list.

    sysargv: the command-line argument list; the XML filename is the
    element at index 1. Raises IndexError if the list is too short.
    """
    return sysargv[1]
#--------------------------------------------------------------------
def get_pdf_filename(sysargv):
    """Return the output PDF filename from the argument list.

    sysargv: the command-line argument list; the PDF filename is the
    element at index 2. Raises IndexError if the list is too short.
    """
    return sysargv[2]
#--------------------------------------------------------------------
def XMLtoPDFBook():
    """Convert the XML book named on the command line into a PDF book.

    Reads the input XML filename and the output PDF filename from the
    module-level ``sysargv`` list (set by main()). The XML root element
    must be <book> and every child of the root must be <chapter>. A
    chapter's optional ``name`` attribute becomes its Table of Contents
    entry and part of its page header; the chapter's text becomes the
    chapter body.

    The output is written through PDFWriter: first the Table of Contents
    (one numbered line per chapter, then savePage()), then each chapter's
    text line by line, with savePage() called after each chapter.

    Exits the process with status 1 if the XML file cannot be parsed, if
    the root tag is not 'book', or if a child of the root is not a
    'chapter'.
    """
    debug("Entered XMLtoPDFBook()")

    # Get command-line arguments.
    xml_filename = get_xml_filename(sysargv)
    debug("xml_filename: " + xml_filename)
    pdf_filename = get_pdf_filename(sysargv)
    debug("pdf_filename: " + pdf_filename)

    # Parse the XML file. Only the parse call is guarded, so that an
    # unrelated failure is not misreported as a parse error.
    try:
        tree = ET.ElementTree(file=xml_filename)
    except Exception as e:
        sys.stderr.write(
            "Error: caught exception in ET.ElementTree(file): "
            + str(e) + "\n")
        sys.exit(1)
    debug("tree = " + repr(tree))

    # Get the tree root.
    root = tree.getroot()
    debug("root.tag = " + root.tag)
    if root.tag != "book":
        debug("Error: Root tag is not 'book'")
        sys.exit(1)

    # toc holds the chapter names and chapters holds the chapter texts;
    # the two lists are parallel (same index == same chapter).
    toc = []
    chapters = []

    # Traverse the tree, extracting needed data into variables.
    debug("-" * 60)
    for chapter in root:
        if chapter.tag != "chapter":
            debug("Error: root_child tag is not 'chapter'")
            sys.exit(1)
        # An element with no text has .text == None; treat it as an
        # empty chapter so the later split() does not fail.
        chapters.append(chapter.text or "")
        # A chapter without a 'name' attribute gets an empty name.
        toc.append(chapter.attrib.get('name', ""))
    debug("-" * 60)

    # Create and set some fields of a PDFWriter.
    pw = PDFWriter(pdf_filename)
    pw.setFont("Courier", 12)
    pw.setFooter("Generated by XMLtoPDFBook. Copyright 2013 Vasudev Ram")

    # Write the TOC: one "<number>: <name>" line per chapter.
    pw.setHeader("Table of Contents")
    debug("Chapter names")
    for chapter_num, chapter_name in enumerate(toc, 1):
        debug(chapter_name)
        pw.writeLine(str(chapter_num) + ": " + chapter_name)
    pw.savePage()

    # Write the chapters, each under its own numbered header.
    for chapter_num, (chapter_name, chapter_text) in enumerate(
            zip(toc, chapters), 1):
        pw.setHeader("Chapter " + str(chapter_num) + ": " + chapter_name)
        for line in chapter_text.split("\n"):
            pw.writeLine(line)
        pw.savePage()

    pw.close()
    debug("Exiting XMLtoPDFBook()")
def main():
    """Check the command-line arguments and run the conversion.

    Stores sys.argv in the module-level ``sysargv`` list, which
    XMLtoPDFBook() reads. Exactly two arguments are required (the input
    XML filename and the output PDF filename); otherwise a usage message
    is written to standard error and the process exits with status 1.
    """
    debug("Entered main()")
    global sysargv
    sysargv = sys.argv

    # Check for right number of arguments.
    if len(sysargv) != 3:
        # Tell the user what went wrong instead of exiting silently.
        prog = sysargv[0] if sysargv else "XMLtoPDFBook2.py"
        sys.stderr.write(
            "Usage: python " + prog + " xml_filename pdf_filename\n")
        sys.exit(1)

    XMLtoPDFBook()
    debug("Exiting main()")
#--------------------------------------------------------------------
# Script entry point: run main() and report any unexpected exception on
# standard error with exit status 1. sys.exit() calls made inside main()
# raise SystemExit, which is not an Exception subclass and so is not
# caught here.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # "as" works on Python 2.6+ and Python 3; the older
        # "except Exception, e" form is a syntax error on Python 3.
        sys.stderr.write("Error: caught Exception: " + str(e) + "\n")
        sys.exit(1)
#--------------------------------------------------------------------
You can run it as follows:
python XMLtoPDFBook2.py vi_quickstart2.xml vi_quickstart2.pdf
where I've used my vi quickstart tutorial, first written for Linux For You magazine, as the input XML file.
Here is a screenshot of the first page of the resulting PDF ebook - the Table of Contents:
And here is a screenshot of Chapter 3 of the book:
I've pushed the code (as file XMLtoPDFBook2.py) to my xtopdf project on Bitbucket.
Enjoy.
- Vasudev Ram - Dancing Bison Enterprises
Contact Page


No comments:
Post a Comment
Please be on-topic and civil in your comments. Comments not following these guidelines will be deleted.