Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- '''
- This file is not free software. You are allowed to freely copy,
- distribute and modify it, as long as this copyright notice
- and license conditions remains.
- You are NOT given permission to execute this program.
- This file is not even intended to be a running program at all.
- It's intended to be a basis for educational discussions about
- using latex as a generator for beautiful documents.
- '''
- raise NotImplementedError("You are not allowed to execute this file!")  # deliberate guard: raising at module level aborts any run or import here, so nothing below this line is ever evaluated
- import os
- import re
- import shutil
- import urllib2
# (destination directory, URL) pairs. fetch_sources() downloads each URL and
# stores it in its directory under the last path component of the URL.
_sources = [
    # title page text and cover image
    ('work/t', 'http://nostarch.com/autotools.htm'),
    ('work/t', 'http://nostarch.com/sites/default/files/imagecache/product_full/autotools_big.png'),

    # foreword
    ('work/c0', 'http://www.freesoftwaremagazine.com/books/autotools_a_guide_to_autoconf_automake_libtool'),

    # chapter 1 and its figures
    ('work/c1', 'http://www.freesoftwaremagazine.com/books/agaal/brief_introduction_to_gnu_autotools'),
    ('work/c1', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2754/autoconf_ahdr_dataflow.png'),
    ('work/c1', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2754/aclocal_dataflow.png'),
    ('work/c1', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2754/automake_libtool_dataflow.png'),
    ('work/c1', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2754/configure_dataflow.png'),
    ('work/c1', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2754/make_dataflow.png'),

    # chapter 2 and its figure
    ('work/c2', 'http://www.freesoftwaremagazine.com/books/agaal/gnu_coding_standards_applied_to_autotools'),
    ('work/c2', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2763/compile_link_process.png'),

    # chapters 3 and 4
    ('work/c3', 'http://www.freesoftwaremagazine.com/books/agaal/configuring_a_project_with_autoconf'),
    ('work/c4', 'http://www.freesoftwaremagazine.com/books/agaal/automatically_writing_makefiles_with_autotools'),

    # chapter 5 and its figures
    ('work/c5', 'http://www.freesoftwaremagazine.com/books/agaal/building_shared_libraries_once_using_autotools'),
    ('work/c5', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2764/exe_load.png'),
    ('work/c5', 'http://www.freesoftwaremagazine.com/files/www.freesoftwaremagazine.com/nodes/2764/lib_load.png'),

    # chapters 6 to 9
    ('work/c6', 'http://www.freesoftwaremagazine.com/books/agaal/autotools_example'),
    ('work/c7', 'http://www.freesoftwaremagazine.com/books/agaal/catalog_of_reusable_solutions'),
    ('work/c8', 'http://www.freesoftwaremagazine.com/books/agaal/overview_of_m4_overview'),
    ('work/c9', 'http://www.freesoftwaremagazine.com/books/agaal/reusing_autotools_solutions'),
]

# Chapter titles by position: seeded with the foreword's title,
# read_chapter_titles() appends the titles it finds and insert_chapters()
# looks them up by index.
_chapter_names = ["Foreword"]
# Ordered (pattern, replacement) pairs; regex_transformations() applies them
# one after another to the whole document, so the order matters: specific
# multi-tag rules must run before the generic single-tag rules further down.
# Replacements are re.sub templates: a doubled backslash emits one literal
# backslash and \1, \2, ... insert the captured groups.
_regex_transforms = [
    # a "textbox" div becomes a ruled quotation with an unnumbered heading
    (r'<div\s*class="textbox">\s*<h3>(.*?)</h3>([\s\S]*?)</div>',
     r'''
\\begin{quotation}
{\\color{gray}\\noindent\\rule{0.89\\textwidth}{2pt}}
\\subsection*{\1}
\2
{\\color{gray}\\noindent\\rule{0.89\\textwidth}{2pt}}
\\end{quotation}
'''),
    ("<!--.*?-->", ""),
    # Decode the basic HTML entities. As pasted, these three rules mapped each
    # character to itself and therefore did nothing. "&amp;" is decoded last so
    # that a doubly-escaped "&amp;lt;" ends up as the text "&lt;".
    ("&lt;", "<"),
    ("&gt;", ">"),
    ("&amp;", "&"),
    # NOTE(review): the typographic rules below match literal characters; they
    # may originally have been HTML entities as well -- confirm against the
    # pages being converted.
    ("‘", "`"),
    ("’", "'"),
    ("“", "``"),
    ("”", "''"),
    ("—", "--"),
    ("…", r"\\ldots "),
    # a heading directly followed by <pre> becomes the caption of the listing
    (r"<h[23]>(.+)</h[23]>\s*<pre>", r"\\begin{lstlisting}[caption=\1]"),
    ("</pre>", "\n\\\\end{lstlisting}"),
    # linked figure -> text reference plus a figure environment
    (r'<dl class="image"><dt><a href="/files/www.freesoftwaremagazine.com/nodes/\d*/([^"]+)"><img src="[^"]+" alt="Figure \d*: ([^"]+)" title="[^"]+"></a></dt><dd><caption>[^"]+</caption></dd></dl>',
     r'''
(see fig. \\ref{fig-\1})
\\begin{figure}[ht!]
\\centering
\\includegraphics[width=\\textwidth]{img/\1}
\\caption{\2}
\\label{fig-\1}
\\end{figure}
'''),
    # hyperlink -> link text with the target in a footnote
    (r'<a\s+href="([^"]+)">(.*?)</a>', r"\2\\footnote{\\url{\1}}"),
    ("<p>", ""),
    ("</p>", "\n"),
    ("<h1[^>]*>", r"\\section{"),
    ("</h1>", "}"),
    ("<h2>", r"\\subsection{"),
    ("</h2>", "}"),
    ("<h3>", r"\\subsubsection{"),
    ("</h3>", "}"),
    ("<code>", r"\\texttt{"),
    ("</code>", "}"),
    ("<em>", r"\\textit{"),
    ("</em>", "}"),
    ("<i>", r"\\textit{"),
    ("</i>", "}"),
    ("<b>", r"\\textbf{"),
    ("</b>", "}"),
    ("<strong>", r"\\textbf{"),
    ("</strong>", "}"),
    # any <pre> still present had no heading in front of it
    ("<pre>", r"\\begin{lstlisting}"),
    ("<blockquote>", r"\\begin{quotation}"),
    ("</blockquote>", "\n\\\\end{quotation}"),
    ("<ul>", r"\\begin{itemize}"),
    ("</ul>", r"\\end{itemize}"),
    ("<ol>", r"\\begin{enumerate}"),
    ("</ol>", r"\\end{enumerate}"),
    ("<li>", r"\\item "),
    ("</li>", "\n"),
    # textual cross references -> \ref to the chap-N labels
    (r"Chapter (\d)", r"Chapter \\ref{chap-\1}"),
    (r"Chapters (\d), (\d) and (\d)", r"Chapters \\ref{chap-\1}, \\ref{chap-\2} and \\ref{chap-\3}"),
    ("Appendix A", r"Chapter \\ref{chap-8}"),
    ("Finally, the References section", r"Finally Chapter \\ref{chap-9}"),
    (r"\\section{About the Author}", "\\\\section*{About the Author}\n\n"),
    # last resort rules: remove missed html tags
    ("</?div[^>]*>", ""),
    ("<hr[^>]*>", ""),
]
# (tex destination, html source) pairs, in book order. The first entry is the
# title page text, the second the foreword; the rest are the chapters.
_tex_files = [
    ('work/tex/titletext.tex', 'work/t/autotools.htm'),
    ('work/tex/00_foreword.tex', 'work/c0/autotools_a_guide_to_autoconf_automake_libtool'),
    ('work/tex/01_introduction.tex', 'work/c1/brief_introduction_to_gnu_autotools'),
    ('work/tex/02_gnu_coding_standards.tex', 'work/c2/gnu_coding_standards_applied_to_autotools'),
    ('work/tex/03_configure.tex', 'work/c3/configuring_a_project_with_autoconf'),
    ('work/tex/04_automake.tex', 'work/c4/automatically_writing_makefiles_with_autotools'),
    ('work/tex/05_shared_libs.tex', 'work/c5/building_shared_libraries_once_using_autotools'),
    ('work/tex/06_example.tex', 'work/c6/autotools_example'),
    ('work/tex/07_catalog.tex', 'work/c7/catalog_of_reusable_solutions'),
    ('work/tex/08_m4.tex', 'work/c8/overview_of_m4_overview'),
    ('work/tex/09_reusing.tex', 'work/c9/reusing_autotools_solutions'),
]
# Main LaTeX document written by make_tex_template(); it \input's the title
# text and the per-chapter .tex files. Kept as a raw string so that every
# backslash is literal: the value is the same as before, but it no longer
# depends on unrecognised escape sequences such as "\u" or "\p" being passed
# through unchanged.
_tex_template = r'''
\documentclass[10pt,titlepage=true,BCOR=10mm,DIV=10]{scrbook}
%BCOR 10mm for ring binding
%increased DIV to enlage page area (and reduce pages numbers)
%be aware that reading long lines is not that easy for the eye
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{hyperref}
\usepackage[sc]{mathpazo} %palatino
\linespread{1.05} %palatino wider lines spacing
\usepackage{listings}
\usepackage{xcolor}
\usepackage{graphicx}
\usepackage[absolute]{textpos}
\pretolerance=150
\tolerance=150
\setlength{\emergencystretch}{3em}
\hypersetup{
colorlinks=true,
linkcolor=black,
urlcolor=black
}
\lstset{
breaklines=true,
breakatwhitespace=true,
basicstyle=\ttfamily\small
}
\setkomafont{disposition}{\rmfamily\itshape}
\author{John Calcote}
\title{Autotools: A Practical Guide To GNU Autoconf, Automake, And Libtool}
\date{July 2010}
\begin{document}
\pagenumbering{roman}
\thispagestyle{empty}
\begin{titlepage}
\begin{textblock}{297}[0,0](0,0)
\includegraphics[width=\paperwidth,height=\paperheight]{img/autotools_big_scaled.png}
\end{textblock}
\vspace*{1em}
\clearpage
\input{titletext.tex}
\vfill
\noindent Warning: This book was generated from various unreviewed work-in-progress online sources
and may contain minor incorrectnesses.
The real printed book, available from No Starch Press (\url{http://www.nostarch.com/autotools.htm}) is
technically reviewed and completed. Consider buying.
\clearpage
\thispagestyle{empty}
\end{titlepage}
\maketitle
\setcounter{page}{1}
\tableofcontents
\listoffigures
\pagestyle{empty}
\cleardoublepage
\pagenumbering{arabic}
\pagestyle{plain}
\input{00_foreword.tex}
\input{01_introduction.tex}
\input{02_gnu_coding_standards.tex}
\input{03_configure.tex}
\input{04_automake.tex}
\input{05_shared_libs.tex}
\input{06_example.tex}
\input{07_catalog.tex}
\input{08_m4.tex}
\input{09_reusing.tex}
\end{document}
'''
def mkpath(path):
    """Create directory *path*, including missing parents, unless it exists."""
    if os.path.exists(path):
        return
    os.makedirs(path)
def fetch_sources():
    """Download every entry of _sources, unless a 'work' directory exists.

    Each URL is stored in its destination directory under the last path
    component of the URL. An existing 'work' directory is taken to mean the
    data has already been fetched, and nothing is downloaded.
    """
    if os.path.exists('work'):
        print('work directory exists; delete it to make me re-fetch book data')
        return
    print('fetching book data...')
    for path, url in _sources:
        mkpath(path)
        print('downloading %s' % url)
        response = urllib2.urlopen(url)
        try:
            content = response.read()
        finally:
            # release the connection even if reading fails
            response.close()
        outfilename = '%s/%s' % (path, url.split('/')[-1])
        print(' storing at %s' % outfilename)
        # binary mode: the list contains PNG images, which must be written
        # without any newline translation
        with open(outfilename, 'wb') as outfile:
            outfile.write(content)
def make_tex_template():
    """Write _tex_template to work/tex/autotools-guide.tex."""
    print('writing tex template')
    mkpath('work/tex')
    with open('work/tex/autotools-guide.tex', 'w') as template:
        template.write(_tex_template)
def scale_title_image():
    """Run ImageMagick's convert inside work/ to produce the scaled, gray cover.

    The exit status of the shell command is not checked.
    """
    print('scaling title image')
    # the backslash in front of the percent sign is part of the shell command
    convert_cmd = 'convert -resize 400\\% -colorspace Gray t/autotools_big.png t/autotools_big_scaled.png'
    os.system('cd work && ' + convert_cmd)
def read_chapter_titles():
    """Append the <title> of each chapter page to _chapter_names.

    The first two entries of _tex_files (title text and foreword) are skipped.
    For every other page the patterns are tried from most to least specific
    and the first one that matches supplies the title; a page without any
    match contributes nothing.
    """
    print('extracting chapter names')
    title_patterns = (
        r"<title>Chapter \d+: ([^<]+)</title>",
        r"<title>Appendix .+: ([^<]+)</title>",
        r"<title>([^<]+)</title>",
    )
    for _, src in _tex_files[2:]:
        with open(src, 'r') as page:
            content = page.read()
        for pattern in title_patterns:
            found = re.findall(pattern, content)
            if found:
                _chapter_names.append(found[0].strip())
                break
def cutncopy_html_bodies():
    """Copy the article body of every downloaded page to its tex destination.

    For each (dest, src) pair in _tex_files the page is read, everything up to
    and including a known opening marker is cut off, everything from a known
    closing marker onwards is cut off, and the remainder is written to dest.
    A marker that does not occur leaves the text unchanged.
    """
    fsm_body_open = '<div class="content">'
    fsm_body_close1 = '<div class="book-navigation">'
    fsm_body_close2 = '<h3>Source archive</h3>'
    nost_body_open = 'Download the source code from the book</a></li>\n</ul>'
    nost_body_close = '<div><hr class="separator"><a name="toc">'
    for dest, src in _tex_files:
        with open(src, 'r') as infile:
            content = infile.read()
        pos = content.find(fsm_body_open)
        if pos != -1:  # we have a freesoftwaremagazine file
            content = content[pos + len(fsm_body_open):]  # cut header garbage
            for closing in (fsm_body_close1, fsm_body_close2):
                pos = content.find(closing)
                if pos != -1:
                    content = content[:pos]  # cut footer garbage
        pos = content.find(nost_body_open)
        if pos != -1:  # we have the nostarch titlepage
            content = content[pos + len(nost_body_open):]  # cut header garbage
            pos = content.find(nost_body_close)
            # Only cut when the marker is present: slicing with -1 would
            # silently drop the last character instead.
            if pos != -1:
                content = content[:pos]  # cut footer garbage
        with open(dest, 'w') as outfile:
            outfile.write(content)
def regex_transformations(content):
    """Return *content* after applying every rule of _regex_transforms in order."""
    print('applying regex patterns')
    for pattern, replacement in _regex_transforms:
        content = re.sub(pattern, replacement, content)
    return content
def context_sensitive_latex_escaping(content):
    """Escape "_", "$", "#" and "..." everywhere except inside lstlisting bodies.

    The text between \\begin{lstlisting} (plus an optional [caption=...]
    argument) and the matching \\end{lstlisting} is copied verbatim; the
    caption itself and all text outside listings are escaped. A listing that
    is never closed is treated as ordinary text.
    """
    print('escaping _ $ and # in non-lstlisting environments')
    begin_tag = "\\begin{lstlisting}"
    end_tag = "\\end{lstlisting}"
    caption_open = "[caption="
    caption_close = "]"

    def escape(text):
        for plain, escaped in (("_", "\\_"), ("$", "\\$"), ("#", "\\#"), ("...", "\\ldots ")):
            text = text.replace(plain, escaped)
        return text

    pieces = []
    cursor = 0
    while True:
        begin_at = content.find(begin_tag, cursor)
        if begin_at == -1:  # no more listing environments
            break
        verbatim_from = begin_at + len(begin_tag)
        # an optional caption belongs to the escaped part
        if content.startswith(caption_open, verbatim_from):
            caption_end = content.find(caption_close, verbatim_from)
            if caption_end != -1:
                verbatim_from = caption_end + len(caption_close)
        verbatim_to = content.find(end_tag, verbatim_from)
        if verbatim_to == -1:  # unterminated listing: escape the rest as text
            break
        pieces.append(escape(content[cursor:verbatim_from]))
        pieces.append(content[verbatim_from:verbatim_to])
        pieces.append(end_tag)  # contains nothing that needs escaping
        cursor = verbatim_to + len(end_tag)
    pieces.append(escape(content[cursor:]))
    return "".join(pieces)
def ref_label_cleanup(content):
    """Normalise the keys of all \\ref{...} and \\label{...} commands.

    Inside the braces, backslashes are removed and "_" and "." are replaced
    by "-". A command whose closing brace is missing is left untouched.
    Returns the cleaned text.
    """
    print('cleaning labels and refs')

    def _clean(match):
        key = match.group(2).replace("\\", "").replace("_", "-").replace(".", "-")
        return match.group(1) + key

    for token in ("\\ref{", "\\label{"):
        # re.sub scans the untouched input, so a key that gets shorter cannot
        # make the scan skip a command that starts right behind it (the manual
        # cursor used previously did).
        pattern = "(" + re.escape(token) + ")([^}]*)(?=})"
        content = re.sub(pattern, _clean, content)
    return content
def includegraphics_cleanup(content):
    """Remove backslashes from the file names of generated \\includegraphics.

    Only commands written exactly as \\includegraphics[width=\\textwidth]{...}
    are handled; a command whose closing brace is missing is left untouched.
    Returns the cleaned text.
    """
    print('cleaning includegraphics')
    token = "\\includegraphics[width=\\textwidth]{"

    def _clean(match):
        return match.group(1) + match.group(2).replace("\\", "")

    # re.sub scans the untouched input, so a file name that gets shorter cannot
    # make the scan skip a command that starts right behind it.
    pattern = "(" + re.escape(token) + ")([^}]*)(?=})"
    return re.sub(pattern, _clean, content)
# Sectioning commands from the highest level (index 0) to the lowest.
_level_tokens = ["\\chapter", "\\section", "\\subsection", "\\subsubsection"]


def fix_sectioning(content):
    """Rewrite headings that skip a sectioning level and return the new text.

    Headings are visited in document order. When a heading is more than one
    level deeper than the level reached so far, it is replaced by the command
    one level below the current one, and following headings keep being
    rewritten to that level until a heading at or above it is met.
    """
    print("fixing guessable sectioning errors")
    current_level = 0           # level the source text is really at
    current_expected_level = 0  # level the output should be at
    start = 0
    while True:
        # Locate the nearest sectioning command of any level. The scan ends
        # only when no command of any level is left, not merely when the last
        # token in _level_tokens has run out.
        next_pos = -1
        next_level = 0
        for level, token in enumerate(_level_tokens):
            pos = content.find(token, start)
            if pos != -1 and (next_pos == -1 or pos < next_pos):
                next_pos = pos
                next_level = level
        if next_pos == -1:
            break

        if next_level == current_expected_level:
            # no level change
            start = next_pos + len(_level_tokens[next_level])
        elif next_level < current_expected_level:
            # back to a higher-level heading: accept it as it is
            current_expected_level = next_level
            current_level = next_level
            start = next_pos + len(_level_tokens[next_level])
        elif (current_level == current_expected_level
              and next_level == current_expected_level + 1):
            # exactly one level deeper: accept it as it is
            current_expected_level = next_level
            current_level = next_level
            start = next_pos + len(_level_tokens[next_level])
        else:
            if current_level == current_expected_level:
                # a jump over at least one level starts here
                current_expected_level += 1
                current_level = next_level
            # now fix broken sectioning
            old_token = _level_tokens[next_level]
            new_token = _level_tokens[current_expected_level]
            content = "".join([
                content[:next_pos],
                new_token,
                content[next_pos + len(old_token):],
            ])
            start = next_pos + len(new_token)
    return content
def html_to_latex():
    """Convert every destination file of _tex_files from HTML to LaTeX in place.

    Each file is read, run through the conversion passes in a fixed order and
    written back to the same path.
    """
    print('begin of html->latex conversion')
    passes = (
        regex_transformations,
        context_sensitive_latex_escaping,
        ref_label_cleanup,
        includegraphics_cleanup,
        fix_sectioning,
    )
    for dest, _ in _tex_files:
        with open(dest, 'r') as tex:
            content = tex.read()
        for convert in passes:
            content = convert(content)
        with open(dest, 'w') as tex:
            tex.write(content)
def insert_chapters():
    """Prepend a \\chapter heading and a chap-N label to every chapter file.

    The title text (first entry of _tex_files) is skipped. The remaining files
    are numbered from 0 and take their title from _chapter_names at the same
    index.
    """
    print('inserting chapter titles')
    for number, (dest, _) in enumerate(_tex_files[1:]):
        with open(dest, 'r') as tex:
            body = tex.read()
        heading = '\\chapter{%s}\n\\label{chap-%d}\n\n' % (_chapter_names[number], number)
        with open(dest, 'w') as tex:
            tex.write(heading + body)
def compile_pdflatex():
    """Run pdflatex twice on autotools-guide.tex inside work/tex.

    The second run only starts if the first one succeeds; the exit status of
    the shell command is not checked.
    """
    print('compiling twice with pdflatex')
    single_run = 'pdflatex autotools-guide.tex'
    os.system('cd work/tex && %s && %s' % (single_run, single_run))
def copy_images_to_tex():
    """Copy every .png from the download folders into work/tex/img."""
    mkpath('work/tex/img')
    visited = set()
    for folder, _ in _sources:
        # _sources names the same folder once per downloaded file; list and
        # copy each folder only once.
        if folder in visited:
            continue
        visited.add(folder)
        for entry in os.listdir(folder):
            if entry.endswith('.png'):
                shutil.copy('%s/%s' % (folder, entry), 'work/tex/img/%s' % entry)
def main():
    """Run the whole pipeline, from downloading the pages to compiling the PDF."""
    pipeline = (
        fetch_sources,
        make_tex_template,
        scale_title_image,     # needs convert/imagemagick
        copy_images_to_tex,
        read_chapter_titles,
        cutncopy_html_bodies,
        insert_chapters,
        html_to_latex,
        compile_pdflatex,      # needs pdflatex with koma-script and texlive-fontsrecommended (mathpazo/palatino)
    )
    for step in pipeline:
        step()
    print("\nI'm done.\nIf everythig went well, there should now be a file called autotools-guide.pdf in work/tex/")
if __name__ == "__main__":
    # The call to main() is left disabled; running the script only raises.
    # main()
    raise NotImplementedError("Do you really wan't to chose the dark side?")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement