Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # roff(1) Line Mangler And Organizer
- #
- # Reflow a roff (source) document without damaging it:
- #
- # - Comments are left intact
- # - Commands are left intact
- # - Known "Verbatim blocks" are left intact
- # - tbl(1) Tables
- # - Unflowed sections
- import sys
- import textwrap
# Verbatim blocks: the command that opens a block --> the command that
# closes it.  Everything in between is passed through untouched.
VERBATIM_BLOCKS = {
    '.TS': '.TE',  # Table Start / Table End
    '.nf': '.fi',  # No-fill / Fill
}

# line-tokens
#
# COMMENT  - roff comment, .\" or '\"
#
# TEXT     - plain text
#
# COMMAND  - a roff command (must begin a line)
#
# VERBATIM - Part of a verbatim block (see VERBATIM_BLOCKS)
#
# BLANK    - Blank lines in roff input are significant.  If we treat them as
#            TEXT they may be flowed out of existence, but they almost always
#            exist in input for their effect on output; instead treat them as
#            a command ("insert a blank line"), which seems logical and gives
#            the right behaviour.
COMMENT, TEXT, COMMAND, VERBATIM, BLANK = range(5)


def tokenize(inp):
    """'Tokenize' the roff page read from INP.

    INP is any iterable of lines (an open file works); each line is expected
    to keep its trailing newline, because lines are concatenated as-is.

    Returns a list of two-element lists ``[TOKEN, 'text']``.  Consecutive
    lines that receive the same token are packed together into one entry,
    so a run of TEXT lines becomes a single paragraph string.
    """
    ret = []
    lasttok = None
    # Stack of verbatim block ending commands in the order we need to see
    # them to leave the block.  Also treated as a bool: while it is
    # non-empty, ordinary lines are tagged VERBATIM instead of TEXT.
    inverb = []

    for line in inp:
        # The comment test must be part of the same if/elif chain as the
        # command test: a comment also begins with "." or "'", so testing
        # it separately lets the COMMAND branch overwrite the token.
        if line.startswith(('.\\"', '\'\\"')):
            tok = COMMENT
        elif not line or line.isspace():
            tok = BLANK
        elif line[0] in (".", "'"):
            tok = COMMAND
            command = line.split()[0]
            if command in VERBATIM_BLOCKS:
                # Command starts a verbatim block; remember what ends it.
                inverb.append(VERBATIM_BLOCKS[command])
            elif inverb and command == inverb[-1]:
                # Command closes the innermost open verbatim block.
                inverb.pop()
        else:
            tok = VERBATIM if inverb else TEXT

        if tok == lasttok:
            ret[-1][1] += line
        else:
            ret.append([tok, line])
        lasttok = tok

    return ret
# Shared wrapper used for every TEXT paragraph: 79 columns, tabs are not
# expanded, hyphenated words are not split and sentence endings are not
# adjusted.
# NOTE(review): with replace_whitespace=False the newlines already present
# in a paragraph are kept inside the wrapped lines, so short input lines are
# not joined together -- confirm that this is the intended behaviour.
Wrap = textwrap.TextWrapper(width=79, expand_tabs=False,
                            replace_whitespace=False,
                            drop_whitespace=True,
                            fix_sentence_endings=False,
                            break_on_hyphens=False)


def flow_paragraph(text):
    """Wrap the paragraph TEXT and return the resulting list of lines.

    The returned lines carry no trailing newline.  A wrapped line is never
    allowed to start with a period or a single quote when a preceding line
    exists: words are pulled down from the end of the previous line until
    the line starts with something else.  A previous line that is emptied
    this way is dropped rather than left behind as an empty string, since
    it would otherwise be written out as a blank line.  The first line has
    nothing before it to pull from and is returned as wrapped.
    """
    flowed = []
    for line in Wrap.wrap(text):
        # We can't allow a non-COMMAND line to start with a period or a
        # single quote; if wrapping produced one, pull the last word of the
        # previous line down to prevent it.
        while flowed and line[:1] in (".", "'"):
            head, _, tail = flowed.pop().rpartition(' ')
            head = head.rstrip()
            if head:
                # Only keep the previous line if something is left on it.
                flowed.append(head)
            line = "%s %s" % (tail, line)
        flowed.append(line)
    return flowed
def reflow(lines, outp=sys.stdout):
    """Write a reflowed copy of a tokenized roff document to OUTP.

    LINES is an iterable of (token, text) pairs.  TEXT entries are wrapped
    with flow_paragraph() and written one wrapped line per output line;
    COMMENT, COMMAND, VERBATIM and BLANK entries are written exactly as
    they are.  OUTP is any object with a write() method (default:
    sys.stdout).

    Raises Exception for a token value that is none of the above.
    """
    passthrough = (COMMENT, COMMAND, VERBATIM, BLANK)
    for tok, text in lines:
        if tok in passthrough:
            # Anything that is not running text must survive byte-for-byte.
            outp.write(text)
            continue
        if tok != TEXT:
            raise Exception("Unknown token value `%s'" % tok)
        wrapped = flow_paragraph(text)
        outp.write('\n'.join(wrapped) + '\n')
if __name__ == '__main__':
    # Command line: rofflmao <infile> <outfile>
    args = sys.argv[1:]
    if len(args) != 2:
        sys.stderr.write("Usage: rofflmao <infile> <outfile>\n")
        sys.exit(2)

    infile, outfile = args
    # Opening the output for writing truncates it, so the two names must
    # not be the same.
    if infile == outfile:
        sys.stderr.write("Input and output must differ\n")
        sys.exit(1)

    # Debugging aid: to inspect the token stream instead of reflowing,
    # write out "%s: %s" % (tok, text) for each pair from tokenize(src).
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        reflow(tokenize(src), outp=dst)
Add Comment
Please, Sign In to add comment