Guest User

Untitled

a guest
Jul 23rd, 2018
85
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.87 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. # roff(1) Line Mangler And Organizer
  4. #
  5. # Reflow a roff (source) document without damaging it:
  6. #
  7. # - Comments are left intact
  8. # - Commands are left intact
  9. # - Known "Verbatim blocks" are left intact
  10. # - tbl(1) Tables
  11. # - Unflowed sections
  12.  
  13. import sys
  14. import textwrap
  15.  
  16.  
  17. # dict of verbatim blocks start command --> end command
  18. VERBATIM_BLOCKS = {'.TS': '.TE', # Table Start / Table End
  19. '.nf': '.fi', # No-fill / Fill
  20. }
  21.  
  22.  
  23. # line-tokens
  24. #
  25. # COMMENT - roff comment, .\" or '\"
  26. #
  27. # TEXT - plain text
  28. #
  29. # COMMAND - a roff command (must begin a line)
  30. #
  31. # VERBATIM - Part of a verbatim block (see VERBATIM_BLOCKS)
  32. #
  33. # BLANK - Blank lines in roff input are significant. If we treat them as TEXT
  34. # they may be flowed out of existence, but they almost always exist in
  35. # input for their affect on output; instead treat them as a command
  36. # ("insert a blank line"), which seems logical and gives the right
  37. # behaviour.
  38. COMMENT, TEXT, COMMAND, VERBATIM, BLANK = range(5)
  39.  
  40.  
  41. def tokenize(inp):
  42. """'tokenize' an nroff page on INP.
  43.  
  44. returns a list of lists [TOKEN, 'text'], runs of the same token are packed
  45. together."""
  46.  
  47. ret = []
  48. lasttok = None
  49.  
  50. # Stack of verbatim block ending commands in the order we need to see them
  51. # to leave the block. Also treated as bool to indicate lines should be
  52. # passed verbatim.
  53. inverb = []
  54.  
  55. for line in inp:
  56. tok = None
  57.  
  58. if (line.startswith('.\\"') or line.startswith('\'\\"')):
  59. tok = COMMENT
  60. if not line or line.isspace():
  61. tok = BLANK
  62. elif line[0] in (".", "'"):
  63. tok = COMMAND
  64. command = line.split()[0]
  65.  
  66. # command starts a verbatim block
  67. if command in VERBATIM_BLOCKS:
  68. inverb.append(VERBATIM_BLOCKS[command])
  69. elif inverb and command == inverb[-1]:
  70. inverb.pop()
  71. else:
  72. tok = inverb and VERBATIM or TEXT
  73.  
  74. if tok == lasttok:
  75. ret[-1][1] += line
  76. else:
  77. ret.append([tok, line])
  78.  
  79. lasttok = tok
  80.  
  81. return ret
  82.  
  83.  
  84. Wrap = textwrap.TextWrapper(width=79, expand_tabs=False,
  85. replace_whitespace=False,
  86. drop_whitespace=True,
  87. fix_sentence_endings=False,
  88. break_on_hyphens=False)
  89.  
  90.  
  91. def flow_paragraph(text):
  92. newlines = Wrap.wrap(text)
  93.  
  94. # We can't allow a non-COMMAND line to start with a period or a single
  95. # quote, if we wrap a line in such a way that we do, pull the last word of
  96. # the previous line down to prevent it.
  97. for n, line in enumerate(newlines):
  98. while line[0] in (".", "'"):
  99. ns = newlines[n - 1].split(' ')
  100. prev, prep = ns[:-1], ns[-1]
  101.  
  102. newlines[n - 1] = ' '.join(prev)
  103. newlines[n] = "%s %s" % (prep, line)
  104. line = newlines[n]
  105.  
  106. return newlines
  107.  
  108.  
  109. def reflow(lines, outp=sys.stdout):
  110. """Reflow an nroff document, in LINES writing a new document to OUTP
  111. (default: sys.stdout)"""
  112.  
  113. for tok, text in lines:
  114. if tok in (COMMENT, COMMAND, VERBATIM, BLANK):
  115. outp.write(text)
  116. elif tok == TEXT:
  117. outp.write('\n'.join(flow_paragraph(text)) + '\n')
  118. else:
  119. raise Exception("Unknown token value `%s'" % tok)
  120.  
  121.  
  122. if __name__ == '__main__':
  123. if len(sys.argv) != 3:
  124. sys.stderr.write("Usage: rofflmao <infile> <outfile>\n")
  125. sys.exit(2)
  126.  
  127. infile, outfile = sys.argv[1:3]
  128.  
  129. if infile == outfile:
  130. sys.stderr.write("Input and output must differ\n")
  131. sys.exit(1)
  132.  
  133. with open(infile, 'r') as f:
  134. # sys.stdout.write(''.join(map(lambda (x,y): "%s: %s" % (x, y),
  135. # tokenize(f))))
  136. with open(outfile, 'w') as n:
  137. reflow(tokenize(f), outp=n)
Add Comment
Please, Sign In to add comment