Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import formatter
import htmllib
import string
class Writer(formatter.DumbWriter):
    """Plain-text writer that marks up headings, bold and italic words.

    Words written while the current tag is ``h1``/``h2``/``h3`` are
    upper-cased, words written while ``self.bold`` is set are wrapped in
    ``*``, and words written while ``self.italic`` is set are wrapped in
    ``_``.  Output goes to ``self.file``, which (like ``self.atbreak``) is
    expected to be set up by ``formatter.DumbWriter``.
    """

    def __init__(self):
        formatter.DumbWriter.__init__(self)
        self.tag = ""                   # first field of the current font tuple
        self.bold = self.italic = 0     # truthy while the style is active
        self.fonts = []                 # saved (tag, bold, italic) states

    def new_font(self, font):
        """Switch to *font*, or restore the previously saved state.

        Args:
            font: ``None`` to restore the most recently saved
                ``(tag, bold, italic)`` state, or a 4-tuple whose first
                three fields update ``tag``, ``bold`` and ``italic``.
                Fields that are ``None`` leave the current value unchanged;
                the fourth field is ignored.

        Raises:
            IndexError: if *font* is ``None`` and no state has been saved.
        """
        if font is None:
            # Restore whatever was active before the last font change.
            self.tag, self.bold, self.italic = self.fonts.pop()
            return

        # Remember the current state so that it can be restored later.
        self.fonts.append((self.tag, self.bold, self.italic))
        # NOTE(review): the stdlib formatter documentation describes the
        # font tuple as (size, italic, bold, teletype); positions 1 and 2
        # are read here as bold and italic respectively.  Confirm whether
        # that is intentional before changing it -- it decides which
        # marker ("*" or "_") each style gets in the output.
        tag, bold, italic, typewriter = font
        if tag is not None:
            self.tag = tag
        if bold is not None:
            self.bold = bold
        if italic is not None:
            self.italic = italic

    def send_flowing_data(self, data):
        """Write the words of *data* separated by single spaces.

        A leading space is written when ``self.atbreak`` is set or *data*
        starts with whitespace.  Afterwards ``self.atbreak`` records
        whether *data* ended with whitespace.  Empty *data* is ignored.
        """
        if not data:
            return
        atbreak = self.atbreak or data[0] in string.whitespace
        # str methods replace the deprecated string.split()/string.upper()
        # module functions, which no longer exist in Python 3.
        for word in data.split():
            if atbreak:
                self.file.write(" ")
            if self.tag in ("h1", "h2", "h3"):
                word = word.upper()
            if self.bold:
                word = "*" + word + "*"
            if self.italic:
                word = "_" + word + "_"
            self.file.write(word)
            atbreak = 1
        self.atbreak = data[-1] in string.whitespace
# Build the rendering pipeline: HTML parser -> formatter -> Writer.
w = Writer()
f = formatter.AbstractFormatter(w)

# print html body as plain text
p = htmllib.HTMLParser(f)
# The context manager closes the sample file even if reading or parsing
# fails; the original left the handle open.
with open("samples/sample.htm") as file:
    p.feed(file.read())
p.close()

# Presumably the output for the sample document, as recorded with the
# original listing:
## _A_ _CHAPTER._
##
## Some text. Some more text. Some *emphasised* text. A link[1].
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement