Advertisement
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
- In [1]: import py_streamhtmlparser
- parser = py_streamhtmlparser.HtmlParser()
- html = """<html><body><a href='http://google.com'>link</a></body></html>"""
- for index, character in enumerate(html):
- parser.Parse(character)
- print index, character, parser.Tag(), parser.Attribute(), parser.Value(), parser.ValueIndex()
- In [2]: ...: ...: ...: ...: ...: Out[2]: 1
- 0 < None None None None
- Out[2]: 1
- 1 h None None None None
- Out[2]: 1
- 2 t None None None None
- Out[2]: 1
- 3 m None None None None
- Out[2]: 1
- 4 l None None None None
- Out[2]: 0
- 5 > html None None None
- Out[2]: 1
- 6 < html None None None
- Out[2]: 1
- 7 b None None None None
- Out[2]: 1
- 8 o None None None None
- Out[2]: 1
- 9 d None None None None
- Out[2]: 1
- 10 y None None None None
- Out[2]: 0
- 11 > body None None None
- Out[2]: 1
- 12 < body None None None
- Out[2]: 1
- 13 a None None None None
- Out[2]: 1
- 14   a None None None
- Out[2]: 2
- 15 h a None None
- Out[2]: 2
- 16 r a None None
- Out[2]: 2
- 17 e a None None
- Out[2]: 2
- 18 f a None None
- Out[2]: 3
- 19 = a href href 0
- Out[2]: 3
- 20 ' a href href 0
- Out[2]: 3
- 21 h a href h 1
- Out[2]: 3
- 22 t a href ht 2
- Out[2]: 3
- 23 t a href htt 3
- Out[2]: 3
- 24 p a href http 4
- Out[2]: 3
- 25 : a href http: 5
- Out[2]: 3
- 26 / a href http:/ 6
- Out[2]: 3
- 27 / a href http:// 7
- Out[2]: 3
- 28 g a href http://g 8
- Out[2]: 3
- 29 o a href http://go 9
- Out[2]: 3
- 30 o a href http://goo 10
- Out[2]: 3
- 31 g a href http://goog 11
- Out[2]: 3
- 32 l a href http://googl 12
- Out[2]: 3
- 33 e a href http://google 13
- Out[2]: 3
- 34 . a href http://google. 14
- Out[2]: 3
- 35 c a href http://google.c 15
- Out[2]: 3
- 36 o a href http://google.co 16
- Out[2]: 3
- 37 m a href http://google.com 17
- Out[2]: 1
- 38 ' a None None None
- Out[2]: 0
- 39 > a None None None
- Out[2]: 0
- 40 l a None None None
- Out[2]: 0
- 41 i a None None None
- Out[2]: 0
- 42 n a None None None
- Out[2]: 0
- 43 k a None None None
- Out[2]: 1
- 44 < a None None None
- Out[2]: 1
- 45 / None None None None
- Out[2]: 1
- 46 a None None None None
- Out[2]: 0
- 47 > None None None None
- Out[2]: 1
- 48 < None None None None
- Out[2]: 1
- 49 / None None None None
- Out[2]: 1
- 50 b None None None None
- Out[2]: 1
- 51 o None None None None
- Out[2]: 1
- 52 d None None None None
- Out[2]: 1
- 53 y None None None None
- Out[2]: 0
- 54 > None None None None
- Out[2]: 1
- 55 < None None None None
- Out[2]: 1
- 56 / None None None None
- Out[2]: 1
- 57 h None None None None
- Out[2]: 1
- 58 t None None None None
- Out[2]: 1
- 59 m None None None None
- Out[2]: 1
- 60 l None None None None
- Out[2]: 0
- 61 > None None None None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement