Advertisement
Guest User

Untitled

a guest
Aug 11th, 2009
190
0
Never
Not a member of Pastebin yet? Sign up; it unlocks many cool features!
text 2.55 KB | None | 0 0
  1. In [1]: import py_streamhtmlparser
  2. parser = py_streamhtmlparser.HtmlParser()
  3. html = """<html><body><a href='http://google.com'>link</a></body></html>"""
  4. for index, character in enumerate(html):
  5.     parser.Parse(character)
  6.     print index, character, parser.Tag(), parser.Attribute(), parser.Value(), parser.ValueIndex()
  7.  
  8.  
  9. In [2]: ...: ...: ...: ...: ...: Out[2]: 1
  10. 0 < None None None None
  11. Out[2]: 1
  12. 1 h None None None None
  13. Out[2]: 1
  14. 2 t None None None None
  15. Out[2]: 1
  16. 3 m None None None None
  17. Out[2]: 1
  18. 4 l None None None None
  19. Out[2]: 0
  20. 5 > html None None None
  21. Out[2]: 1
  22. 6 < html None None None
  23. Out[2]: 1
  24. 7 b None None None None
  25. Out[2]: 1
  26. 8 o None None None None
  27. Out[2]: 1
  28. 9 d None None None None
  29. Out[2]: 1
  30. 10 y None None None None
  31. Out[2]: 0
  32. 11 > body None None None
  33. Out[2]: 1
  34. 12 < body None None None
  35. Out[2]: 1
  36. 13 a None None None None
  37. Out[2]: 1
  38. 14 a None None None
  39. Out[2]: 2
  40. 15 h a None None
  41. Out[2]: 2
  42. 16 r a None None
  43. Out[2]: 2
  44. 17 e a None None
  45. Out[2]: 2
  46. 18 f a None None
  47. Out[2]: 3
  48. 19 = a href href 0
  49. Out[2]: 3
  50. 20 ' a href href 0
  51. Out[2]: 3
  52. 21 h a href h 1
  53. Out[2]: 3
  54. 22 t a href ht 2
  55. Out[2]: 3
  56. 23 t a href htt 3
  57. Out[2]: 3
  58. 24 p a href http 4
  59. Out[2]: 3
  60. 25 : a href http: 5
  61. Out[2]: 3
  62. 26 / a href http:/ 6
  63. Out[2]: 3
  64. 27 / a href http:// 7
  65. Out[2]: 3
  66. 28 g a href http://g 8
  67. Out[2]: 3
  68. 29 o a href http://go 9
  69. Out[2]: 3
  70. 30 o a href http://goo 10
  71. Out[2]: 3
  72. 31 g a href http://goog 11
  73. Out[2]: 3
  74. 32 l a href http://googl 12
  75. Out[2]: 3
  76. 33 e a href http://google 13
  77. Out[2]: 3
  78. 34 . a href http://google. 14
  79. Out[2]: 3
  80. 35 c a href http://google.c 15
  81. Out[2]: 3
  82. 36 o a href http://google.co 16
  83. Out[2]: 3
  84. 37 m a href http://google.com 17
  85. Out[2]: 1
  86. 38 ' a None None None
  87. Out[2]: 0
  88. 39 > a None None None
  89. Out[2]: 0
  90. 40 l a None None None
  91. Out[2]: 0
  92. 41 i a None None None
  93. Out[2]: 0
  94. 42 n a None None None
  95. Out[2]: 0
  96. 43 k a None None None
  97. Out[2]: 1
  98. 44 < a None None None
  99. Out[2]: 1
  100. 45 / None None None None
  101. Out[2]: 1
  102. 46 a None None None None
  103. Out[2]: 0
  104. 47 > None None None None
  105. Out[2]: 1
  106. 48 < None None None None
  107. Out[2]: 1
  108. 49 / None None None None
  109. Out[2]: 1
  110. 50 b None None None None
  111. Out[2]: 1
  112. 51 o None None None None
  113. Out[2]: 1
  114. 52 d None None None None
  115. Out[2]: 1
  116. 53 y None None None None
  117. Out[2]: 0
  118. 54 > None None None None
  119. Out[2]: 1
  120. 55 < None None None None
  121. Out[2]: 1
  122. 56 / None None None None
  123. Out[2]: 1
  124. 57 h None None None None
  125. Out[2]: 1
  126. 58 t None None None None
  127. Out[2]: 1
  128. 59 m None None None None
  129. Out[2]: 1
  130. 60 l None None None None
  131. Out[2]: 0
  132. 61 > None None None None
  133.  
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement