Advertisement
Guest User

Untitled

a guest
Feb 22nd, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.02 KB | None | 0 0
  1. <td xyz="123"><a href="blah.html">This is a line</a></td>
  2.  
  3. <tr><td>New line</td></tr>
  4. <tr><td xyz="123"><a href="blah.html">CaptureThis</a></td></tr>
  5.  
  6. <tr><td x?y?z?=?"?(\d\d\d)?"?>?<?a?.*?>?(.*?)?<?/?a?>?</td></tr>
  7.  
  8. from xml.etree import ElementTree
  9.  
  10. tree = ElementTree.parse('filename.html')
  11. for elem in tree.findall('tr'):
  12.     print ElementTree.tostring(elem)
  13.  
  14. >>> line1
  15. '<tr><td>New line</td></tr>'
  16. >>> line2
  17. '<tr><td xyz="123"><a href="blah.html">CaptureThis</a></td></tr>'
  18. >>> pattern2 = re.compile(r'>([\w\s]+)<')
  19. >>> pattern2.search(line1).group(1)
  20. 'New line'
  21. >>> pattern2.search(line2).group(1)
  22. 'CaptureThis'
  23.  
  24. >>> pattern = re.compile(r'<td\s+\w+="([^"]*)">')
  25. >>> pattern.search(line2).group(1)
  26. '123'
  27.  
  28. >>> text = '''<tr><td>New line</td></tr>
  29. <tr><td xyz="123"><a href="blah.html">CaptureThis</a></td></tr>
  30. <tr><td xyz="456">CaptureThisAlso</td></tr>
  31. '''
  32.  
  33. >>> re.findall(r'<tr><td(?: xyz="(\d+)")?>(?:<a href=".*?">)?(.*?)(?:</a>)?</td></tr>', text)
  34. [('', 'New line'), ('123', 'CaptureThis'), ('456', 'CaptureThisAlso')]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement