Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#! /opt/local/bin/ruby1.9 -w
# -*- coding: utf-8 -*-
# get_tag_and_class.rb: extract tag and class names from a HTML document.
#
# Reads HTML from standard input and, once the input is exhausted, prints one
# line per opening tag that carries a quoted class attribute, formatted as
#   <line number>: <tag name>.<class attribute value>

# Matches an opening tag containing a quoted class attribute.
# Capture 1 is the tag name, capture 2 is the raw attribute value (which may
# hold several space-separated class names).
# Limitation: the pattern only looks for the text `class=`, so an attribute
# whose name merely ends in "class" (e.g. data-class="x") matches as well.
TAG_WITH_CLASS = Regexp.compile('<(\w+)[^<>]*class=[\'\"]([^\'\"]+)[\'\"][^<>]*>')

# One reported tag. The member is called class_name rather than class so that
# it does not replace Object#class on the struct instances.
Tuple = Struct.new(:line_number, :tag, :class_name)

# Collects every tag/class pair found in the given input.
#
# @param io [#each_line] line source, e.g. STDIN, a File or a String
# @return [Array<Tuple>] matches in document order; line numbers start at 1
def extract_tags_and_classes(io)
  result = []
  io.each_line.with_index(1) do |line, line_number|
    # scan resumes exactly where the previous match ended, so tags that touch
    # each other (<div class="a"><span class="b">) are all reported.
    line.scan(TAG_WITH_CLASS) do |tag, class_name|
      result.push Tuple.new(line_number, tag, class_name)
    end
  end
  result
end

# Script entry point: report the tags found on standard input.
if __FILE__ == $PROGRAM_NAME
  extract_tags_and_classes(STDIN).each do |t|
    STDOUT.puts "#{t.line_number}: #{t.tag}.#{t.class_name}"
  end
end
Add Comment
Please, Sign In to add comment