Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package jcrawler;
- import java.io.IOException;
- import java.io.StringReader;
- import java.util.List;
- import java.util.ArrayList;
- import javax.swing.text.html.parser.ParserDelegator;
- import javax.swing.text.html.HTMLEditorKit.ParserCallback;
- import javax.swing.text.html.HTML.Tag;
- import javax.swing.text.html.HTML.Attribute;
- import javax.swing.text.MutableAttributeSet;
/**
 * Static helpers for extracting information from HTML text.
 *
 * <p>This class only exposes static methods and cannot be instantiated.
 */
public class HTMLUtils {

    /** Not instantiable: utility class with static members only. */
    private HTMLUtils() {}

    /**
     * Collects the {@code href} value of every {@code <a>} start tag found in
     * the given HTML text, in document order.
     *
     * <p>Anchors that carry no {@code href} attribute (for example
     * {@code <a name="top">}) are skipped, so the returned list never contains
     * {@code null}. The values are returned exactly as reported by the parser;
     * relative URLs are not resolved and duplicates are not removed.
     *
     * @param buffer the HTML text to scan; {@code null} or empty yields an
     *               empty list
     * @return a new, mutable list holding the link targets in the order they
     *         appear; never {@code null}
     * @throws IOException if the underlying parser reports an I/O failure
     */
    public static List<String> extractLinks(String buffer) throws IOException {
        final List<String> links = new ArrayList<String>();
        if (buffer == null || buffer.length() == 0) {
            // Nothing to parse; also avoids the NullPointerException that
            // new StringReader(null) would raise.
            return links;
        }

        // Only start tags are of interest; every other ParserCallback method
        // is left at its inherited implementation.
        ParserCallback anchorCollector = new ParserCallback() {
            @Override
            public void handleStartTag(Tag tag, MutableAttributeSet attributes, int pos) {
                if (tag != Tag.A) {
                    return;
                }
                // getAttribute returns Object and yields null when the anchor
                // has no href; only record genuine string values.
                Object href = attributes.getAttribute(Attribute.HREF);
                if (href instanceof String) {
                    links.add((String) href);
                }
            }
        };

        StringReader reader = new StringReader(buffer);
        try {
            // ignoreCharSet = true: the input is already decoded characters,
            // so a charset declared inside the markup is ignored.
            new ParserDelegator().parse(reader, anchorCollector, true);
        } finally {
            reader.close();
        }
        return links;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement