
Untitled
By: a guest on
May 3rd, 2012 | syntax:
None | size: 1.15 KB | hits: 16 | expires: Never
How to parse both the title and the images of a page using HTMLParser
enter code here
// Parser plus the two tag-name filters used to pull the page title and its images.
// NOTE(review): htmlParser is never assigned in the visible code - confirm it is
// initialized (e.g. new Parser()) before the statements below run.
private Parser htmlParser;
private TagNameFilter tagNameFilter = new TagNameFilter("IMG");
private TagNameFilter titleNameFilter = new TagNameFilter("TITLE");
List<String> imageList = new ArrayList<String>();
// NOTE(review): Url and resource are not declared in the visible code; presumably the
// page address as a String and as a java.net.URL respectively - confirm against the caller.
htmlParser.setResource(Url);
// First pass: collect the <title> nodes.
NodeList title = htmlParser.parse(titleNameFilter);
// Guard against pages without a <title>; title1 is null in that case.
Node node = title.size() > 0 ? title.elementAt(0) : null;
TitleTag title1 = (TitleTag) node;
// The first parse() walks the whole input, so rewind the parser before filtering again;
// otherwise the second pass has nothing left to read and yields no images.
htmlParser.reset();
// Second pass: collect the <img> tags. The original referenced srcNameFilter, which is
// not declared anywhere visible; tagNameFilter is the IMG filter declared above.
NodeList imagesrc = htmlParser.parse(tagNameFilter);
for (SimpleNodeIterator iterator = imagesrc.elements(); iterator.hasMoreNodes(); ) {
    Tag tag = (Tag) iterator.nextNode();
    imageList.add(fetchImageSrcFromTag(resource, tag));
}
/**
 * Returns the absolute address of the image referenced by a tag's {@code src} attribute.
 *
 * <p>The attribute value is resolved against {@code resource} with the standard
 * {@link URL#URL(URL, String)} rules, so absolute URLs of any scheme are kept as they
 * are, protocol-relative ({@code //host/x.png}) and root-relative ({@code /x.png})
 * values inherit the page's scheme, host and port, and plain relative values are
 * resolved against the page's own location.
 *
 * @param resource URL of the page the tag was read from; used as the resolution base
 * @param tag      tag whose {@code src} attribute is read
 * @return the resolved URL as a string; the trimmed attribute value unchanged when it
 *         cannot be resolved (for example a {@code data:} URI); or an empty string when
 *         the attribute is missing or blank
 */
private String fetchImageSrcFromTag(URL resource, Tag tag) {
    String imageSrc = tag.getAttribute("src");
    if (imageSrc == null) {
        return "";
    }
    imageSrc = imageSrc.trim();
    if (imageSrc.isEmpty()) {
        // A blank src names no image; resolving it would just yield the page URL.
        return "";
    }
    try {
        // Checking only for the "http://" prefix mangled https, protocol-relative and
        // data sources and dropped the port; let URL do the resolution instead.
        return new URL(resource, imageSrc).toString();
    } catch (java.net.MalformedURLException ignored) {
        // Schemes without a registered handler (e.g. data:, javascript:) cannot be
        // resolved; hand the value back untouched rather than prefixing the host.
        return imageSrc;
    }
}