Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 3rd, 2012  |  syntax: None  |  size: 1.15 KB  |  hits: 16  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. How to parse both the title and the images using htmlparser
  2. enter code here
  3.  
  4. private Parser htmlParser;
  5. private TagNameFilter tagNameFilter = new TagNameFilter("IMG");
  6. private TagNameFilter titleNameFilter = new TagNameFilter("TITLE");
  7. List<String> imageList = new ArrayList<String>();
  8. htmlParser.setResource(Url);
  9.      NodeList title = htmlParser.parse(titleNameFilter);
  10.         Node node = title.elementAt(0);
  11.         TitleTag title1 = (TitleTag) node;
  12.      NodeList imagesrc= htmlParser.parse(srcNameFilter);
  13.         for (SimpleNodeIterator iterator = imagesrc.elements(); iterator.hasMoreNodes(); ) {
  14.             Tag tag = (Tag) iterator.nextNode();
  15.             imageList.add(fetchImageSrcFromTag(resource, tag));}
  16.  
  17. private String fetchImageSrcFromTag(URL resource, Tag tag) {
  18.     String imageSrc = tag.getAttribute("src");
  19.     String baseUrl;
  20.     if (imageSrc != null) {
  21.         if (!imageSrc.startsWith("http://")) {
  22.             baseUrl = resource.getProtocol() + "://" + resource.getHost();
  23.             if (!imageSrc.startsWith("/")) baseUrl = baseUrl + "/";
  24.             imageSrc = baseUrl + imageSrc;
  25.         }
  26.     } else {
  27.         imageSrc = "";
  28.     }
  29.  
  30.     return imageSrc;
  31. }