// Web scraping with HtmlCleaner

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

// Remove the package declaration below if you are not using the NetBeans IDE project layout.
package testscrapping;

/**
 *
 * @author ranveer
 */
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;


import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

public class TestScrapping {

    public static void main(String[] args) throws MalformedURLException, IOException, XPatherException {

        // URL to be fetched in the below url u can replace s=cantabil with company of ur choice
        String url_fetch = "http://in.finance.yahoo.com/lookup?s=cantabil&t=A&b=0&m=IN";

        //create tagnode object to traverse XML using xpath
        TagNode node;
        String info = null;

        //XPath of the data to be fetched.....use firefox's firepath addon or use firebug to fetch the required XPath.
        //the below XPath will display the title of the company u have queried for
        String name_xpath = "//div[1]/div[2]/div[2]/div[1]/div/div/div/div/table/tbody/tr[1]/td[2]/text()";

         // declarations related to the api
        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = new CleanerProperties();
        props.setAllowHtmlInsideAttributes(true);
        props.setAllowMultiWordAttributes(true);
        props.setRecognizeUnicodeChars(true);
        props.setOmitComments(true);


        //creating url object
        URL url = new URL(url_fetch);
        URLConnection conn = url.openConnection(); //opening connection
        node = cleaner.clean(new InputStreamReader(conn.getInputStream()));//reading input stream

        //storing the nodes belonging to the given xpath
        Object[] info_nodes = node.evaluateXPath(name_xpath);


//checking if something returned or not....if XPath invalid info_nodes.length=0
        if (info_nodes.length > 0) {
            //info_nodes[0] will return string buffer
            StringBuffer str = new StringBuffer();
            str.append(info_nodes[0]);
            System.out.println(str);

        }


    }
}