Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
- //REMOVE IF NOT USING NETBEANS IDE
- package testscrapping;
- /**
- *
- * @author ranveer
- */
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.net.URLConnection;
- import org.htmlcleaner.CleanerProperties;
- import org.htmlcleaner.HtmlCleaner;
- import org.htmlcleaner.TagNode;
- import org.htmlcleaner.XPatherException;
- public class TestScrapping {
- public static void main(String[] args) throws MalformedURLException, IOException, XPatherException {
- // URL to be fetched in the below url u can replace s=cantabil with company of ur choice
- String url_fetch = "http://in.finance.yahoo.com/lookup?s=cantabil&t=A&b=0&m=IN";
- //create tagnode object to traverse XML using xpath
- TagNode node;
- String info = null;
- //XPath of the data to be fetched.....use firefox's firepath addon or use firebug to fetch the required XPath.
- //the below XPath will display the title of the company u have queried for
- String name_xpath = "//div[1]/div[2]/div[2]/div[1]/div/div/div/div/table/tbody/tr[1]/td[2]/text()";
- // declarations related to the api
- HtmlCleaner cleaner = new HtmlCleaner();
- CleanerProperties props = new CleanerProperties();
- props.setAllowHtmlInsideAttributes(true);
- props.setAllowMultiWordAttributes(true);
- props.setRecognizeUnicodeChars(true);
- props.setOmitComments(true);
- //creating url object
- URL url = new URL(url_fetch);
- URLConnection conn = url.openConnection(); //opening connection
- node = cleaner.clean(new InputStreamReader(conn.getInputStream()));//reading input stream
- //storing the nodes belonging to the given xpath
- Object[] info_nodes = node.evaluateXPath(name_xpath);
- //checking if something returned or not....if XPath invalid info_nodes.length=0
- if (info_nodes.length > 0) {
- //info_nodes[0] will return string buffer
- StringBuffer str = new StringBuffer();
- str.append(info_nodes[0]);
- System.out.println(str);
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement