Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
- /**
- * Example program to list links from a URL.
- */
- public class FlipkartFetcher {
- static String folderPath="C:\Users\Nobal\Desktop\MyImages";
- public static void main(String[] args) throws IOException {
- // Validate.isTrue(args.length == 1, "usage: supply url to fetch");
- //String url = args[0];
- String url="http://www.flipkart.com/mens-clothing/shirts/formal-shirts/pr?sid=2oq%2Cs9b%2Cmg4%2Cfh5&otracker=nmenu_sub_men_0_Formal+Shirts";
- //print("Fetching %s...", url);
- Document doc = Jsoup.connect(url).get();
- Elements divTags=doc.getElementsByClass("pu-visual-section");
- int counter=0;
- for(Element divTag : divTags)
- {
- counter++;
- //System.out.println(image.getElementsByTag("a"));
- Elements aTags=divTag.getElementsByTag("a");
- for(Element aTag : aTags )
- {
- if(aTag.child(0).attr("src").contains(".jp")){
- System.out.println(aTag.child(0).attr("src"));
- getImages(aTag.child(0).attr("src"));
- }
- }
- }
- /* Elements links = doc.select("a[href]");
- Elements media = doc.select("[src]");
- Elements imports = doc.select("link[href]");
- print("nMedia: (%d)", media.size());
- for (Element src : media) {
- if (src.tagName().equals("img"))
- print(" * %s: <%s> %sx%s (%s)",
- src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
- trim(src.attr("alt"), 20));
- else
- print(" * %s: <%s>", src.tagName(), src.attr("abs:src"));
- }
- print("nImports: (%d)", imports.size());
- for (Element link : imports) {
- print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel"));
- }
- print("nLinks: (%d)", links.size());
- for (Element link : links) {
- print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35));
- }*/
- }
- private static void getImages(String src) throws IOException {
- String folder = null;
- //Exctract the name of the image from the src attribute
- int indexname = src.lastIndexOf("/");
- if (indexname == src.length()) {
- src = src.substring(1, indexname);
- }
- indexname = src.lastIndexOf("/");
- String name = src.substring(indexname, src.length());
- System.out.println(name);
- //Open a URL Stream
- URL url = new URL(src);
- InputStream in = url.openStream();
- OutputStream out = new BufferedOutputStream(new FileOutputStream( folderPath+ name));
- for (int b; (b = in.read()) != -1;) {
- out.write(b);
- }
- out.close();
- in.close();
- }
- /* private static void print(String msg, Object... args) {
- System.out.println(String.format(msg, args));
- }
- private static String trim(String s, int width) {
- if (s.length() > width)
- return s.substring(0, width-1) + ".";
- else
- return s;
- }*/
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement