Advertisement
FancyKing

解析并提取HTML 元素(一)

Apr 3rd, 2020
217
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 1.30 KB | None | 0 0
  1. package step2;
  2. import java.io.File;
  3. import java.io.IOException;
  4. import org.jsoup.Jsoup;
  5. import org.jsoup.nodes.Document;
  6. import org.jsoup.nodes.Element;
  7. import org.jsoup.select.Elements;
  8. public class Task {
  9.    
  10.     //通过filePath文件路径获取Docment对象
  11.     public Document getDoc1(String url) throws IOException{
  12.       File file=new File("./backups/www.ctrip.com.txt");
  13.        Document document =Jsoup.parse(file,"UTF-8","http://www.ctrip.com/");
  14.    
  15.        return document ;
  16.     }
  17.  
  18.     //获取“http://you.ctrip.com/”的Docment对象
  19.     public Document getDoc2(String url) throws IOException{
  20.         File file=new File("./backups/you.ctrip.com.txt");
  21.         Document document =Jsoup.parse(file,"UTF-8","http://you.ctrip.com");
  22.        
  23.         return document ;
  24.     }
  25.  
  26.  
  27.     //获取所有链接
  28.  
  29.     public Elements getLinks(Document doc){
  30.        Elements links=doc.select("link[href]");
  31.         return links;
  32.     }
  33.    
  34.     //获取第一个class为“pop_attention”的div
  35.     public Element getDiv(Document doc){
  36.        Element element =doc.select("div.pop_attention").first();
  37.         return element ;
  38.     }
  39.    
  40.     //获取所有li之后的i标签
  41.     public Elements getI(Document doc){
  42.      Elements element =doc.select("li>i");
  43.         return element ;
  44.     }
  45.    
  46. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement