SHARE
TWEET

Untitled

doxuanthang Aug 28th, 2017 76 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /**
  2.   * Created by thang_dx on 8/28/17.
  3.   */
  4.  
  5. import org.jsoup.Jsoup
  6. import scala.util.Try
  7.  
  /**
    * Small command-line scraper: walks every listing page of a blog and prints
    * the title, author, publish time and category of each post it links to.
    *
    * All fetching and parsing is done with jsoup; output goes to stdout.
    */
  object main {

    /** Printed in place of a field whose element is absent from the page. */
    private val Missing = "N/A"

    /**
      * Fetches a single blog post and prints its metadata.
      *
      * Each field is looked up with a CSS selector. `Elements.first()` returns
      * `null` when nothing matches, so every lookup is wrapped in `Option` and a
      * missing field is printed as [[Missing]] instead of raising a
      * `NullPointerException` that would abort the whole crawl.
      *
      * @param url address of the blog post to fetch
      */
    def getBlogByUrl(url: String): Unit = {
      val doc = Jsoup.connect(url).get()

      // Text of the first element matching `selector`, or the placeholder.
      def textOf(selector: String): String =
        Option(doc.select(selector).first()).fold(Missing)(_.text())

      println(s"Title: ${textOf("h1.entry-title")}")
      println(s"Author: ${textOf("span.author.vcard")}")
      println(s"Publish At: ${textOf("span.timestamp.updated")}")
      println(s"Category: ${textOf("span.postcateg")}")
      println("=================")
    }

    /**
      * Fetches one listing page and prints every post linked from it.
      *
      * Page 1 is the base URL itself; any other page lives at
      * `<url>/page/<page>`.
      *
      * @param url  base address of the blog
      * @param page 1-based listing page number (defaults to the first page)
      */
    def getBlogsByPage(url: String, page: Int = 1): Unit = {
      // Drop a trailing slash so the joined path never contains "//".
      val base = url.stripSuffix("/")
      val urlByPage = page match {
        case 1 => url
        case _ => s"$base/page/$page"
      }
      val doc = Jsoup.connect(urlByPage).get()
      val blogs = doc.select("h2.entry-title")
      blogs.forEach(blog =>
        // A heading without a link (or with an empty href) is skipped rather
        // than dereferencing the null that `first()` returns.
        Option(blog.select("a").first())
          .map(_.attr("href"))
          .filter(_.nonEmpty)
          .foreach(link => getBlogByUrl(link))
      )
    }

    /**
      * Reads the number of listing pages from the pagination links.
      *
      * The count is taken from the second-to-last `a.page-numbers` element
      * (presumably the last one is a "next" link -- confirm against the site
      * markup). A page with no pagination links at all is treated as a
      * single-page blog.
      *
      * @param url base address of the blog
      * @return the page count, or a `Failure` holding the network, indexing or
      *         number-format error that prevented it from being determined
      */
    def getNumberOfPage(url: String): Try[Int] = Try {
      val pageLinks = Jsoup.connect(url).get().select("a.page-numbers")
      if (pageLinks.isEmpty) 1
      else pageLinks.get(pageLinks.size() - 2).text().trim.toInt
    }

    /**
      * Entry point: crawls every listing page of the configured blog.
      *
      * Any failure while counting or fetching pages stops the crawl and is
      * reported; the underlying cause is written to stderr so it is not lost.
      *
      * @param args command-line arguments (unused)
      */
    def main(args: Array[String]): Unit = {
      val url = "http://labs.septeni-technology.jp"
      // `Try.map` also captures exceptions thrown while crawling the pages.
      val crawl = getNumberOfPage(url).map { pages =>
        (1 to pages).foreach { page =>
          println(s"Page $page:")
          getBlogsByPage(url, page)
        }
      }
      crawl.failed.foreach { cause =>
        println("Have Problem!!!")
        Console.err.println(s"Cause: $cause")
      }
    }

  }
RAW Paste Data
Top