Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Created by thang_dx on 8/28/17.
- */
- import org.jsoup.Jsoup
- import scala.util.Try
/**
 * Command-line scraper: walks the paginated post index of a blog and prints
 * the title, author, publish time and category of every post linked from it.
 *
 * All pages are fetched synchronously with jsoup; any fetch or parse failure
 * aborts the crawl and is reported by [[main.main]].
 */
object main {

  /** Text printed in place of a field whose CSS selector matched nothing. */
  private val Missing = "N/A"

  /**
   * Returns the text of the first element matching `selector` in `doc`,
   * or `Missing` when the selector matches nothing.
   */
  private def firstText(doc: org.jsoup.nodes.Document, selector: String): String =
    // Elements.first() returns null on an empty match; Option(...) absorbs it
    // so a post lacking one field no longer kills the whole crawl with an NPE.
    Option(doc.select(selector).first()).map(_.text()).getOrElse(Missing)

  /**
   * Builds the URL of index page `page`. Page 1 is the base URL itself;
   * later pages live under `<base>/page/<n>`.
   */
  private def pageUrl(url: String, page: Int): String =
    if (page == 1) url
    else {
      // Drop a trailing slash so the result never contains "//page/".
      val base = url.stripSuffix("/")
      s"$base/page/$page"
    }

  /**
   * Fetches a single post and prints its title, author, publish time and
   * category, followed by a separator line.
   *
   * @param url address of the post page
   * @throws java.io.IOException if the page cannot be fetched
   */
  def getBlogByUrl(url: String): Unit = {
    val doc = Jsoup.connect(url).get()
    val fields = Seq(
      "Title"      -> "h1.entry-title",
      "Author"     -> "span.author.vcard",
      "Publish At" -> "span.timestamp.updated",
      "Category"   -> "span.postcateg"
    )
    fields.foreach { case (label, selector) =>
      println(s"$label: ${firstText(doc, selector)}")
    }
    println("=================")
  }

  /**
   * Fetches one index page and prints every post linked from an
   * `h2.entry-title` heading on it.
   *
   * @param url  base address of the blog
   * @param page 1-based index page number (defaults to the first page)
   * @throws java.io.IOException if the index page or a post cannot be fetched
   */
  def getBlogsByPage(url: String, page: Int = 1): Unit = {
    val doc = Jsoup.connect(pageUrl(url, page)).get()
    val blogs = doc.select("h2.entry-title")
    blogs.forEach { blog =>
      // Skip headings that carry no usable link instead of failing on them.
      Option(blog.select("a").first())
        .map(_.attr("href"))
        .filter(_.nonEmpty)
        .foreach(getBlogByUrl)
    }
  }

  /**
   * Reads the number of index pages from the pagination links on `url`.
   *
   * @param url base address of the blog
   * @return the page count, or a `Failure` when the page cannot be fetched,
   *         has fewer than two `a.page-numbers` links, or the chosen link
   *         text is not an integer
   */
  def getNumberOfPage(url: String): Try[Int] = Try {
    val links = Jsoup.connect(url).get().select("a.page-numbers")
    // The second-to-last link is used as the highest page number; presumably
    // the last one is a "next" arrow -- verify against the site's markup.
    links.get(links.size() - 2).text().toInt
  }

  /** Entry point: crawls every index page of the configured blog. */
  def main(args: Array[String]): Unit = {
    val url = "http://labs.septeni-technology.jp"
    // Try.map also captures exceptions thrown while crawling, so a failure on
    // any page ends up in the same Failure as a failed page-count lookup.
    val crawl = getNumberOfPage(url).map { pages =>
      (1 to pages).foreach { page =>
        println(s"Page $page:")
        getBlogsByPage(url, page)
      }
    }
    crawl.failed.foreach { cause =>
      println("Have Problem!!!")
      // Surface the reason instead of discarding it.
      Console.err.println(s"Cause: $cause")
    }
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement