Advertisement
Hajto

Attempt at a Scala web scraper

May 22nd, 2015
234
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 1.28 KB | None | 0 0
  1. import model.FuneralSchedule
  2. import play.api.libs.json.Json
  3. import scala.io.Source
  4.  
  // Day to query; it is appended to the `date` query parameter of the URL below.
  var date = "2015-05-05"
  // Whole page body as one string. The Source is closed explicitly once it has
  // been read, so the underlying connection/stream is not leaked.
  val source = {
    val page = Source.fromURL(s"http://zck.krakow.pl/?pageId=16&date=$date")
    try page.mkString finally page.close()
  }
  // (?s) lets `.` cross line breaks. Group 0 is a whole <table>...</table>;
  // group 1 is the text starting at "Cmentarz" up to the next `<`.
  val regex = "(?s)<table>.+?(Cmentarz.+?)<.+?</table>".r
  // Filled by the scraping loop further down, one entry per matched table.
  var thing: List[FuneralSchedule] = List()
  // Not populated anywhere in this script yet.
  var jsonFeed: List[Funeral] = List()
  // Not referenced anywhere in this script.
  val regMatcher = "("
  11.  
  /** A single funeral record made of three text fields: `hour`, `who` and `age`. */
  case class Funeral(hour: String, who: String, age: String) {
    // Replaces the generated case-class rendering with a fixed placeholder,
    // regardless of the field values.
    override def toString: String = "Cos"
  }
  17.  
  //implicit val format = Json.format[Funeral]
  // Walks every table matched by `regex` in the downloaded page: records a
  // FuneralSchedule for it in `thing`, prints each <td> cell of each <tr> row,
  // then prints an end-of-table marker. The result of `foreach` is Unit, so
  // `out` itself carries no data.
  val out = {
    // Compile the row and cell patterns once instead of on every iteration;
    // rebuilding them inside the loops was the performance problem the
    // original TODO pointed at.
    val rowPattern = """<tr\s?>.+?</\s?tr>""".r
    val cellPattern = """<td\s?>.+?</\s?td>""".r

    regex.findAllIn(source).matchData foreach { table =>
      // Clean the table markup once and reuse it for both the stored schedule
      // and the row scan.
      val cleaned = clearStrings(table.group(0))
      // `::=` prepends, so `thing` ends up in reverse page order.
      thing ::= FuneralSchedule(table.group(1), cleaned)
      rowPattern.findAllIn(cleaned).matchData foreach { tr =>
        cellPattern.findAllIn(tr.group(0)).matchData foreach { cell =>
          println(cell)
        }
        //println(Json.toJson(thingy))
      }
      println("Koniec tabeli")
    }
  }
  // Worksheet expression: shows the collected schedules.
  thing
  //Json.toJson(jsonFeed)
  // Ad-hoc check of removeMarkers on a sample cell.
  println(removeMarkers("<td > <td> Marian Debil </ td>"))
  /** Strips opening and closing `td` tags from `s`, leaving the text between them.
    *
    * A tag is recognised with at most one whitespace character after `<`/`</`
    * and at most one before `>`; anything else is left untouched.
    *
    * @param s markup fragment to clean
    * @return `s` with every matching tag removed
    */
  def removeMarkers(s: String): String = {
    val cellTag = """(</?\s?td\s?>)""".r
    cellTag.replaceAllIn(s, "")
  }
  /** Removes `class="..."`, `id="..."` and `style="..."` attributes and all
    * line-break characters from `s`.
    *
    * The attribute value is matched as `[^"]*`, so an empty value such as
    * `class=""` is removed on its own instead of swallowing text up to a later
    * quote. Carriage returns are stripped along with newlines so that CRLF
    * input also collapses to a single line.
    *
    * @param s markup to clean
    * @return `s` without those attributes and without `\r` / `\n`
    */
  def clearStrings(s: String): String = {
    val noise = """(?:class|id|style)="[^"]*"|[\r\n]"""
    s.replaceAll(noise, "")
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement