Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io._
- import scala.io._
/** Runs `f` exactly once and reports how long it took.
  *
  * The measurement uses `System.nanoTime`, which is monotonic, so the result
  * cannot go negative or be skewed when the wall clock is adjusted while `f`
  * is running.
  *
  * @param f the code to measure; passed by name and evaluated once
  * @return the elapsed time in seconds
  */
def time(f: => Unit): Double = {
  val startNanos = System.nanoTime()
  f
  (System.nanoTime() - startNanos) / 1e9
}
/** Counts word occurrences across a newsgroup corpus and writes two reports.
  *
  * Every regular file located exactly one level below `rootDir`
  * (`rootDir/<group>/<file>`) is read as ISO-8859-1, lower-cased and split on
  * runs of non-word characters (`\W+`). Empty tokens are ignored. The totals
  * are written to the current working directory, one `word<TAB>count` pair
  * per line:
  *
  *  - `counts-descreasing-scala.txt`  – highest count first
  *  - `counts-alphabetical-scala.txt` – ordered by word
  *
  * The file names (including the "descreasing" spelling) are kept as they
  * were so that existing consumers of the output still find them.
  *
  * @param rootDir directory whose sub-directories hold the files to count
  * @throws IllegalArgumentException if `rootDir` is not a directory
  */
def processNewsgroups(rootDir: File): Unit = {
  require(rootDir.isDirectory, "Not a directory: " + rootDir)

  // File.listFiles returns null on I/O failure; treat that as "no entries".
  def children(dir: File): List[File] =
    Option(dir.listFiles).map(_.toList).getOrElse(Nil)

  // Reads a whole file as text, always releasing the underlying handle.
  def readText(file: File): String = {
    val source = Source.fromFile(file, "ISO-8859-1")
    try source.getLines().mkString("\n")
    finally source.close()
  }

  // Lower-cases with a fixed locale (the default locale could map 'I' to a
  // non-ASCII letter that \W+ would then treat as a separator) and drops the
  // empty token that split produces for empty text or leading punctuation.
  def words(text: String): Array[String] =
    text.toLowerCase(java.util.Locale.ROOT).split("""\W+""").filter(_.nonEmpty)

  // Writes the counts as "word<TAB>count" lines in the order given by `before`.
  def write(counts: Map[String, Int], file: String)(
      before: ((String, Int), (String, Int)) => Boolean): Unit = {
    val out = new PrintWriter(new FileWriter(file))
    try counts.toList.sortWith(before).foreach { case (word, n) => out.println(word + "\t" + n) }
    finally out.close()
  }

  // Only regular files are read; nested directories below a group are skipped.
  val files = for {
    group <- children(rootDir) if group.isDirectory
    file  <- children(group) if file.isFile
  } yield file

  // Fold file by file so only one file's text is held in memory at a time.
  val counts = files.foldLeft(Map.empty[String, Int]) { (acc, file) =>
    words(readText(file)).foldLeft(acc) { (m, word) =>
      m + (word -> (m.getOrElse(word, 0) + 1))
    }
  }

  write(counts, "counts-descreasing-scala.txt")(_._2 > _._2)
  write(counts, "counts-alphabetical-scala.txt")(_._1 < _._1)
}
// Script entry point: run the word count over the sample corpus and time it.
time {
  processNewsgroups(new File("/Users/danny/Downloads/mini_newsgroups"))
}
Add Comment
Please, Sign In to add comment