darkmist

udacity_notes.scala

Dec 31st, 2016
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 11.11 KB | None | 0 0
  1. import java.io.{File, IOException, PrintWriter}
  2. import java.security.MessageDigest
  3. import java.time.{LocalDateTime, ZoneId, LocalTime}
  4. import java.time.format.DateTimeFormatter
  5. import java.util.{Date, Locale, UUID}
  6.  
  7. import scala.collection.mutable
  8. import scala.concurrent.duration.{FiniteDuration, _}
  9. import scala.io.{BufferedSource, Source}
  10. import scala.math.BigDecimal.RoundingMode
  11. import scala.util.Try
  12. import scala.xml.Elem
  13.  
  14. /**
  15.  * Converts a folder of transcripts (*.srt) from Udacity to
  16.  * readable notes in html, which can be pasted to Google Docs.
  17.  *
  18.  * Ex.
  19.  * runMain UdacityNotes -i "/Users/kefuzhou/Downloads/P3L5 Non-Functional Reqs & Arch Styles Subtitles"
  20.  * runMain UdacityNotes -i "/Users/kefuzhou/Downloads/Software Architecture & Design Subtitles" -cf -r 1
  21.  */
  22. object UdacityNotes {
  23.  
  /**
   * One transcript section.
   *
   * @param title    section title taken from the transcript file name
   * @param body     transcript text (raw subtitle content, or cleaned prose after processing)
   * @param duration end time of the last subtitle cue, if one could be found
   */
  final case class Data(title: String, body: String, duration: Option[FiniteDuration])
  25.  
  /** Hard-coded switch for the extra sanity checks wrapped in `debug`. */
  val isDebug: Boolean = false
  println(s"Debugging is ${isDebug}")

  /**
   * Evaluates `block` only when `isDebug` is set; its result is discarded.
   *
   * @param block by-name expression, left unevaluated when debugging is off
   */
  def debug[T](block: => T): Unit =
    if (isDebug) {
      block
      ()
    }
  31.  
  /**
   * Entry point: renders a folder of Udacity transcripts to `UdacityNotes_<folder name>.html`
   * in the current working directory.
   *
   * Recognised arguments:
   *  - `-i <path>`: a lesson folder, or a folder of lesson folders when `-cf` is given (required)
   *  - `-cf`: treat `-i` as a course folder containing one sub-folder per lesson
   *  - `-r <n>`: rotate the sorted lesson list left by `n` (course mode only; defaults to 0,
   *    also when the value is missing or not an integer)
   *
   * @throws IOException when `-i` is missing or a lesson folder is not a directory
   */
  def main(args: Array[String]): Unit = {
    val path = getArgumentValue("-i", args)
    val isCourseFolder = args.contains("-cf")
    val rotateCount = Try(getArgumentValue("-r", args)).map(_.toInt).getOrElse(0)

    val lessonFolders: List[File] =
      if (isCourseFolder) {
        val dir = new File(path)
        assert(dir.isDirectory, s"Dir should be a directory of lesson directories: ${path}")
        rotate(rotateCount, sortFiles(dir.listFiles().filter(_.isDirectory).toList))
      } else {
        List(new File(path))
      }

    // Fail fast with a readable message: File.listFiles() returns null for a non-directory, which
    // would otherwise surface as a NullPointerException after the output file was already created.
    lessonFolders.find(!_.isDirectory).foreach { notADirectory =>
      throw new IOException(s"Lesson folder is not a directory: ${notADirectory.getAbsolutePath}")
    }

    println(s"Processing ${lessonFolders.size} lessons")

    val folderTitle = new File(path).getName
    val pw = new PrintWriter(new File(s"UdacityNotes_${folderTitle}.html"))
    // try/finally so the output file is flushed and released even when a transcript fails to parse.
    try {
      pw.println("<html><body>")
      pw.println(<h1 class="folderTitleHeader">{s"Folder Title: ${folderTitle}"}</h1>)
      pw.println(<div>{"Total Video Time: %s".format(formatTotalVideoTime(lessonFolders))}</div>)

      // Take "now" in the zone that is printed; LocalDateTime.now() would stamp the machine's
      // local wall-clock time and merely label it as New York time.
      val newYork = ZoneId.of("America/New_York")
      val timeFormatter = DateTimeFormatter.ofPattern("MM/dd/yyyy h:mm:ssa z").withZone(newYork)
      val updateTimeString = timeFormatter.format(java.time.ZonedDateTime.now(newYork))
      pw.println(<div>{"Updated: %s".format(updateTimeString)}</div>)

      pw.println(createTableOfContents(lessonFolders))
      lessonFolders.foreach(folder => pw.println(renderLesson(folder)))

      pw.println("</body></html>")
    } finally {
      pw.close()
    }
  }

  /** Suffix appended to a time total when `numUnknowns` transcripts had no readable end time. */
  private def unknownSuffix(numUnknowns: Int): String = numUnknowns match {
    case 0 => ""
    case 1 => " + 1 Unknown"
    case n => s" + ${n} Unknowns"
  }

  /**
   * Sums the end times of every transcript in `lessonFolders` and formats the total as a clock
   * string (`HH:mm[:ss[.fraction]]`), followed by the count of transcripts without an end time.
   */
  private def formatTotalVideoTime(lessonFolders: List[File]): String = {
    val endTimes = lessonFolders.flatMap(_.listFiles()).map(f => getEndTime(f).map(_.toMillis))
    val numUnknowns = endTimes.count(_.isEmpty)
    val totalMillis = endTimes.collect { case Some(endTime) => endTime }.sum

    val millisPerHour = 60L * 60L * 1000L
    val millisPerDay = 24L * millisPerHour
    // LocalTime only covers a single day, so render the sub-day remainder with it and, for totals
    // of 24h or more, swap its two-digit hour field for the real hour count.
    val clock = LocalTime.ofNanoOfDay((totalMillis % millisPerDay) * 1000000L).toString
    val totalHours = totalMillis / millisPerHour
    val timeString = if (totalHours < 24) clock else s"$totalHours${clock.drop(2)}"

    timeString + unknownSuffix(numUnknowns)
  }

  /** Builds the HTML for one lesson folder: a linked lesson heading followed by every section. */
  private def renderLesson(folder: File): Elem = {
    def surroundText(in: String) = s"\n$in\n"

    val lessonName = folder.getName
    val sectionDatas = getFilesSorted(folder.getAbsolutePath).toVector
      .map(parseFile)
      .map(data => data.copy(body = parseSubtitleText(data.body)))
    val numUnknowns = sectionDatas.count(_.duration.isEmpty)
    val totalLessonFolderTime =
      sectionDatas.map(_.duration).collect { case Some(d) => d.toMillis }.sum.millis
    val lessonHeading =
      s"Lesson: ${lessonName} (${sprintEndTime(totalLessonFolderTime)}${unknownSuffix(numUnknowns)})"

    <div class="lesson">
      <div class="lessonTitle">
        <h2 class="lessonTitleHeader" id={getLessonId(lessonName)}>
          <a href={"#%s".format(getTableOfContentsId(lessonName, ""))}>{surroundText(lessonHeading)}</a>
        </h2>
      </div>{
      sectionDatas.map { data =>
        <div class="sectionData">
          <h3 class="sectionTitleHeader" id={getSectionId(lessonName, data.title)}>
            <a href={"#%s".format(getTableOfContentsId(lessonName, data.title))}>
              {surroundText(s"Section: ${data.title} ${data.duration.map(a => "(%s)".format(sprintEndTime(a))).getOrElse("")}")}
            </a>
          </h3>
        </div>
        <br/>
        <div>{surroundText(wordWrap(data.body, 120))}</div>
        <br/>
      }}</div>
  }
  121.  
  /**
   * Rotates `ls` to the left by `n` positions; a negative `n` rotates to the right.
   * `n` is reduced modulo the list length, and an empty list is returned unchanged.
   */
  private def rotate[A](n: Int, ls: List[A]): List[A] =
    if (ls.isEmpty) ls
    else {
      val size = ls.length
      // Normalise into [0, size) so negative shifts wrap around from the end.
      val shift = ((n % size) + size) % size
      val (front, back) = ls.splitAt(shift)
      back ::: front
    }
  127.  
  /** XML pretty printer configured with width 120 and indent step 2. */
  val xmlPrettyPrinter: scala.xml.PrettyPrinter = new scala.xml.PrettyPrinter(120, 2)
  129.  
  /**
   * Looks up the value that follows `flag` on the command line.
   *
   * @param flag the switch to search for, e.g. `-i`
   * @param args the raw program arguments
   * @return the argument immediately after the first occurrence of `flag`
   * @throws IOException when `flag` is absent or is the last argument
   */
  def getArgumentValue(flag: String, args: Array[String]): String =
    args.indexOf(flag) match {
      case -1 =>
        throw new IOException(s"Please provide flag ${flag} [value] in argument")
      case pos if pos + 1 >= args.length =>
        throw new IOException(s"Please provide value to flag ${flag}")
      case pos =>
        args(pos + 1)
    }
  140.  
  141.  
  /**
   * Strips subtitle bookkeeping from raw `.srt` content and joins the spoken text.
   *
   * Every line is trimmed; blank lines, lines that parse as a number (cue counters) and lines
   * accepted by `isRangeLine` (timestamp ranges) are dropped. Each remaining line is emitted
   * followed by a single space.
   *
   * @param input raw subtitle file content
   * @return the remaining lines concatenated, each followed by one space
   */
  def parseSubtitleText(input: String): String = {
    def isNumberLine(text: String): Boolean = Try(text.toDouble).isSuccess

    Source.fromString(input).getLines()
      .map(_.trim)
      .filterNot(text => text.isEmpty || isNumberLine(text) || isRangeLine(text))
      .map(text => s"$text ")
      .mkString
  }
  159.  
  /**
   * True when the first eight characters of `line` (or all of them, if it is shorter) are
   * digits or one of `:` `,` `.` — the shape of an `.srt` timestamp line. An empty line
   * therefore also counts.
   */
  private def isRangeLine(line: String): Boolean = {
    val timestampChars = "0123456789:,.".toSet
    line.take(8).forall(timestampChars)
  }
  163.  
  /**
   * Lists the entries of the directory at `path`, ordered by `sortFiles`.
   *
   * @param path directory to list
   * @return the directory entries in sorted order
   * @throws IOException when `path` does not exist, or is not a directory that can be listed
   */
  def getFilesSorted(path: String): List[File] = {
    val file = new File(path)
    if (!file.exists()) {
      throw new IOException(s"File not found at: ${file.getAbsolutePath}")
    }
    // File.listFiles() returns null (not an empty array) when the path is not a directory or an
    // I/O error occurs; turn that into a descriptive error instead of a NullPointerException.
    val files = Option(file.listFiles()).getOrElse {
      throw new IOException(s"Not a readable directory: ${file.getAbsolutePath}")
    }
    sortFiles(files.toList)
  }
  172.  
  /**
   * Orders files by the number that precedes the first `-` in their name (whitespace ignored),
   * e.g. `2 - Intro.srt` sorts before `10 - Recap.srt`. If any name lacks such a numeric
   * prefix, the whole list falls back to plain ordering by file name.
   *
   * @param files files to order
   * @return the same files, sorted
   */
  def sortFiles(files: Seq[File]): List[File] = {
    def leadingNumber(f: File): Double =
      f.getName.split("-", 2).head.filterNot(_.isWhitespace).toDouble

    // Only the key extraction is guarded; the sort itself runs outside the Try.
    Try(files.map(leadingNumber).toVector).toOption match {
      case Some(keys) =>
        files.zip(keys).sortBy { case (_, key) => key }.map { case (f, _) => f }.toList
      case None =>
        files.sortBy(_.getName).toList
    }
  }
  182.  
  /**
   * Reads one transcript file into a `Data` record: the title comes from the file name, the
   * body is the raw file content with lines joined by `\n`, and the duration is the end time
   * reported by `getEndTime`.
   */
  def parseFile(file: File): Data =
    Data(
      title = getSectionTitleFromFile(file),
      body = useThenCloseFile(file, _.getLines().mkString("\n")),
      duration = getEndTime(file)
    )
  189.  
  /**
   * Opens `file`, applies `fn` to the source and always closes the source afterwards.
   *
   * `fn` must consume everything it needs before returning (e.g. `mkString`, `toVector`);
   * a lazy iterator returned from it would read from an already-closed source.
   *
   * @param file file to read
   * @param fn   computation over the open source
   * @return whatever `fn` returns
   */
  private def useThenCloseFile[A](file: File, fn: BufferedSource => A): A = {
    val source = Source.fromFile(file)
    // finally: release the file handle even when fn throws.
    try fn(source)
    finally source.close()
  }
  196.  
  /**
   * Derives a section title from a transcript file name.
   *
   * Names shaped like `<number> - <title>.srt` yield the trimmed `<title>`. Any other name
   * falls back to the file name without a trailing `.srt`, instead of failing with a bare
   * `NoSuchElementException` from `Option.get`.
   *
   * @param file transcript file; only its name is inspected
   * @return the section title
   */
  def getSectionTitleFromFile(file: File): String = {
    val pattern = """\d+ - (.*)\.srt""".r
    val fileName = file.getName
    pattern.findFirstMatchIn(fileName)
      .map(_.group(1))
      .getOrElse(fileName.stripSuffix(".srt"))
      .trim
  }
  202.  
  /** Splits `text` on single spaces and wraps the resulting tokens to `maxLength` columns. */
  def wordWrap(text: String, maxLength: Int): String = {
    val tokens: Seq[String] = text.split(" ")
    wordWrap(tokens, maxLength)
  }
  206.  
  /**
   * Greedy word wrap. Tokens are written one after another, each followed by a space; when a
   * token plus its trailing space no longer fits in the current line, a newline is written
   * before it. A token longer than `maxLength` still gets a line of its own.
   *
   * @param tokens    words to lay out
   * @param maxLength maximum line width, counting the trailing space
   * @return the wrapped text
   */
  def wordWrap(tokens: Seq[String], maxLength: Int): String = {
    val gap = 1
    val wrapped = new StringBuilder
    // The fold state is the room left on the current line.
    tokens.foldLeft(maxLength) { (remaining, token) =>
      val needed = token.length + gap
      if (needed > remaining) {
        wrapped.append(s"\n$token ")
        maxLength - needed
      } else {
        wrapped.append(s"$token ")
        remaining - needed
      }
    }
    val out = wrapped.toString()
    debug {
      assert(Source.fromString(out).getLines().forall(_.length <= maxLength), "word wrap violation")
    }
    out
  }
  226.  
  /** Returns a fresh random UUID as 32 hex characters, i.e. with the dashes removed. */
  def createUUIDString(): String =
    UUID.randomUUID().toString.filterNot(_ == '-')
  230.  
  /**
   * Encodes bytes as lowercase hexadecimal, two characters per byte.
   *
   * @param arrayBytes bytes to encode
   * @return hex string of length `2 * arrayBytes.length`
   */
  def convertByteArrayToHexString(arrayBytes: Array[Byte]): String = {
    val hexDigits = "0123456789abcdef"
    val hex = new StringBuilder(arrayBytes.length * 2)
    for (b <- arrayBytes) {
      // High nibble first; the masks discard the sign extension of negative bytes.
      hex.append(hexDigits.charAt((b >> 4) & 0xf))
      hex.append(hexDigits.charAt(b & 0xf))
    }
    hex.toString()
  }
  239.  
  /**
   * Builds a stable element id: `prefix` followed by the SHA-1 hex digest of
   * `lessonName + sectionName`.
   *
   * The text is hashed as UTF-8 rather than the platform default charset, so names with
   * non-ASCII characters produce the same id on every machine. Ids for ASCII-only names are
   * unchanged.
   *
   * @param prefix      namespace marker put in front of the digest
   * @param lessonName  lesson folder name
   * @param sectionName section title, or `""` for lesson-level ids
   * @return `prefix` followed by 40 lowercase hex characters
   */
  def sha1Hash(prefix: String, lessonName: String, sectionName: String): String = {
    val input = s"$lessonName$sectionName"
    val bytes = MessageDigest.getInstance("SHA-1")
      .digest(input.getBytes(java.nio.charset.StandardCharsets.UTF_8))
    "%s%s".format(prefix, convertByteArrayToHexString(bytes))
  }
  245.  
  /** Anchor id of a section heading in the notes body (`s_` + hash of lesson and section). */
  def getSectionId(lessonName: String, sectionName: String): String =
    sha1Hash(prefix = "s_", lessonName = lessonName, sectionName = sectionName)
  249.  
  /** Anchor id of a lesson heading in the notes body (`l_` + hash of the lesson name). */
  def getLessonId(lessonName: String): String =
    sha1Hash(prefix = "l_", lessonName = lessonName, sectionName = "")
  253.  
  /**
   * Anchor id of a table-of-contents entry (`t_` + hash of lesson and section); pass `""` as
   * `sectionName` for the lesson-level entry.
   */
  def getTableOfContentsId(lessonName: String, sectionName: String): String =
    sha1Hash(prefix = "t_", lessonName = lessonName, sectionName = sectionName)
  257.  
  /**
   * Renders the table of contents: one entry per lesson folder (with its summed video time)
   * and, nested beneath it, one entry per section. Every entry links to the matching heading
   * in the notes body and carries its own id so that the heading can link back.
   *
   * Each transcript is read once and each title parsed once per lesson; both values are then
   * reused for the lesson total and for the section entries.
   *
   * @param lessonFolders lesson directories, in output order
   * @return the table of contents as an HTML string
   */
  def createTableOfContents(lessonFolders: Seq[File]): String = {
    val xml = <div><ul>{
      lessonFolders.map { lessonFolder =>
        val lessonName = lessonFolder.getName
        // (section title, end time) per transcript, in display order.
        val sections = sortFiles(lessonFolder.listFiles().toList).map { sectionFile =>
          (getSectionTitleFromFile(sectionFile), getEndTime(sectionFile))
        }
        val numUnknowns = sections.count { case (_, endTime) => endTime.isEmpty }
        val totalLessonFolderTime =
          sections.collect { case (_, Some(endTime)) => endTime.toMillis }.sum.millis
        val unknownString = numUnknowns match {
          case 0 => ""
          case 1 => " + 1 Unknown"
          case n => s" + ${n} Unknowns"
        }

        <div>
          <li>
            <div id={getTableOfContentsId(lessonName, "")}>
              <a href={s"#${getLessonId(lessonName)}"}>{s"${lessonName} (${sprintEndTime(totalLessonFolderTime)}${unknownString})"}</a>
            </div>
          </li>
          <ul>{
            sections.map { case (sectionTitle, endTime) =>
              val title = s"$sectionTitle ${endTime.map(a => "(%s)".format(sprintEndTime(a))).getOrElse("")}"
              <li>
                <a id={getTableOfContentsId(lessonName, sectionTitle)}
                   href={s"#${getSectionId(lessonName, sectionTitle)}"}>{title}</a>
              </li>
            }
          }</ul>
        </div>
      }
    }</ul></div>
//    xmlPrettyPrinter.format(xml)
    xml.toString
  }
  296.  
  /**
   * Finds the end timestamp of the last subtitle cue in a transcript.
   *
   * Lines accepted by `isRangeLine` are scanned from the bottom of the file upwards; the first
   * one ending in `h:m:s` (seconds may use `,` or `.` as decimal mark) supplies the result.
   *
   * @param sectionFile transcript file to inspect
   * @return the end time, or `None` when no such line exists
   */
  def getEndTime(sectionFile: File): Option[FiniteDuration] = {
    val endStamp = """(\d+:\d+:[\d\.,]+)$""".r

    def toDuration(stamp: String): FiniteDuration = {
      val Array(hh, mm, ss) = stamp.replace(",", ".").split(":").map(_.toDouble)
      hh.hours + mm.minutes + ss.seconds
    }

    val rangeLines: Vector[String] =
      useThenCloseFile(sectionFile, _.getLines().filter(isRangeLine).toVector)

    // The iterator is lazy, so only the first matching line (from the end) is parsed.
    rangeLines.reverseIterator
      .map(line => endStamp.findFirstMatchIn(line))
      .collectFirst { case Some(found) => toDuration(found.group(0)) }
  }
  312.  
  /**
   * Formats a duration as minutes with two decimals (half-even rounding), e.g. `1.50 minutes`.
   *
   * @param duration duration to format; resolution below one millisecond is ignored
   */
  def sprintEndTime(duration: FiniteDuration): String = {
    val minutes = BigDecimal(duration.toMillis / 60000.0).setScale(2, RoundingMode.HALF_EVEN)
    s"$minutes minutes"
  }
  317.  
  318. }
Advertisement