skert

/**
 * Actor that owns the H2-backed index (tables `words`, `documents`, `index`).
 *
 * Messages handled:
 *  - `CheckPage(url, html)`: replies `ParsePage(url, html)` to the sender when
 *    `url` is not yet present in `documents`.
 *  - `CheckLink(url)`: replies `QueueLink(url)` to the sender when `url` is not
 *    yet present in `documents`.
 *  - `Word(url, word)`: makes sure both the document and the word have a row,
 *    then records the (word, document) pair in `index`.
 *
 * NOTE: constructing an instance drops and recreates all three tables
 * (see the call to `createDatabase` in the class body).
 */
class IndexActor extends Actor with ActorLogging {

  // Column widths declared in `createDatabase`. Values wider than these cannot
  // be stored, so they are rejected before touching the database.
  // Keep in sync with the DDL below.
  private val MaxWordLength = 50
  private val MaxUrlLength  = 1024

  def receive = {
    case CheckPage(url, html) =>
      if (!checkUrlExistance(url)) sender ! ParsePage(url, html)

    case CheckLink(url) =>
      if (!checkUrlExistance(url)) sender ! QueueLink(url)

    case Word(url, word) =>
      if (word.isEmpty || word.length > MaxWordLength || url.length > MaxUrlLength) {
        // Skipping is preferable to letting the insert fail inside the actor.
        log.warning(
          "Skipping index entry: word length {} / url length {} not storable",
          word.length, url.length)
      } else {
        // addDoc/addWord already return the ids, no second lookup is needed.
        val docid  = addDoc(url)
        val wordid = addWord(word)
        log.debug("{} {} about to index", docid, wordid)
        addIndex(wordid, docid)
      }
  }

  /**
   * Inserts `url` into `documents` unless it is already there.
   *
   * @param url the document URL
   * @return the `docid` of the (existing or newly inserted) row
   */
  def addDoc(url: String): Int = {
    if (!checkUrlExistance(url)) {
      sql"""
         insert into documents (url) values (${url});
      """.update.apply()
    }
    getDocId(url)
  }

  /**
   * Inserts `word` into `words` unless it is already there.
   *
   * @param word the word to register
   * @return the `wordid` of the (existing or newly inserted) row
   */
  def addWord(word: String): Int = {
    if (!checkWordExistance(word)) {
      sql"""
        insert into words (word) values (${word});
      """.update.apply()
    }
    getWordId(word)
  }

  /**
   * Looks up the id of a document.
   *
   * @param url the document URL
   * @return the matching `docid`
   * @throws NoSuchElementException if no row exists for `url`
   */
  def getDocId(url: String): Int = {
    sql"""
      select docid from documents where url = ${url};
    """.map(rs => rs.int("docid")).single.apply()
      .getOrElse(throw new NoSuchElementException(s"no document row for url: $url"))
  }

  /**
   * Looks up the id of a word.
   *
   * @param word the word to look up
   * @return the matching `wordid`
   * @throws NoSuchElementException if no row exists for `word`
   */
  def getWordId(word: String): Int = {
    sql"""
      select wordid from words where word = ${word};
    """.map(rs => rs.int("wordid")).single.apply()
      .getOrElse(throw new NoSuchElementException(s"no word row for word: $word"))
  }

  /**
   * Records that the word `wordid` occurs in the document `docid`.
   *
   * @param wordid id of a row in `words`
   * @param docid  id of a row in `documents`
   */
  def addIndex(wordid: Int, docid: Int): Unit = {
    sql"""
      insert into index (docid, wordid) values (${docid}, ${wordid});
    """.update.apply()
  }

  /**
   * Tells whether `link` is already stored in `documents`.
   *
   * The lookup is done by the database (`where ... limit 1`) instead of
   * loading every URL into memory and scanning the list.
   *
   * @param link the URL to look for
   * @return true if at least one row has this URL
   */
  def checkUrlExistance(link: String): Boolean = {
    sql"""
      select 1 from documents where url = ${link} limit 1
    """.map(rs => rs.int(1)).single.apply().isDefined
  }

  /**
   * Tells whether `word` is already stored in `words`.
   *
   * @param word the word to look for
   * @return true if at least one row has this word
   */
  def checkWordExistance(word: String): Boolean = {
    sql"""
      select 1 from words where word = ${word} limit 1
    """.map(rs => rs.int(1)).single.apply().isDefined
  }

  ///////////////////////////////////////////////////////////////////
  // The code below is a starting point for your queries/updates to
  // the database. We have provided the database creation SQL for
  // you. You will not need to add any additional tables. Your goal
  // is to populate it with data you have received from parsed HTML
  // documents. We strongly suggest that you implement each of your
  // queries as individual methods in this class, where each method
  // corresponds to some query that is useful in building the index.
  ///////////////////////////////////////////////////////////////////

  // Necessary setup for connecting to the H2 database:
  Class.forName("org.h2.Driver")
  ConnectionPool.singleton("jdbc:h2:./indexer", "sa", "")
  implicit val session = AutoSession

  // Create the database when this object is referenced.
  createDatabase

  /**
   * Drops any existing tables and recreates the empty schema.
   *
   * `index` is dropped first because it holds foreign keys into the other
   * two tables; dropping a referenced table first fails when the database
   * applies RESTRICT semantics to `drop table`.
   */
  def createDatabase: Unit = {
    sql"""
      drop table index if exists;
      drop table words if exists;
      drop table documents if exists;
    """.update.apply()

    // Create the tables if they do not already exist:
    sql"""
    create table if not exists words (
      wordid int auto_increment,
      word varchar(50),
      primary key (wordid)
    );
    """.update.apply()

    sql"""
    create table if not exists documents (
      docid int auto_increment,
      url varchar(1024),
      primary key (docid)
    );
    """.update.apply()

    sql"""
    create table if not exists index (
      wordid int,
      docid int,
      foreign key (wordid) references words (wordid) on delete cascade,
      foreign key (docid) references documents (docid) on delete cascade
    );
    """.update.apply()
  }

}

/**
 * Holds the queue of URLs waiting to be fetched and hands them out on demand.
 *
 * Messages handled:
 *  - `Page(url, html)`: passed on to `parseQueue`. It is re-sent with `!`
 *    rather than forwarded, so this actor is the sender seen by `parseQueue`.
 *  - `NeedLink`: answers with `FetchLink(url)` for the next queued URL, or
 *    `NoLinks` when the queue is empty. Once the fetch budget is used up the
 *    actor system is shut down instead.
 *  - `QueueLink(url)`: appends `url` to the queue.
 *
 * @param parseQueue actor that receives fetched pages
 */
class LinkQueueActor(parseQueue: ActorRef) extends Actor with ActorLogging {
  // We have provided some definitions below which will help you with
  // your implementation. You are welcome to modify these, however, this
  // is what we used for our implementation.
  val queue        = Queue[String]()
  // Remaining number of links that may still be handed out for fetching.
  var limit        = 500

  def receive = {
    case Page(url, html) =>
      parseQueue ! Page(url, html)

    case NeedLink =>
      // A single if/else-if chain: once the budget is exhausted nothing more
      // is dequeued, and `limit` can no longer drop below zero and slip past
      // the shutdown check.
      if (limit <= 0) context.system.shutdown()
      else if (queue.isEmpty) sender ! NoLinks
      else {
        limit -= 1
        sender ! FetchLink(queue.dequeue())
      }

    case QueueLink(url) =>
      queue += url
  }
}


/**
 * Downloads pages for the links handed out by the link queue.
 *
 * Messages handled:
 *  - `NoLinks`: immediately asks the queue again with `NeedLink`.
 *  - `FetchLink(url)`: downloads `url`; on success sends `Page(url, body)` to
 *    the queue. In either case the next link is then requested.
 *
 * @param queue the link-queue actor that supplies links and receives pages
 */
class FetchActor(queue: ActorRef) extends Actor with ActorLogging {

  // This message will start off the process of fetching
  // links from the QueueActor. We include this for you!
  queue ! NeedLink

  def receive = {
    case NoLinks => queue ! NeedLink
    case FetchLink(url) =>
      // Fetch exactly once: calling fetch() a second time to read the body
      // would issue a second HTTP request, which may also fail independently.
      val result = fetch(url)
      result.foreach(html => queue ! Page(url, html))
      result.failed.foreach(e => log.debug("Could not fetch {}: {}", url, e.getMessage))
      queue ! NeedLink
  }

  /**
   * Performs an HTTP request for `url`.
   *
   * @param url the address to download
   * @return the response body, or a `Failure` if the request threw
   */
  def fetch(url: String): Try[String] =
    Try(Http(url).asString.body)
}

/**
 * Sits between the link queue, the parsers and the indexer.
 *
 * Messages handled:
 *  - `Page(url, html)`: remembers the first sender as the link queue and asks
 *    the indexer (`CheckPage`) whether the page still needs parsing.
 *  - `ParsePage(url, html)`: queued until a parser asks for work.
 *  - `NeedPage`: answers with the next queued `ParsePage`, or `NoPages`.
 *  - `Link(url)`: asks the indexer (`CheckLink`) whether the link is new.
 *  - `QueueLink(url)`: passed on to the remembered link queue.
 *  - `Word(url, word)`: passed on to the indexer.
 *
 * @param indexer actor answering `CheckPage`/`CheckLink` and storing words
 */
class ParseQueueActor(indexer: ActorRef) extends Actor with ActorLogging {
  // Set from the sender of the first `Page` message received.
  var linkQueue: Option[ActorRef] = None
  val queue = Queue[ParsePage]()

  def receive = {
    case Page(url, html) =>
      if (linkQueue.isEmpty) linkQueue = Some(sender)
      indexer ! CheckPage(url, html)

    // Reply to CheckPage, sent by the indexer when the page is not indexed yet.
    case ParsePage(url, html) =>
      queue += ParsePage(url, html)

    case NeedPage =>
      if (queue.isEmpty) sender ! NoPages
      else sender ! queue.dequeue()

    case Link(url) =>
      indexer ! CheckLink(url)

    case QueueLink(url) =>
      // Avoid Option.get: if no Page has been seen yet there is nowhere to
      // send the link, and throwing here would fail the whole actor.
      linkQueue match {
        case Some(target) => target ! QueueLink(url)
        case None         => log.warning("Dropping link {}: link queue not known yet", url)
      }

    case Word(url, word) =>
      indexer ! Word(url, word)
  }
}

/**
 * Extracts links and words from HTML pages.
 *
 * On creation it asks `pq` for work with `NeedPage`.
 *
 * Messages handled:
 *  - `ParsePage(url, html)`: sends one `Link` per quoted http(s) URL found in
 *    `html` back to the sender, one `Word(url, w)` per distinct word to `pq`,
 *    then requests the next page.
 *  - `NoPages`: immediately asks `pq` again with `NeedPage`.
 *
 * @param pq the parse-queue actor that supplies pages and receives words
 */
class ParseActor(pq: ActorRef) extends Actor with ActorLogging {
  log.info("ParseActor created")
  pq ! NeedPage

  def receive = {
    case ParsePage(url, html) =>
      // A double quote followed by an http(s) URL; group 1 is the URL itself.
      val linkPattern = """\"(https?://[^\"]+)""".r
      val requester = sender
      linkPattern.findAllMatchIn(html).foreach(m => requester ! Link(m.group(1)))
      parse(html).foreach(word => pq ! Word(url, word))
      pq ! NeedPage

    case NoPages => pq ! NeedPage
  }

  /**
   * Reduces an HTML document to its distinct words.
   *
   * Steps: quoted http(s) URLs are removed, tags are replaced by a space,
   * every character other than ASCII letters, digits and whitespace is
   * removed, and the remainder is split on runs of whitespace.
   *
   * Tags and line breaks act as word separators, so text from neighbouring
   * elements or lines is not glued into a single word, and empty strings are
   * never returned.
   *
   * @param parse the raw HTML text
   * @return the distinct words in order of first appearance
   */
  def parse(parse: String): List[String] =
    parse
      .replaceAll("""\"(https?://[^\"]+)""", "")
      .replaceAll("<[^>]*>", " ")
      // Whitespace is kept here so that newlines and tabs still separate words.
      .replaceAll("""[^a-zA-Z0-9\s]""", "")
      .split("""\s+""")
      .toList
      .filter(_.nonEmpty)
      .distinct
}