Advertisement
Guest User

Untitled

a guest
Apr 19th, 2019
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.78 KB | None | 0 0
  1. import scala.collection.mutable.HashMap
  2. import scala.collection.mutable.ArrayBuffer
  3.  
  4. sealed trait Symbol[C] extends Ordered[Symbol[C]] {
  5. def isTerminal(): Boolean
  6. }
  7.  
  8. case class CharacterSymbol[C: Ordering](val character: C) extends Symbol[C] {
  9. def isTerminal(): Boolean = false
  10. def compare(that: CharacterSymbol[C]): Int = Ordering[C].compare(this.character, that.character)
  11. def compare(that: TerminalSymbol[C]): Int = -1
  12. override def compare(that: Symbol[C]): Int = that match {
  13. case that: CharacterSymbol[C] => compare(that)
  14. case that: TerminalSymbol[C] => compare(that)
  15. }
  16. override def toString(): String = character.toString()
  17. }
  18.  
  19. case class TerminalSymbol[C: Ordering](val index: Int) extends Symbol[C] {
  20. def isTerminal(): Boolean = true
  21. def compare(that: TerminalSymbol[C]): Int = Ordering[Int].compare(this.index, that.index)
  22. def compare(that: CharacterSymbol[C]): Int = 1
  23. override def compare(that: Symbol[C]): Int = that match {
  24. case that: CharacterSymbol[C] => compare(that)
  25. case that: TerminalSymbol[C] => compare(that)
  26. }
  27. override def toString(): String = "$" + index.toString()
  28. }
  29.  
  30. object Symbols {
  31. def fromString[C: Ordering](string: Seq[C], index: Int): Seq[Symbol[C]] = string.map(c => CharacterSymbol(c)) :+ TerminalSymbol[C](index)
  32. }
  33.  
  34. class State[C: Ordering] { self =>
  35. private val transitionMap: HashMap[C, Transition[C]] = HashMap.empty[C, Transition[C]]
  36. def transitions: Map[C, Transition[C]] = transitionMap.toMap
  37. def getTransitions: Map[C, Transition[C]] = transitions
  38. def getTransition(c: C): Option[Transition[C]] = transitions.get(c)
  39. def addTransition(c: C, transition: Transition[C]) = transitionMap.put(c, transition)
  40.  
  41. private var sourceTransitionOption: Option[Transition[C]] = None
  42. def sourceTransition: Option[Transition[C]] = sourceTransitionOption
  43. def getSourceTransition: Option[Transition[C]] = sourceTransition
  44. def setSourceTransition(transition: Transition[C]) = {
  45. sourceTransitionOption = Some(transition)
  46. }
  47.  
  48. private var suffixLinkOption: Option[State[C]] = None
  49. def suffixLink: Option[State[C]] = suffixLinkOption
  50. def getSuffixLink: Option[State[C]] = suffixLink
  51. def setSuffixLink(suffixLink: State[C]) = {
  52. suffixLinkOption = Some(suffixLink)
  53. }
  54.  
  55. def parent: Option[State[C]] = sourceTransition.map(_.source)
  56. def children: Seq[State[C]] = transitions.values.map(_.dest).toSeq
  57.  
  58. def isLeaf: Boolean = transitions.isEmpty
  59. def inInternal: Boolean = !transitions.isEmpty
  60.  
  61. def isRoot: Boolean = false
  62. def isPerp: Boolean = false
  63.  
  64. private def statesUptoRoot: Iterator[State[C]] = new BufferedIterator[State[C]] {
  65. private var cursor: State[C] = self
  66. override def head: State[C] = cursor
  67. override def hasNext: Boolean = cursor != null
  68. override def next(): State[C] = {
  69. val currentCursor = cursor
  70. cursor = cursor.sourceTransition.map(_.source).getOrElse(null)
  71. currentCursor
  72. }
  73. }
  74.  
  75. def depth: Int = statesUptoRoot.size - 1
  76. def prefix: Seq[C] = sourceTransition.map(_.prefix).getOrElse(Seq.empty[C])
  77.  
  78. def toState: State[C] = this
  79.  
  80. def toDebugString: String = {
  81. val builder = new StringBuilder("root\n")
  82. def addEntryLines(state: State[C], prefix: String): Unit = {
  83. if (!state.transitions.isEmpty) {
  84. for ((first, transition) <- state.transitions.toSeq.sortBy(_._1)) {
  85. builder ++= prefix + "|-" + transition.label + "\n"
  86. addEntryLines(transition.dest, prefix + "| ")
  87. }
  88. }
  89. }
  90. addEntryLines(this, "")
  91. builder.toString()
  92. }
  93.  
  94. def printDebugString: Unit = {
  95. println(toDebugString)
  96. }
  97. }
  98.  
  99. class RootState[C: Ordering] extends State[C] {
  100. override def toString: String = "root"
  101. override def isRoot: Boolean = true
  102. }
  103.  
  104. class PerpState[C: Ordering](val root: RootState[C]) extends State[C] {
  105. private val transitionToRoot = new Transition(Seq.empty[C], this, 0, 0, root)
  106. override def getTransition(c: C): Option[Transition[C]] = Some(transitionToRoot)
  107. override def toString: String = "perp"
  108. override def isPerp: Boolean = true
  109. }
  110.  
  111. class SlicedSeq[A](val original: Seq[A], val from: Int, val until: Int) extends Seq[A] {
  112. def sliced: Seq[A] = original.slice(from, until)
  113. override def apply(idx: Int): A = sliced.apply(idx)
  114. override def iterator: Iterator[A] = sliced.iterator
  115. override def length: Int = sliced.length
  116. }
  117.  
  118. class Transition[C](val sequence: Seq[C], val source: State[C], val start: Int, val end: Int, val dest: State[C])
  119. extends SlicedSeq(sequence, start - 1, end) {
  120. def subsequence: Seq[C] = sequence.slice(start - 1, end)
  121. def prefix: Seq[C] = sequence.slice(0, end)
  122. def label: String = subsequence.map(_.toString).mkString
  123. }
  124.  
  125. class GeneralizedSuffixTree[C: Ordering] {
  126. private val sequenceBuffer: ArrayBuffer[Seq[C]] = ArrayBuffer.empty[Seq[C]]
  127.  
  128. def sequences: Seq[Seq[C]] = sequenceBuffer
  129.  
  130. val root = new RootState[Symbol[C]]()
  131. val perp = new PerpState[Symbol[C]](root)
  132.  
  133. root.setSuffixLink(perp)
  134.  
  135. def addSequence(sequence: Seq[C]): Unit = {
  136. val sequenceIndex = sequences.length
  137.  
  138. sequenceBuffer.append(sequence)
  139.  
  140. val symbols = Symbols.fromString(sequence, sequenceIndex)
  141. val terminalSymbol = symbols.last
  142.  
  143. def inf: Int = symbols.length
  144.  
  145. def t_(i: Int): Symbol[C] = {
  146. symbols(i - 1)
  147. }
  148.  
  149. def createTransition(sequence: Seq[Symbol[C]], from: State[Symbol[C]], startAndEnd: (Int, Int), to: State[Symbol[C]]): Unit = {
  150. val start = startAndEnd._1
  151. val end = startAndEnd._2
  152. val transition = new Transition(sequence, from, start, end, to)
  153. from.addTransition(transition.head, transition)
  154. to.setSourceTransition(transition)
  155. }
  156.  
  157. def createSuffixLink(arg: State[Symbol[C]], res: State[Symbol[C]]): Unit = {
  158. arg.setSuffixLink(res)
  159. }
  160.  
  161. def findTransition(t: Symbol[C], from: State[Symbol[C]]): Option[Transition[Symbol[C]]] = {
  162. from.getTransition(t)
  163. }
  164.  
  165. def splitTransition(gprime: Transition[Symbol[C]], s: State[Symbol[C]], kp: (Int, Int)): State[Symbol[C]] = {
  166. val k = kp._1
  167. val p = kp._2
  168.  
  169. val pprime = gprime.end
  170. val kprime = gprime.start
  171. val sprime = gprime.dest
  172.  
  173. val r = new State[Symbol[C]]()
  174.  
  175. // s -> sprime => s -> r -> sprime
  176. createTransition(gprime.sequence, r, (kprime + p - k + 1, pprime), sprime)
  177. createTransition(gprime.sequence, s, (kprime, kprime + p - k), r)
  178.  
  179. r
  180. }
  181.  
  182. def fprime(s: State[Symbol[C]]): State[Symbol[C]] = {
  183. s.getSuffixLink.get
  184. }
  185.  
  186. def update(state: State[Symbol[C]], ki: (Int, Int)): (State[Symbol[C]], Int) = {
  187. var s = state
  188. var k = ki._1
  189. var i = ki._2
  190. var sk = (s, k)
  191.  
  192. var oldr = root.toState
  193. var endPointAndR = testAndSplit(s, (k, i - 1), t_(i))
  194. var endPoint = endPointAndR._1
  195. var r = endPointAndR._2
  196.  
  197. while (!endPoint) {
  198. val rprime = new State[Symbol[C]]()
  199. createTransition(symbols, r, (i, inf), rprime)
  200. if (oldr != root) {
  201. createSuffixLink(oldr, r)
  202. }
  203. oldr = r
  204. sk = canonize(fprime(s), (k, i - 1))
  205. s = sk._1
  206. k = sk._2
  207. endPointAndR = testAndSplit(s, (k, i - 1), t_(i))
  208. endPoint = endPointAndR._1
  209. r = endPointAndR._2
  210. }
  211. if (oldr != root) {
  212. createSuffixLink(oldr, s)
  213. }
  214. (s, k)
  215. }
  216.  
  217. def testAndSplit(state: State[Symbol[C]], kp: (Int, Int), t: Symbol[C]): (Boolean, State[Symbol[C]]) = {
  218. var s = state
  219. var k = kp._1
  220. var p = kp._2
  221.  
  222. if (k <= p) {
  223. var gprime = findTransition(t_(k), s).get
  224. var pprime = gprime.end
  225. var kprime = gprime.start
  226. var sprime = gprime.to
  227.  
  228. //if (t == t_(kprime + p - k + 1)) {
  229. if (t == gprime(p - k + 1)) {
  230. (true, s)
  231. } else { // split the transition with new state r in between
  232. val r = splitTransition(gprime, s, kp)
  233. (false, r)
  234. }
  235. } else {
  236. val trans = findTransition(t, s)
  237. if (!trans.isDefined) {
  238. (false, s)
  239. } else {
  240. (true, s)
  241. }
  242. }
  243. }
  244.  
  245. def canonize(state: State[Symbol[C]], kp: (Int, Int)): (State[Symbol[C]], Int) = {
  246. var s = state
  247. var k = kp._1
  248. var p = kp._2
  249.  
  250. if (p < k) {
  251. (s, k)
  252. } else {
  253. var gprime = findTransition(t_(k), s).get
  254. var pprime = gprime.end
  255. var kprime = gprime.start
  256. var sprime = gprime.dest
  257. while ((pprime - kprime) <= (p - k)) { // while length of transition is leq length of remains
  258. k = k + pprime - kprime + 1 // proceed k to the length of transition
  259. s = sprime // push state to the target of transition
  260. if (k <= p) { // if remains are still left, find the next transition
  261. gprime = findTransition(t_(k), s).get
  262. pprime = gprime.end
  263. kprime = gprime.start
  264. sprime = gprime.dest
  265. }
  266. }
  267. (s, k)
  268. }
  269. }
  270.  
  271. var s = root.toState
  272. var k = 1
  273. var i = 0
  274. var sk = (s, k)
  275.  
  276. while (t_(i + 1) != terminalSymbol) {
  277. i = i + 1
  278. sk = update(s, (k, i))
  279. s = sk._1
  280. k = sk._2
  281. sk = canonize(s, (k, i))
  282. s = sk._1
  283. k = sk._2
  284. }
  285.  
  286. // add terminal symbol
  287. i = i + 1
  288. sk = update(s, (k, i))
  289. s = sk._1
  290. k = sk._2
  291. sk = canonize(s, (k, i))
  292. s = sk._1
  293. k = sk._2
  294. }
  295.  
  296. def addSequences(sequences: Seq[C]*): Unit = {
  297. for (sequence <- sequences) {
  298. addSequence(sequence)
  299. }
  300. }
  301. }
  302.  
  303. object Application {
  304. def main(args: Array[String]): Unit = {
  305. {
  306. val sequence = "mississippi"
  307. val tree = new GeneralizedSuffixTree[Char]()
  308. tree.addSequence(sequence)
  309. tree.root.printDebugString
  310. val debugString =
  311. """
  312. root
  313. |-i
  314. | |-ppi$0
  315. | |-ssi
  316. | | |-ppi$0
  317. | | |-ssippi$0
  318. | |-$0
  319. |-mississippi$0
  320. |-p
  321. | |-i$0
  322. | |-pi$0
  323. |-s
  324. | |-i
  325. | | |-ppi$0
  326. | | |-ssippi$0
  327. | |-si
  328. | | |-ppi$0
  329. | | |-ssippi$0
  330. |-$0
  331. """
  332. println(debugString.trim == tree.root.toDebugString.trim)
  333. }
  334. }
  335. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement