Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 8.05 KB | None | 0 0
  1. package net.verdagon.vcompiler.carpenter
  2.  
  3. case class TetrisTable[K, V](
  4.     hasher: K => Int,
  5.     size: Int,
  6.     topLevel: Array[Option[(Int, Int)]],
  7.     combinedBuckets: Array[Option[(K, V)]]) {
  8.   def get(key: K): Option[V] = {
  9.     val hash = hasher(key);
  10.     val indexInTopLevel = hash % size;
  11.     topLevel(indexInTopLevel) match {
  12.       case None => None
  13.       case Some((bucketBeginIndex, bucketSize)) => {
  14.         val indexInBucket = hash % bucketSize;
  15.         val indexInCombinedBuckets = bucketBeginIndex + indexInBucket
  16.         combinedBuckets(indexInCombinedBuckets) match {
  17.           case None => None
  18.           case Some((foundKey, foundValue)) => {
  19.             if (foundKey == key) Some(foundValue) else None
  20.           }
  21.         }
  22.       }
  23.     }
  24.   }
  25. }
  26.  
  27. class TetrisTableGenerator[K, V] {
  28.   def generateTetrisTable(map: Map[K, V], hasher: K => Int): TetrisTable[K, V] = {
  29.     if (map.isEmpty) {
  30.       TetrisTable[K, V](hasher, 0, Array(), Array())
  31.     }
  32.     val hashes = map.keys.zip(map.keys).toMap.mapValues(hasher);
  33.  
  34.     val initialTopLevelSize = Math.ceil(map.size / 4.0).toInt;
  35.     val listBuckets = bucketize(hashes, initialTopLevelSize);
  36.     val hashBuckets = listBuckets.map(listBucket => hashifyBucket(hashes, listBucket))
  37.     val (topLevel, combinedBuckets) = tetris(map, hashes, hashBuckets)
  38.     TetrisTable[K, V](hasher, hashBuckets.length, topLevel, combinedBuckets)
  39.   }
  40.  
  41.   private def tetris(
  42.       map: Map[K, V],
  43.       hashes: Map[K, Int],
  44.       hashBuckets: Array[Array[Option[K]]]):
  45.   (Array[Option[(Int, Int)]], Array[Option[(K, V)]]) = {
  46.     // Note: these bucket pointers currently point at themselves; that is updated inside
  47.     // tetrisInner to point at where the bucket is eventually placed.
  48.     val topLevel: Array[Option[(Int, Int)]] =
  49.       hashBuckets.zipWithIndex.map({
  50.         case (hashBucket, index) => Some((index, hashBucket.length))
  51.       }).toArray
  52.     val numberedHashBuckets =
  53.       hashBuckets.zipWithIndex.map({
  54.         case (hashBucket, number) => (number, hashBucket)
  55.       }).toList
  56.     val sortedNumberedHashBuckets =
  57.       numberedHashBuckets.sortWith({
  58.         case ((_, bucketA), (_, bucketB)) => {
  59.           bucketShouldBeInsertedBefore(bucketA, bucketB)
  60.         }
  61.       })
  62.     tetrisInner(map, hashes, topLevel, Vector(), sortedNumberedHashBuckets)
  63.   }
  64.  
  65.   private def getSpan(bucket: Array[Option[K]]): Int = {
  66.     bucket
  67.         .toList
  68.         .foldLeft(List[Option[K]]())({
  69.           case (Nil, None) => Nil
  70.           case (previous, anything) => previous :+ anything
  71.         })
  72.         .foldRight(List[Option[K]]())({
  73.           case (None, Nil) => Nil
  74.           case (anything, subsequent) => anything :: subsequent
  75.         })
  76.         .length
  77.   }
  78.  
  79.   private def bucketShouldBeInsertedBefore(
  80.       bucketA: Array[Option[K]],
  81.       bucketB: Array[Option[K]]):
  82.   Boolean = {
  83.     val bucketASpan = getSpan(bucketA);
  84.     val bucketBSpan = getSpan(bucketB);
  85.     if (bucketASpan < bucketBSpan) {
  86.       false
  87.     } else if (bucketASpan > bucketBSpan) {
  88.       true
  89.     } else {
  90.       val bucketAMembers = bucketA.count(_.nonEmpty);
  91.       val bucketBMembers = bucketB.count(_.nonEmpty);
  92.       if (bucketAMembers < bucketBMembers) {
  93.         false
  94.       } else if (bucketAMembers > bucketBMembers) {
  95.         true
  96.       } else {
  97.         false
  98.       }
  99.     }
  100.   }
  101.  
  102.   private def tetrisInner(
  103.       map: Map[K, V],
  104.       hashes: Map[K, Int],
  105.       topLevel: Array[Option[(Int, Int)]],
  106.       combinedBuckets: Vector[Option[(K, V)]],
  107.       numberedHashBuckets: List[(Int, Array[Option[K]])]):
  108.   (Array[Option[(Int, Int)]], Array[Option[(K, V)]]) = {
  109.     numberedHashBuckets match {
  110.       case Nil => (topLevel, combinedBuckets.toArray)
  111.       case (thisHashBucketNumber, thisHashBucket) :: remainingHashBuckets => {
  112.         val (newCombinedBuckets, bucketStartIndex) = merge(map, hashes, combinedBuckets, thisHashBucket)
  113.  
  114.         topLevel(thisHashBucketNumber) match {
  115.           case Some((bucketStartIndex, size)) => assert(bucketStartIndex == thisHashBucketNumber)
  116.           case _ => throw new RuntimeException("NO")
  117.         }
  118.  
  119.         val newTopLevelMember = Some(bucketStartIndex, thisHashBucket.length);
  120.         val newTopLevel = topLevel.updated(thisHashBucketNumber, newTopLevelMember)
  121.         tetrisInner(map, hashes, newTopLevel, newCombinedBuckets, remainingHashBuckets)
  122.       }
  123.     }
  124.   }
  125.  
  126.   private def merge(
  127.       map: Map[K, V],
  128.       hashes: Map[K, Int],
  129.       combinedBuckets: Vector[Option[(K, V)]],
  130.       bucket: Array[Option[K]]):
  131.   (Vector[Option[(K, V)]], Int) = {
  132.     mergeInner(map, hashes, combinedBuckets, bucket, 0)
  133.   }
  134.  
  135.   private def mergeInner(
  136.       map: Map[K, V],
  137.       hashes: Map[K, Int],
  138.       tetrisTable: Vector[Option[(K, V)]],
  139.       bucket: Array[Option[K]],
  140.       indexToInsertBucket: Int):
  141.   (Vector[Option[(K, V)]], Int) = {
  142.     val mergesCleanly =
  143.       tetrisTable.drop(indexToInsertBucket).zip(bucket).forall({
  144.         case (None, _) => true
  145.         case (_, None) => true
  146.         case (_, _) => false
  147.       })
  148.     if (mergesCleanly) {
  149.       val tetrisTableEndIndex = tetrisTable.size
  150.       val bucketEndIndexInTetrisTable = indexToInsertBucket + bucket.size
  151.       val tetrisTableNewSize = Math.max(tetrisTableEndIndex, bucketEndIndexInTetrisTable)
  152.       val numEmptiesNeededAtEndOfTetrisTable = tetrisTableNewSize - tetrisTable.size
  153.       val expandedTetrisTable = tetrisTable ++ (0 until numEmptiesNeededAtEndOfTetrisTable).map(_ => None)
  154.       val mergedTetrisTable =
  155.         bucket.zipWithIndex.foldLeft(expandedTetrisTable)({
  156.           case (currentTetrisTable, (bucketMember, indexInBucket)) => {
  157.             val indexInTable = indexToInsertBucket + indexInBucket;
  158.             val currentTableMember = currentTetrisTable(indexInTable)
  159.             val newTableMember =
  160.               (currentTableMember, bucketMember) match {
  161.                 case (tetrisTableMember, None) => tetrisTableMember
  162.                 case (None, Some(key)) => Some((key, map(key)))
  163.               };
  164.             currentTetrisTable.updated(indexInTable, newTableMember)
  165.           }
  166.         })
  167.       (mergedTetrisTable, indexToInsertBucket)
  168.     } else {
  169.       mergeInner(map, hashes, tetrisTable, bucket, indexToInsertBucket + 1)
  170.     }
  171.   }
  172.  
  173.   private def bucketize(hashes: Map[K, Int], topLevelSize: Int): Array[List[K]] = {
  174.     val initialBuckets = (0 until topLevelSize).map(_ => List[K]()).toArray
  175.     val filledBuckets =
  176.       hashes.foldLeft(initialBuckets)({
  177.         case (buckets, (key, hash)) => {
  178.           val index = hash % topLevelSize;
  179.           buckets.updated(index, key :: buckets(index))
  180.         }
  181.       })
  182.     // It's *extremely* unlikely, but if any of the buckets are too large (contain more than a
  183.     // fourth of all the values) then we need to pick a different topLevelSize.
  184.     if (filledBuckets.forall(_.size <= topLevelSize / 4)) {
  185.       // It's fine, return it.
  186.       filledBuckets
  187.     } else {
  188.       // Try again with a slightly larger top level size.
  189.       bucketize(hashes, topLevelSize + 1)
  190.     }
  191.   }
  192.  
  193.   private def hashifyBucket(hashes: Map[K, Int], listBucket: List[K]): Array[Option[K]] = {
  194.     hashifyBucketInner(hashes, listBucket, listBucket.size)
  195.   }
  196.  
  197.   private def hashifyBucketInner(hashes: Map[K, Int], listBucket: List[K], hashBucketSize: Int): Array[Option[K]] = {
  198.     val initialHashBucket: Array[Option[K]] = (0 until hashBucketSize).map(_ => None).toArray
  199.     val resultMaybeHashBucket =
  200.       listBucket.foldLeft[Option[Array[Option[K]]]](Some(initialHashBucket))((maybeHashBucket, key) => {
  201.         maybeHashBucket match {
  202.           case None => None
  203.           case Some(hashBucket) => {
  204.             val index = hashes(key) % hashBucketSize;
  205.             hashBucket(index) match {
  206.               case Some(_) => None
  207.               case None => Some(hashBucket.updated(index, Some(key)))
  208.             }
  209.           }
  210.         }
  211.       })
  212.     resultMaybeHashBucket match {
  213.       case Some(resultHashBucket) => resultHashBucket
  214.       case None => hashifyBucketInner(hashes, listBucket, hashBucketSize + 1)
  215.     }
  216.   }
  217. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement