Advertisement
Guest User

Untitled

a guest
Jan 16th, 2018
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 10.37 KB | None | 0 0
  1. package net.verdagon.vcompiler.carpenter
  2.  
  3. case class ChunkedTetrisTable[K, V](
  4.     hasher: K => Int,
  5.     directory: Array[Option[DirectoryEntry]],
  6.     combinedBuckets: Array[Option[(K, V)]]) {
  7.   def get(key: K): Option[V] = {
  8.     val hash = hasher(key);
  9.     val indexInDirectory = hash % directory.length;
  10.     directory(indexInDirectory) match {
  11.       case None => None
  12.       case Some(DirectoryEntry(bucketBeginIndex, bucketSize)) => {
  13.         val indexInBucket = hash % bucketSize;
  14.         val indexInCombinedBuckets = bucketBeginIndex + indexInBucket
  15.         combinedBuckets(indexInCombinedBuckets) match {
  16.           case None => None
  17.           case Some((foundKey, foundValue)) => {
  18.             if (foundKey == key) Some(foundValue) else None
  19.           }
  20.         }
  21.       }
  22.     }
  23.   }
  24. }
  25.  
  26. case class Table[K](members: Array[Option[K]])
  27. case class BucketTable[K](bucketIndex: Int, table: Table[K])
  28.  
  29. case class DirectoryEntry(indexInTable: Int, size: Int)
  30.  
  31. case class IntermediateTetrisTable[K](
  32.     bucketStartIndexByBucketIndex: Map[Int, Int],
  33.     members: Array[Option[K]])
  34.  
  35. class ChunkedTetrisTableGenerator[K, V] {
  36.   def generateTetrisTable(map: Map[K, V], hasher: K => Int): ChunkedTetrisTable[K, V] = {
  37.     if (map.isEmpty) {
  38.       ChunkedTetrisTable[K, V](hasher, Array(), Array())
  39.     }
  40.     val hashes = map.keys.zip(map.keys).toMap.mapValues(hasher);
  41.  
  42.     val initialDirectorySize = Math.ceil(map.size / 4.0).toInt;
  43.     val listBuckets = bucketize(hashes, initialDirectorySize);
  44.     val tablesByBucketIndex =
  45.       listBuckets.zipWithIndex.map({
  46.         case (listBucket, index) => BucketTable(index, Table[K](hashifyBucket(hashes, listBucket)))
  47.       })
  48.  
  49.     val bucketGroupSizeInitial = 300; // chosen arbitrarily, must experiment
  50.     val numBucketGroups = Math.max(map.size / bucketGroupSizeInitial, 1);
  51.     val hashBucketGroups = groupHashBuckets(tablesByBucketIndex, numBucketGroups);
  52.  
  53.     val directoriesAndTables: List[(Map[Int, DirectoryEntry], Table[K])] =
  54.       hashBucketGroups.map(hashBucketsInGroup => {
  55.         tetris(hashes, hashBucketsInGroup)
  56.       })
  57.  
  58.     val (directoryMap, table) = combineGroups(0, directoriesAndTables)
  59.  
  60.     val directoryArray =
  61.       listBuckets.indices.toArray.map(i => directoryMap.get(i))
  62.  
  63.     val tableWithKeysAndValues =
  64.       table.map(maybeKey => maybeKey.map(key => (key -> map(key))))
  65.  
  66.     ChunkedTetrisTable[K, V](hasher, directoryArray, tableWithKeysAndValues.toArray)
  67.  
  68. //    ChunkedTetrisTable[K, V](hasher, Array(), Array())
  69.   }
  70.  
  71.   private def combineGroups(
  72.       combinedTableSizeSoFar: Int,
  73.       directoriesAndTables: List[(Map[Int, DirectoryEntry], Table[K])]):
  74.   (Map[Int, DirectoryEntry], List[Option[K]]) = {
  75.     directoriesAndTables match {
  76.       case Nil => {
  77.         (Map(), List())
  78.       }
  79.       case (headDirectory, headTable) :: tailDirectoriesAndTables => {
  80.         val combinedTableSizeAfterThis = combinedTableSizeSoFar + headTable.members.length;
  81.         val (tailDirectory, tailCombinedTable) =
  82.           combineGroups(combinedTableSizeAfterThis, tailDirectoriesAndTables)
  83.         val adjustedHeadDirectory =
  84.           headDirectory.mapValues({
  85.             case DirectoryEntry(indexInTable, size) => DirectoryEntry(indexInTable + combinedTableSizeSoFar, size)
  86.           })
  87.         val newDirectory = tailDirectory ++ adjustedHeadDirectory
  88.         val combinedTable = headTable.members.toList ++ tailCombinedTable
  89.         (newDirectory, combinedTable)
  90.       }
  91.     }
  92.   }
  93.  
  94.   private def groupHashBuckets(
  95.       bucketTables: List[BucketTable[K]],
  96.       numBucketGroups: Int):
  97.   List[List[BucketTable[K]]] = {
  98.     val bucketTablesAndScores = bucketTables.map(bucketTable => (bucketTable, getBucketScore(bucketTable)));
  99.  
  100.     val bucketTablesByScore =
  101.       bucketTablesAndScores.foldLeft(Map[(Int, Int), List[BucketTable[K]]]())({
  102.         case (bucketTablesByScore, (bucketTable, score)) => {
  103.           bucketTablesByScore + (score -> (bucketTable :: bucketTablesByScore.getOrElse(score, List())))
  104.         }
  105.       })
  106.  
  107.     val sortedBuckets = bucketTablesByScore.values.reduceRight(_ ++ _)
  108.  
  109.     val numberedHashBuckets =
  110.       sortedBuckets.zipWithIndex.map({
  111.         case (bucketTable, index) => (index % numBucketGroups, bucketTable)
  112.       });
  113.     val initialBucketGroups: Array[List[BucketTable[K]]] =
  114.       (0 until numBucketGroups).map(_ => List()).toArray
  115.     // Round robin insert them into the buckets
  116.     val resultBucketGroups =
  117.       numberedHashBuckets.foldLeft(initialBucketGroups)({
  118.         case (oldBucketGroups, (groupIndex, hashBucket)) => {
  119.           val oldBucketGroup = oldBucketGroups(groupIndex)
  120.           val newBucketGroup = oldBucketGroup :+ hashBucket
  121.           val newBucketGroups = oldBucketGroups.updated(groupIndex, newBucketGroup)
  122.           newBucketGroups
  123.         }
  124.       })
  125.     resultBucketGroups.toList
  126.   }
  127.  
  128.   private def tetris(
  129.       hashes: Map[K, Int],
  130.       hashBuckets: List[BucketTable[K]]):
  131.   (Map[Int, DirectoryEntry], Table[K]) = {
  132.     // hashBuckets is already sorted
  133.     tetrisInner(hashes, Map(), Vector(), hashBuckets)
  134.   }
  135.  
  136.   private def getSpan(bucket: Array[Option[K]]): Int = {
  137.     bucket
  138.         .toList
  139.         .foldLeft(List[Option[K]]())({
  140.           case (Nil, None) => Nil
  141.           case (previous, anything) => previous :+ anything
  142.         })
  143.         .foldRight(List[Option[K]]())({
  144.           case (None, Nil) => Nil
  145.           case (anything, subsequent) => anything :: subsequent
  146.         })
  147.         .length
  148.   }
  149.  
  150.   private def getBucketScore(bucket: BucketTable[K]): (Int, Int) = {
  151.     (getSpan(bucket.table.members), bucket.table.members.count(_.nonEmpty))
  152.   }
  153.  
  154.   private def tetrisInner(
  155.       hashes: Map[K, Int],
  156.       directory: Map[Int, DirectoryEntry],
  157.       combinedTable: Vector[Option[K]],
  158.       hashBuckets: List[BucketTable[K]]):
  159.   (Map[Int, DirectoryEntry], Table[K]) = {
  160.     hashBuckets match {
  161.       case Nil => (directory, Table[K](combinedTable.toArray))
  162.       case thisHashBucket :: remainingHashBuckets => {
  163.         val (newCombinedBuckets, bucketStartIndex) = merge(hashes, combinedTable, thisHashBucket.table)
  164.  
  165. //        directory.get(thisHashBucket.bucketIndex) match {
  166. //          case Some((existingBucketStartIndex, size)) => assert(bucketStartIndex == thisHashBucket.bucketIndex)
  167. //          case _ => throw new RuntimeException("NO")
  168. //        }
  169.         assert(!directory.contains(thisHashBucket.bucketIndex))
  170.  
  171.         val newDirectoryEntry =
  172.           DirectoryEntry(bucketStartIndex, thisHashBucket.table.members.length);
  173.         val newDirectoryMapEntry = (thisHashBucket.bucketIndex -> newDirectoryEntry)
  174.         val newDirectory = (directory + newDirectoryMapEntry)
  175.         tetrisInner(hashes, newDirectory, newCombinedBuckets, remainingHashBuckets)
  176.       }
  177.     }
  178.   }
  179.  
  180.   private def merge(
  181.       hashes: Map[K, Int],
  182.       combinedBuckets: Vector[Option[K]],
  183.       bucket: Table[K]):
  184.   (Vector[Option[K]], Int) = {
  185.     mergeInner(hashes, combinedBuckets, bucket, 0)
  186.   }
  187.  
  188.   private def mergeInner(
  189.       hashes: Map[K, Int],
  190.       tetrisTable: Vector[Option[K]],
  191.       bucket: Table[K],
  192.       indexToInsertBucket: Int):
  193.   (Vector[Option[K]], Int) = {
  194.     val mergesCleanly =
  195.       tetrisTable.drop(indexToInsertBucket).zip(bucket.members).forall({
  196.         case (None, _) => true
  197.         case (_, None) => true
  198.         case (_, _) => false
  199.       })
  200.     if (mergesCleanly) {
  201.       val tetrisTableEndIndex = tetrisTable.size
  202.       val bucketEndIndexInTetrisTable = indexToInsertBucket + bucket.members.length
  203.       val tetrisTableNewSize = Math.max(tetrisTableEndIndex, bucketEndIndexInTetrisTable)
  204.       val numEmptiesNeededAtEndOfTetrisTable = tetrisTableNewSize - tetrisTable.size
  205.       val expandedTetrisTable = tetrisTable ++ (0 until numEmptiesNeededAtEndOfTetrisTable).map(_ => None)
  206.       val mergedTetrisTable =
  207.         bucket.members.zipWithIndex.foldLeft(expandedTetrisTable)({
  208.           case (currentTetrisTable, (bucketMember, indexInBucket)) => {
  209.             val indexInTable = indexToInsertBucket + indexInBucket;
  210.             val currentTableMember = currentTetrisTable(indexInTable)
  211.             val newTableMember =
  212.               (currentTableMember, bucketMember) match {
  213.                 case (tetrisTableMember, None) => tetrisTableMember
  214.                 case (None, Some(key)) => Some(key)
  215.               };
  216.             currentTetrisTable.updated(indexInTable, newTableMember)
  217.           }
  218.         })
  219.       (mergedTetrisTable, indexToInsertBucket)
  220.     } else {
  221.       mergeInner(hashes, tetrisTable, bucket, indexToInsertBucket + 1)
  222.     }
  223.   }
  224.  
  225.   private def bucketize(hashes: Map[K, Int], directorySize: Int): List[List[K]] = {
  226.     val initialBuckets = (0 until directorySize).map(_ => List[K]()).toVector
  227.     val filledBuckets =
  228.       hashes.foldLeft(initialBuckets)({
  229.         case (buckets, (key, hash)) => {
  230.           val index = hash % directorySize;
  231.           buckets.updated(index, key :: buckets(index))
  232.         }
  233.       })
  234.     // It's *extremely* unlikely, but if any of the buckets are too large (contain more than a
  235.     // fourth of all the values) then we need to pick a different directorySize.
  236.     if (filledBuckets.forall(_.size <= directorySize / 4)) {
  237.       // It's fine, return it.
  238.       filledBuckets.toList
  239.     } else {
  240.       // Try again with a slightly larger top level size.
  241.       bucketize(hashes, directorySize + 1)
  242.     }
  243.   }
  244.  
  245.   private def hashifyBucket(hashes: Map[K, Int], listBucket: List[K]): Array[Option[K]] = {
  246.     hashifyBucketInner(hashes, listBucket, listBucket.size)
  247.   }
  248.  
  249.   private def hashifyBucketInner(hashes: Map[K, Int], listBucket: List[K], hashBucketSize: Int): Array[Option[K]] = {
  250.     val initialHashBucket: Array[Option[K]] = (0 until hashBucketSize).map(_ => None).toArray
  251.     val resultMaybeHashBucket =
  252.       listBucket.foldLeft[Option[Array[Option[K]]]](Some(initialHashBucket))((maybeHashBucket, key) => {
  253.         maybeHashBucket match {
  254.           case None => None
  255.           case Some(hashBucket) => {
  256.             val index = hashes(key) % hashBucketSize;
  257.             hashBucket(index) match {
  258.               case Some(_) => None
  259.               case None => Some(hashBucket.updated(index, Some(key)))
  260.             }
  261.           }
  262.         }
  263.       })
  264.     resultMaybeHashBucket match {
  265.       case Some(resultHashBucket) => resultHashBucket
  266.       case None => hashifyBucketInner(hashes, listBucket, hashBucketSize + 1)
  267.     }
  268.   }
  269. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement