Advertisement
twist_vector

Generic hash table

Mar 11th, 2011
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.25 KB | None | 0 0
  1. #
  2. #
  3. # Nimrod's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9.  
  10. ## The ``gentabs`` module implements an efficient hash table that is a key-value
  11. ## mapping. The keys are required to be strings, but the values may be any Nimrod
  12. ## or user defined type. This module supports matching of keys in case-sensitive,
  13. ## case-insensitive and style-insensitive modes.
  14.  
  15. import
  16. os, hashes, strutils
  17.  
  18. type
  19. TGenTableMode* = enum ## describes the table's key matching mode
  20. modeCaseSensitive, ## case sensitive matching of keys
  21. modeCaseInsensitive, ## case insensitive matching of keys
  22. modeStyleInsensitive ## style sensitive matching of keys
  23.  
  24. TGenKeyValuePair[T] = tuple[key: string, val: T]
  25. TGenKeyValuePairSeq[T] = seq[TGenKeyValuePair[T]]
  26. TGenTable*[T] = object of TObject
  27. counter: int
  28. data: TGenKeyValuePairSeq[T]
  29. mode: TGenTableMode
  30.  
  31. PGenTable*[T] = ref TGenTable[T] ## use this type to declare hash tables
  32.  
  33.  
  34. const
  35. growthFactor = 2
  36. startSize = 64
  37.  
  38.  
  39. proc len*[T](tbl: PGenTable[T]): int =
  40. ## returns the number of keys in `tbl`.
  41. result = tbl.counter
  42.  
  43.  
  44. iterator pairs*[T](tbl: PGenTable[T]): tuple[key: string, value: T] =
  45. ## iterates over any (key, value) pair in the table `tbl`.
  46. for h in 0..high(tbl.data):
  47. if not isNil(tbl.data[h].key):
  48. yield (tbl.data[h].key, tbl.data[h].val)
  49.  
  50.  
  51. proc myhash[T](tbl: PGenTable[T], key: string): THash =
  52. case tbl.mode
  53. of modeCaseSensitive: result = hashes.hash(key)
  54. of modeCaseInsensitive: result = hashes.hashIgnoreCase(key)
  55. of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key)
  56.  
  57.  
  58. proc myCmp[T](tbl: PGenTable[T], a, b: string): bool =
  59. case tbl.mode
  60. of modeCaseSensitive: result = cmp(a, b) == 0
  61. of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0
  62. of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0
  63.  
  64.  
  65. proc mustRehash(length, counter: int): bool =
  66. assert(length > counter)
  67. result = (length * 2 < counter * 3) or (length - counter < 4)
  68.  
  69.  
  70. proc newGenTable*[T](mode: TGenTableMode): PGenTable[T] =
  71. ## creates a new generic hash table that is empty.
  72. new(result)
  73. result.mode = mode
  74. result.counter = 0
  75. newSeq(result.data, startSize)
  76.  
  77.  
  78.  
  79. proc nextTry(h, maxHash: THash): THash {.inline.} =
  80. result = ((5 * h) + 1) and maxHash
  81.  
  82.  
  83. proc RawGet[T](tbl: PGenTable[T], key: string): int =
  84. var h: THash
  85. h = myhash(tbl, key) and high(tbl.data) # start with real hash value
  86. while not isNil(tbl.data[h].key):
  87. if mycmp(tbl, tbl.data[h].key, key):
  88. return h
  89. h = nextTry(h, high(tbl.data))
  90. result = - 1
  91.  
  92.  
  93. proc RawInsert[T](tbl: PGenTable[T], data: var TGenKeyValuePairSeq[T], key: string, val: T) =
  94. var h: THash
  95. h = myhash(tbl, key) and high(data)
  96. while not isNil(data[h].key):
  97. h = nextTry(h, high(data))
  98. data[h].key = key
  99. data[h].val = val
  100.  
  101.  
  102. proc Enlarge[T](tbl: PGenTable[T]) =
  103. var n: TGenKeyValuePairSeq[T]
  104. newSeq(n, len(tbl.data) * growthFactor)
  105. for i in countup(0, high(tbl.data)):
  106. if not isNil(tbl.data[i].key): RawInsert[T](tbl, n, tbl.data[i].key, tbl.data[i].val)
  107. swap(tbl.data, n)
  108.  
  109.  
  110. proc hasKey*[T](tbl: PGenTable[T], key: string): bool =
  111. ## returns true iff `key` is in the table `tbl`.
  112. result = rawGet(tbl, key) >= 0
  113.  
  114.  
  115. proc `[]`*[T](tbl: PGenTable[T], key: string): T =
  116. ## retrieves the value at ``tbl[key]``. If `key` is not in `tbl`, "" is returned
  117. ## and no exception is raised. One can check with ``hasKey`` whether the key
  118. ## exists.
  119. var index: int
  120. index = RawGet(tbl, key)
  121. if index >= 0: result = tbl.data[index].val
  122. #else: result = "" ### Not sure what to do here
  123.  
  124. proc `[]=`*[T](tbl: PGenTable[T], key: string, val: T) =
  125. ## puts a (key, value)-pair into `tbl`.
  126. var index = RawGet(tbl, key)
  127. if index >= 0:
  128. tbl.data[index].val = val
  129. else:
  130. if mustRehash(len(tbl.data), tbl.counter): Enlarge(tbl)
  131. RawInsert(tbl, tbl.data, key, val)
  132. inc(tbl.counter)
  133.  
  134.  
  135. when isMainModule:
  136. #
  137. # Verify tables of integer values (string keys)
  138. #
  139. var x = newGenTable[int](modeCaseInsensitive)
  140. x["one"] = 1
  141. x["two"] = 2
  142. x["three"] = 3
  143. x["four"] = 4
  144. x["five"] = 5
  145. assert(len(x) == 5) # length procedure works
  146. assert(x["one"] == 1) # case-sensitive lookup works
  147. assert(x["ONE"] == 1) # case-insensitive should work for this table
  148. assert(x["one"]+x["two"] == 3) # make sure we're getting back ints
  149. assert(x.hasKey("one")) # hasKey should return 'true' for a key of "one"...
  150. assert(not x.hasKey("NOPE")) # ...but key "NOPE" is not in the table.
  151. for k,v in pairs(x): # make sure the 'pairs' iterator works
  152. assert(x[k]==v)
  153. echo()
  154.  
  155.  
  156. #
  157. # Verify a table of user-defined types
  158. #
  159. type
  160. TMyType = tuple[first, second: string] # a pair of strings
  161.  
  162. var y = newGenTable[TMyType](modeCaseInsensitive) # hash table where each value is TMyType tuple
  163.  
  164. #var junk: TMyType = ("OK", "Here")
  165. y["Hello"] = ("Hello", "World")
  166. y["Goodbye"] = ("Goodbye", "Everyone")
  167. #y["Hello"] = TMyType( ("Hello", "World") )
  168. #y["Goodbye"] = TMyType( ("Goodbye", "Everyone") )
  169.  
  170. assert( y["Hello"].first == "Hello" )
  171. assert( y["Hello"].second == "World" )
  172.  
  173.  
  174. #
  175. # Verify table of tables
  176. #
  177. var z: PGenTable[ PGenTable[int] ] # hash table where each value is a hash table of ints
  178.  
  179. z = newGenTable[PGenTable[int]](modeCaseInsensitive)
  180. z["first"] = newGenTable[int](modeCaseInsensitive)
  181. z["first"]["one"] = 1
  182. z["first"]["two"] = 2
  183. z["first"]["three"] = 3
  184.  
  185. z["second"] = newGenTable[int](modeCaseInsensitive)
  186. z["second"]["red"] = 10
  187. z["second"]["blue"] = 20
  188.  
  189. assert(len(z) == 2) # length of outer table
  190. assert(len(z["first"]) == 3) # length of "first" table
  191. assert(len(z["second"]) == 2) # length of "second" table
  192. assert( z["first"]["one"] == 1) # retrieve from first inner table
  193. assert( z["second"]["red"] == 10) # retrieve from second inner table
  194.  
  195. for k,v in pairs(z):
  196. echo( "$# ($#) ->" % [k,$len(v)] )
  197. #for k2,v2 in pairs(v):
  198. # echo( " $# <-> $#" % [k2,$v2] )
  199. echo()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement