1 /** 2 License: public domain 3 Authors: Simon Bürger 4 */ 5 6 module jive.set; 7 8 import std.algorithm; 9 import std.range; 10 11 /* 12 * TODO: 13 * - statically check that V is indeed hashable (otherwise typeid(V).getHash(&v) will produce garbage) 14 * - do some hash-scrambling to avoid some trivial worst cases 15 * - dont store the hash explicitly for trivially hashable objects (such as int) 16 * - postblit should decrease table size when appropriate 17 * - using T != V in find/opIn_r/remove is possibly unsafe, because it relies on compatible hash functions 18 * - use something smarter than linked lists for the buckets to improve worst-case 19 */ 20 21 /** 22 * An unordered set. Internally a hash table. Value-semantics. 23 */ 24 struct Set(V) 25 { 26 /** element inside the hash-table */ 27 private static struct Node 28 { 29 Node* next; 30 hash_t hash; 31 V value; 32 } 33 34 private Node*[] table; // the hash-table itself 35 private size_t count; // element count for O(1)-length 36 37 38 /////////////////////////////////////////////////////////////////// 39 // constructors 40 ////////////////////////////////////////////////////////////////////// 41 42 /** constructor that gets content from arbitrary range */ 43 this(Stuff)(Stuff data) 44 if(isInputRange!Stuff && is(ElementType!Stuff:V)) 45 { 46 if(hasLength!Stuff) 47 reserve(something); 48 49 foreach(ref x; data) 50 add(x); 51 } 52 53 /** post-blit that does a full copy */ 54 this(this) 55 { 56 static Node* dupNode(Node* node) 57 { 58 if(node is null) 59 return null; 60 return new Node(dupNode(node.next), node.hash, node.value); 61 } 62 63 table = table.dup; // TODO: use smaller table if sufficient 64 foreach(ref bucket; table) 65 bucket = dupNode(bucket); 66 } 67 68 69 //////////////////////////////////////////////////////////////// 70 // metrics 71 ////////////////////////////////////////////////////////////////////// 72 73 /** returns: true if set is empty */ 74 bool empty() const @property nothrow @safe 75 { 76 return count == 0; 77 } 78 79 /** returns: number of elements in the set */ 80 size_t length() const @property nothrow @safe 81 { 82 return count; 83 } 84 85 /** 86 * Resize hashtable such that the set can contain minSize elements without 87 * further resizing. Note that allocation still occurs on addition, even 88 * after using reserve. 89 */ 90 void reserve(size_t size) 91 { 92 if(size <= table.length) 93 return; 94 95 foreach(prime; primeList) 96 if(prime >= size) 97 { 98 size = prime; 99 break; 100 } 101 102 auto old = table; 103 table = new Node*[size]; 104 105 foreach(n; old) 106 while(n !is null) 107 { 108 auto m = n; 109 n = n.next; 110 111 auto index = m.hash % table.length; 112 m.next = table[index]; 113 table[index] = m; 114 } 115 116 delete old; 117 } 118 119 120 ////////////////////////////////////////////////////////////////////// 121 // finding, reading 122 ////////////////////////////////////////////////////////////////////// 123 124 /** private helper, null if not found */ 125 package inout(Node)* findNode(T)(auto ref const(T) value) inout 126 if(is(typeof(T.init == V.init))) 127 { 128 if(table.length == 0) // the '%' breaks on zero length, so check for it 129 return null; 130 131 auto hash = typeid(T).getHash(&value); 132 auto index = hash % table.length; 133 134 for(inout(Node)* node = table[index]; node !is null; node = node.next) 135 if(node.hash == hash) 136 if(node.value == value) 137 return node; 138 return null; 139 } 140 141 /** returns: pointer to value inside set, null if not found */ 142 inout(V)* find(T)(auto ref const(T) value) inout 143 if(is(typeof(T.init == V.init))) 144 { 145 auto node = findNode(value); 146 if(node is null) 147 return null; 148 return &node.value; 149 } 150 151 /** returns: true if value is found in the set */ 152 bool opIn_r(T)(auto ref const(T) value) const 153 if(is(typeof(T.init == V.init))) 154 { 155 return findNode(value) !is null; 156 } 157 158 159 ////////////////////////////////////////////////////////////////////// 160 // add, remove 161 ////////////////////////////////////////////////////////////////////// 162 163 /** 164 * Add an element to the set. 165 * returns: true if new element was added, false if not (due to duplicate already present) 166 */ 167 bool add(V value) 168 { 169 reserve(count+1); 170 171 auto hash = typeid(V).getHash(&value); 172 auto index = hash % table.length; 173 174 // check if element already exists 175 for(auto node = table[index]; node !is null; node = node.next) 176 if(node.hash == hash) 177 if(node.value == value) 178 return false; 179 180 // add new element 181 table[index] = new Node(table[index], hash, move(value)); 182 ++count; 183 return true; 184 } 185 186 /** 187 * Add elements from a range to the set. 188 * returns: number of elements added 189 */ 190 size_t add(Stuff)(Stuff data) 191 if(!is(Stuff:V) && isInputRange!Stuff && is(ElementType!Stuff:V)) 192 { 193 if(hasLength!Stuff) 194 reserve(count + data.length); // might be too much due to duplicates, but typically it should be a good idea 195 196 size_t r = 0; 197 foreach(x; data) 198 if(add(x)) 199 ++r; 200 return r; 201 } 202 203 /** 204 * Remove an element from the set. 205 * returns: true if removed, false if not found 206 */ 207 bool remove(T)(auto ref const(T) value) 208 if(is(typeof(T.init == V.init))) 209 { 210 if(table.length == 0) // the '%' breaks on zero length, so check for it 211 return false; 212 213 auto hash = typeid(T).getHash(&value); 214 auto index = hash % table.length; 215 216 for(Node** node = &table[index]; *node !is null; node = &(*node).next) 217 if((*node).hash == hash) 218 if((*node).value == value) 219 { 220 Node* x = *node; 221 *node = (*node).next; 222 delete x; 223 --count; 224 return true; 225 } 226 227 return false; 228 } 229 230 /** 231 * Remove multiple elements from the set. 232 * returns: number of elements removed 233 */ 234 size_t remove(Stuff)(Stuff data) 235 if(!is(Stuff:V) && isInputRange!Stuff && is(ElementType!Stuff:V)) 236 { 237 size_t r = 0; 238 foreach(x; data) // TODO: 'ref' ? 239 if(remove(x)) 240 ++r; 241 return r; 242 } 243 244 245 ////////////////////////////////////////////////////////////////////// 246 // Traversal 247 ////////////////////////////////////////////////////////////////////// 248 249 /** 250 * Range types for iterating over elements of the set. 251 * Implements std.range.isForwardRange 252 */ 253 alias Range = .Range!(V, Node); 254 alias ConstRange = .Range!(const(V), const(Node)); 255 alias ImmutableRange = .Range!(immutable(V), immutable(Node)); 256 257 /** default range, iterates over everything in arbitrary order */ 258 Range opSlice() 259 { 260 return Range(table); 261 } 262 263 /** ditto */ 264 ConstRange opSlice() const 265 { 266 return ConstRange(table.dup); // TODO: remove the '.dup' 267 } 268 269 /** ditto */ 270 ImmutableRange opSlice() immutable 271 { 272 return ImmutableRange(table.dup); // TODO: remove the '.dup' 273 } 274 } 275 276 /** basic usage */ 277 unittest 278 { 279 Set!int a; 280 assert(a.add(1) == true); 281 assert(a.add([4,2,3,1,5]) == 4); 282 assert(a.remove(7) == false); 283 assert(a.remove([1,1,8,2]) == 2); 284 assert(a.remove(3) == true); 285 assert(a.length == 2); 286 } 287 288 unittest 289 { 290 Set!int a; 291 a.add(iota(0,10)); 292 const Set!int b = cast(const)a; 293 immutable Set!int c = cast(immutable)a; 294 assert(equal(b[], a[])); 295 assert(equal(c[], a[])); 296 assert(isForwardRange!(Set!int.Range)); 297 assert(isForwardRange!(Set!int.ConstRange)); 298 assert(isForwardRange!(Set!int.ImmutableRange)); 299 } 300 301 ////////////////////////////////////////////////////////////////////// 302 /// internals of the hash-table 303 ////////////////////////////////////////////////////////////////////// 304 305 private struct Range(V, Node) 306 { 307 private Node* curr; 308 private Node*[] table; 309 310 private this(Node*[] _table) 311 { 312 table = _table; 313 while(!table.empty && table[0] is null) 314 table.popFront; 315 if(!table.empty) 316 { 317 curr = table.front; 318 table.popFront; 319 } 320 } 321 322 bool empty() const 323 { 324 return curr is null; 325 } 326 327 ref V front() 328 { 329 return curr.value; 330 } 331 332 void popFront() 333 { 334 curr = curr.next; 335 if(curr !is null) 336 return; 337 while(!table.empty && table[0] is null) 338 table.popFront; 339 if(!table.empty) 340 { 341 curr = table.front; 342 table.popFront; 343 } 344 } 345 346 Range save() 347 { 348 return this; 349 } 350 } 351 352 /** prime numbers used as hash-table sizes */ 353 static if(size_t.sizeof >= long.sizeof) 354 private static immutable size_t[] primeList = [ 355 7,13,31,61, 356 127,251,509,1021, 357 2039,4093,8191,16381, 358 32749,65521,131071,262139, 359 524287,1048573,2097143,4194301, 360 8388593,16777213,33554393,67108859, 361 134217689,268435399,536870909,1073741789, 362 2147483647,4294967291,8589934583,17179869143, 363 34359738337,68719476731,137438953447,274877906899, 364 549755813881,1099511627689,2199023255531,4398046511093, 365 ]; 366 367 else 368 private static immutable size_t[] primeList = [ 369 7,13,31,61, 370 127,251,509,1021, 371 2039,4093,8191,16381, 372 32749,65521,131071,262139, 373 524287,1048573,2097143,4194301, 374 8388593,16777213,33554393,67108859, 375 134217689,268435399,536870909,1073741789, 376 ];