1 /**
2 License: public domain
3 Authors: Simon Bürger
4 */
5 
6 module jive.set;
7 
8 import std.algorithm;
9 import std.range;
10 
11 /*
12  *  TODO:
13  *    - statically check that V is indeed hashable (otherwise typeid(V).getHash(&v) will produce garbage)
14  *    - do some hash-scrambling to avoid some trivial worst cases
15  *    - dont store the hash explicitly for trivially hashable objects (such as int)
16  *    - postblit should decrease table size when appropriate
17  *    - using T != V in find/opIn_r/remove is possibly unsafe, because it relies on compatible hash functions
18  *    - use something smarter than linked lists for the buckets to improve worst-case
19  */
20 
21 /**
22  * An unordered set. Internally a hash table. Value-semantics.
23  */
24 struct Set(V)
25 {
26 	/** element inside the hash-table */
27 	private static struct Node
28 	{
29 		Node* next;
30 		hash_t hash;
31 		V value;
32 	}
33 
34 	private Node*[] table;        // the hash-table itself
35 	private size_t count;         // element count for O(1)-length
36 
37 
38 	///////////////////////////////////////////////////////////////////
39 	// constructors
40 	//////////////////////////////////////////////////////////////////////
41 
42 	/** constructor that gets content from arbitrary range */
43 	this(Stuff)(Stuff data)
44 		if(isInputRange!Stuff && is(ElementType!Stuff:V))
45 	{
46 		if(hasLength!Stuff)
47 			reserve(something);
48 
49 		foreach(ref x; data)
50 			add(x);
51 	}
52 
53 	/** post-blit that does a full copy */
54 	this(this)
55 	{
56 		static Node* dupNode(Node* node)
57 		{
58 			if(node is null)
59 				return null;
60 			return new Node(dupNode(node.next), node.hash, node.value);
61 		}
62 
63 		table = table.dup; // TODO: use smaller table if sufficient
64 		foreach(ref bucket; table)
65 			bucket = dupNode(bucket);
66 	}
67 
68 
69 	////////////////////////////////////////////////////////////////
70 	// metrics
71 	//////////////////////////////////////////////////////////////////////
72 
73 	/** returns: true if set is empty */
74 	bool empty() const @property nothrow @safe
75 	{
76 		return count == 0;
77 	}
78 
79 	/** returns: number of elements in the set */
80 	size_t length() const @property nothrow @safe
81 	{
82 		return count;
83 	}
84 
85 	/**
86 	 * Resize hashtable such that the set can contain minSize elements without
87 	 * further resizing. Note that allocation still occurs on addition, even
88 	 * after using reserve.
89 	 */
90 	void reserve(size_t size)
91 	{
92 		if(size <= table.length)
93 			return;
94 
95 		foreach(prime; primeList)
96 			if(prime >= size)
97 			{
98 				size = prime;
99 				break;
100 			}
101 
102 		auto old = table;
103 		table = new Node*[size];
104 
105 		foreach(n; old)
106 			while(n !is null)
107 			{
108 				auto m = n;
109 				n = n.next;
110 
111 				auto index = m.hash % table.length;
112 				m.next = table[index];
113 				table[index] = m;
114 			}
115 
116 		delete old;
117 	}
118 
119 
120 	//////////////////////////////////////////////////////////////////////
121 	// finding, reading
122 	//////////////////////////////////////////////////////////////////////
123 
124 	/** private helper, null if not found */
125 	package inout(Node)* findNode(T)(auto ref const(T) value) inout
126 		if(is(typeof(T.init == V.init)))
127 	{
128 		if(table.length == 0)	// the '%' breaks on zero length, so check for it
129 			return null;
130 
131 		auto hash = typeid(T).getHash(&value);
132 		auto index = hash % table.length;
133 
134 		for(inout(Node)* node = table[index]; node !is null; node = node.next)
135 			if(node.hash == hash)
136 				if(node.value == value)
137 					return node;
138 		return null;
139 	}
140 
141 	/** returns: pointer to value inside set, null if not found */
142 	inout(V)* find(T)(auto ref const(T) value) inout
143 		if(is(typeof(T.init == V.init)))
144 	{
145 		auto node = findNode(value);
146 		if(node is null)
147 			return null;
148 		return &node.value;
149 	}
150 
151 	/** returns: true if value is found in the set */
152 	bool opIn_r(T)(auto ref const(T) value) const
153 		if(is(typeof(T.init == V.init)))
154 	{
155 		return findNode(value) !is null;
156 	}
157 
158 
159 	//////////////////////////////////////////////////////////////////////
160 	// add, remove
161 	//////////////////////////////////////////////////////////////////////
162 
163 	/**
164 	 * Add an element to the set.
165 	 * returns: true if new element was added, false if not (due to duplicate already present)
166 	 */
167 	bool add(V value)
168 	{
169 		reserve(count+1);
170 
171 		auto hash = typeid(V).getHash(&value);
172 		auto index = hash % table.length;
173 
174 		// check if element already exists
175 		for(auto node = table[index]; node !is null; node = node.next)
176 			if(node.hash == hash)
177 				if(node.value == value)
178 					return false;
179 
180 		// add new element
181 		table[index] = new Node(table[index], hash, move(value));
182 		++count;
183 		return true;
184 	}
185 
186 	/**
187 	 * Add elements from a range to the set.
188 	 * returns: number of elements added
189 	 */
190 	size_t add(Stuff)(Stuff data)
191 		if(!is(Stuff:V) && isInputRange!Stuff && is(ElementType!Stuff:V))
192 	{
193 		if(hasLength!Stuff)
194 			reserve(count + data.length); // might be too much due to duplicates, but typically it should be a good idea
195 
196 		size_t r = 0;
197 		foreach(x; data)
198 			if(add(x))
199 				++r;
200 		return r;
201 	}
202 
203 	/**
204 	 * Remove an element from the set.
205 	 * returns: true if removed, false if not found
206 	 */
207 	bool remove(T)(auto ref const(T) value)
208 		if(is(typeof(T.init == V.init)))
209 	{
210 		if(table.length == 0)	// the '%' breaks on zero length, so check for it
211 			return false;
212 
213 		auto hash = typeid(T).getHash(&value);
214 		auto index = hash % table.length;
215 
216 		for(Node** node = &table[index]; *node !is null; node = &(*node).next)
217 			if((*node).hash == hash)
218 				if((*node).value == value)
219 				{
220 					Node* x = *node;
221 					*node = (*node).next;
222 					delete x;
223 					--count;
224 					return true;
225 				}
226 
227 		return false;
228 	}
229 
230 	/**
231 	 * Remove multiple elements from the set.
232 	 * returns: number of elements removed
233 	 */
234 	size_t remove(Stuff)(Stuff data)
235 		if(!is(Stuff:V) && isInputRange!Stuff && is(ElementType!Stuff:V))
236 	{
237 		size_t r = 0;
238 		foreach(x; data) // TODO: 'ref' ?
239 			if(remove(x))
240 				++r;
241 		return r;
242 	}
243 
244 
245 	//////////////////////////////////////////////////////////////////////
246 	// Traversal
247 	//////////////////////////////////////////////////////////////////////
248 
249 	/**
250 	 * Range types for iterating over elements of the set.
251 	 * Implements std.range.isForwardRange
252 	 */
253 	alias Range = .Range!(V, Node);
254 	alias ConstRange = .Range!(const(V), const(Node));
255 	alias ImmutableRange = .Range!(immutable(V), immutable(Node));
256 
257 	/** default range, iterates over everything in arbitrary order */
258 	Range opSlice()
259 	{
260 		return Range(table);
261 	}
262 
263 	/** ditto */
264 	ConstRange opSlice() const
265 	{
266 		return ConstRange(table.dup); // TODO: remove the '.dup'
267 	}
268 
269 	/** ditto */
270 	ImmutableRange opSlice() immutable
271 	{
272 		return ImmutableRange(table.dup); // TODO: remove the '.dup'
273 	}
274 }
275 
276 /** basic usage */
277 unittest
278 {
279 	Set!int a;
280 	assert(a.add(1) == true);
281 	assert(a.add([4,2,3,1,5]) == 4);
282 	assert(a.remove(7) == false);
283 	assert(a.remove([1,1,8,2]) == 2);
284 	assert(a.remove(3) == true);
285 	assert(a.length == 2);
286 }
287 
288 unittest
289 {
290 	Set!int a;
291 	a.add(iota(0,10));
292 	const Set!int b = cast(const)a;
293 	immutable Set!int c = cast(immutable)a;
294 	assert(equal(b[], a[]));
295 	assert(equal(c[], a[]));
296 	assert(isForwardRange!(Set!int.Range));
297 	assert(isForwardRange!(Set!int.ConstRange));
298 	assert(isForwardRange!(Set!int.ImmutableRange));
299 }
300 
301 //////////////////////////////////////////////////////////////////////
302 /// internals of the hash-table
303 //////////////////////////////////////////////////////////////////////
304 
305 private struct Range(V, Node)
306 {
307 	private Node* curr;
308 	private Node*[] table;
309 
310 	private this(Node*[] _table)
311 	{
312 		table = _table;
313 		while(!table.empty && table[0] is null)
314 			table.popFront;
315 		if(!table.empty)
316 		{
317 			curr = table.front;
318 			table.popFront;
319 		}
320 	}
321 
322 	bool empty() const
323 	{
324 		return curr is null;
325 	}
326 
327 	ref V front()
328 	{
329 		return curr.value;
330 	}
331 
332 	void popFront()
333 	{
334 		curr = curr.next;
335 		if(curr !is null)
336 			return;
337 		while(!table.empty && table[0] is null)
338 			table.popFront;
339 		if(!table.empty)
340 		{
341 			curr = table.front;
342 			table.popFront;
343 		}
344 	}
345 
346 	Range save()
347 	{
348 		return this;
349 	}
350 }
351 
352 /** prime numbers used as hash-table sizes */
353 static if(size_t.sizeof >= long.sizeof)
354 private static immutable size_t[] primeList = [
355 	7,13,31,61,
356 	127,251,509,1021,
357 	2039,4093,8191,16381,
358 	32749,65521,131071,262139,
359 	524287,1048573,2097143,4194301,
360 	8388593,16777213,33554393,67108859,
361 	134217689,268435399,536870909,1073741789,
362 	2147483647,4294967291,8589934583,17179869143,
363 	34359738337,68719476731,137438953447,274877906899,
364 	549755813881,1099511627689,2199023255531,4398046511093,
365 ];
366 
367 else
368 private static immutable size_t[] primeList = [
369 	7,13,31,61,
370 	127,251,509,1021,
371 	2039,4093,8191,16381,
372 	32749,65521,131071,262139,
373 	524287,1048573,2097143,4194301,
374 	8388593,16777213,33554393,67108859,
375 	134217689,268435399,536870909,1073741789,
376 ];