From: Benjamin Auder
Date: Sun, 7 Feb 2021 12:13:58 +0000 (+0100)
Subject: Add basic Set implementation - TODO: add iterators for Set and HashTable
X-Git-Url: https://git.auder.net/%7B%7B%20path%28%27mixstore_static_home%27%29%20%7D%7D?a=commitdiff_plain;h=588a2232cf24183218d88c85003f2e6093f942ed;p=cgds.git

Add basic Set implementation - TODO: add iterators for Set and HashTable
---

diff --git a/.gitignore b/.gitignore
index 941f4fb..7155340 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.o
 /src/obj/*.so
+/test/main.c
 /test/test
 /test/vgcore.*
 /doc/html/
diff --git a/src/HashTable.c b/src/HashTable.c
index d54934b..468de81 100644
--- a/src/HashTable.c
+++ b/src/HashTable.c
@@ -85,9 +85,8 @@ void* _hashtable_get(HashTable* hashTable, char* key)
 void _hashtable_set(HashTable* hashTable, char* key, void* data)
 {
   UInt hashIdx = _compute_hash(key, hashTable->hashSize);
-  HashCell
-    *cell = hashTable->head[hashIdx],
-    *prev = NULL;
+  HashCell *cell = hashTable->head[hashIdx],
+           *prev = NULL;
   while (cell != NULL)
   {
     if (strcmp(cell->key, key) == 0)
@@ -116,9 +115,8 @@ void _hashtable_set(HashTable* hashTable, char* key, void* data)
 void hashtable_delete(HashTable* hashTable, char* key)
 {
   UInt hashIdx = _compute_hash(key, hashTable->hashSize);
-  HashCell
-    *cell = hashTable->head[hashIdx],
-    *prev = NULL;
+  HashCell *cell = hashTable->head[hashIdx],
+           *prev = NULL;
   while (cell != NULL)
   {
     if (strcmp(cell->key, key) == 0)
diff --git a/src/HashTable.h b/src/HashTable.h
index 686e2bb..9ed3755 100644
--- a/src/HashTable.h
+++ b/src/HashTable.h
@@ -90,7 +90,7 @@ void* _hashtable_get(
  * @param key Key of the element to retrieve..
  * @param data 'out' variable (ptr) to contain the result.
  *
- * Usage: void hashtable_get(HashTable* hashTable, char* key, void data)
+ * Usage: void hashtable_get(HashTable* hashTable, char* key, void* data)
  */
 #define hashtable_get(hashTable, key, data) \
 { \
@@ -122,6 +122,8 @@ void _hashtable_set(
 
 /**
  * @brief Remove the given key (+ associated value).
+ *
+ * Usage: void hashtable_delete(HashTable* hashTable, char* key)
  */
 void hashtable_delete(
   HashTable* hashTable, ///< "this" pointer.
diff --git a/src/Set.c b/src/Set.c
new file mode 100644
index 0000000..0502bf6
--- /dev/null
+++ b/src/Set.c
@@ -0,0 +1,177 @@
+/**
+ * @file Set.c
+ */
+
+#include "cgds/Set.h"
+
+void _set_init(Set* set, size_t dataSize, size_t hashSize,
+  UInt (*getHash)(void*, size_t))
+{
+  set->hashSize = hashSize;
+  set->dataSize = dataSize;
+  set->head = safe_malloc(hashSize * sizeof(SetCell*));
+  for (UInt i = 0; i < hashSize; i++)
+    set->head[i] = NULL;
+  set->size = 0;
+  set->getHash = getHash; //may be NULL
+}
+
+Set* _set_new(size_t dataSize, size_t hashSize, UInt (*getHash)(void*, size_t))
+{
+  Set* set = (Set*) safe_malloc(sizeof(Set));
+  _set_init(set, dataSize, hashSize, getHash);
+  return set;
+}
+
+Set* set_copy(Set* set)
+{
+  Set* setCopy = _set_new(set->dataSize, set->hashSize, set->getHash);
+  setCopy->size = set->size;
+  for (UInt i = 0; i < set->hashSize; i++)
+  {
+    SetCell *cell = set->head[i],
+            *cellCopy = setCopy->head[i],
+            *prev = NULL;
+    while (cell != NULL)
+    {
+      // cellCopy == NULL (from empty list)
+      cellCopy = (SetCell*) safe_malloc(sizeof(SetCell));
+      cellCopy->item = safe_malloc(set->dataSize);
+      memcpy(cellCopy->item, cell->item, set->dataSize);
+      if (prev == NULL) setCopy->head[i] = cellCopy;
+      else prev->next = cellCopy;
+      prev = cellCopy;
+      cell = cell->next;
+    }
+    if (cellCopy != NULL) cellCopy->next = NULL;
+  }
+  return setCopy;
+}
+
+bool set_empty(Set* set)
+{
+  return (set->size == 0);
+}
+
+UInt set_size(Set* set)
+{
+  return set->size;
+}
+
+// Function (raw bytes) key --> (integer) hash [internal usage]
+// Default function. Can be changed (see set_new())
+UInt _set_compute_hash(void* key, size_t dataSize, size_t hashSize)
+{
+  UInt res = 0;
+  // Interpret the bytes in key as a piece of string
+  unsigned char* keyStr = (unsigned char*)key;
+  for (size_t i = 0; i < dataSize; i++)
+    // NOTE: '31' from here https://stackoverflow.com/a/4384446
+    res = (*(keyStr+i) + 31 * res) % hashSize;
+  return res;
+}
+
+// Get hash index from key [internal usage]
+UInt _set_get_hindex(Set* set, void* key)
+{
+  if (set->getHash == NULL)
+    return _set_compute_hash(key, set->dataSize, set->hashSize);
+  return set->getHash(key, set->hashSize);
+}
+
+bool set_has(Set* set, void* item)
+{
+  UInt hashIdx = _set_get_hindex(set, item);
+  SetCell* cell = set->head[hashIdx];
+  while (cell != NULL)
+  {
+    if (memcmp(cell->item, item, set->dataSize) == 0)
+      return true;
+    cell = cell->next;
+  }
+  return false;
+}
+
+void _set_add(Set* set, void* item)
+{
+  UInt hashIdx = _set_get_hindex(set, item);
+  SetCell *cell = set->head[hashIdx],
+          *prev = NULL;
+  while (cell != NULL)
+  {
+    if (memcmp(cell->item, item, set->dataSize) == 0)
+      // Already here: nothing to do
+      return;
+    prev = cell;
+    cell = cell->next;
+  }
+  // New element: insert after prev (which may be NULL)
+  SetCell* newCell = (SetCell*) safe_malloc(sizeof(SetCell));
+  newCell->item = safe_malloc(set->dataSize);
+  memcpy(newCell->item, item, set->dataSize);
+  newCell->next = NULL;
+  if (prev == NULL)
+    set->head[hashIdx] = newCell;
+  else
+    prev->next = newCell;
+  set->size++;
+}
+
+void _set_delete(Set* set, void* item)
+{
+  UInt hashIdx = _set_get_hindex(set, item);
+  SetCell *cell = set->head[hashIdx],
+          *prev = NULL;
+  while (cell != NULL)
+  {
+    if (memcmp(cell->item, item, set->dataSize) == 0)
+    {
+      if (prev == NULL)
+        set->head[hashIdx] = cell->next;
+      else
+        prev->next = cell->next;
+      safe_free(cell->item);
+      safe_free(cell);
+      set->size--;
+      break;
+    }
+    prev = cell;
+    cell = cell->next;
+  }
+}
+
+Vector* set_to_vector(Set* set) {
+  Vector* v = _vector_new(set->dataSize);
+  for (UInt i = 0; i < set->hashSize; i++) {
+    SetCell* cell = set->head[i];
+    while (cell != NULL) {
+      _vector_push(v, cell->item);
+      cell = cell->next;
+    }
+  }
+  return v;
+}
+
+void set_clear(Set* set)
+{
+  for (UInt i = 0; i < set->hashSize; i++)
+  {
+    SetCell* cell = set->head[i];
+    while (cell != NULL)
+    {
+      SetCell* next = cell->next;
+      safe_free(cell->item);
+      safe_free(cell);
+      cell = next;
+    }
+    set->head[i] = NULL;
+  }
+  set->size = 0;
+}
+
+void set_destroy(Set* set)
+{
+  set_clear(set);
+  safe_free(set->head);
+  safe_free(set);
+}
diff --git a/src/Set.h b/src/Set.h
new file mode 100644
index 0000000..8fc0663
--- /dev/null
+++ b/src/Set.h
@@ -0,0 +1,156 @@
+/**
+ * @file Set.h
+ */
+
+#ifndef CGDS_SET_H
+#define CGDS_SET_H
+
+#include <stdlib.h>
+#include <string.h>
+#include "cgds/safe_alloc.h"
+#include "cgds/types.h"
+#include "cgds/Vector.h"
+
+/**
+ * @brief Cell of a set.
+ */
+typedef struct SetCell {
+  void* item; ///< Generic data (key) contained in this cell.
+  struct SetCell* next; ///< Pointer to next cell in the list.
+} SetCell;
+
+/**
+ * @brief Generic set containing any data (of same size).
+ */
+typedef struct Set {
+  UInt size; ///< Count elements in the set.
+  size_t dataSize; ///< Size of a set cell element in bytes.
+  size_t hashSize; ///< (Maximum) Number of stored hash keys.
+  SetCell** head; ///< Pointers to the first cell in a list.
+  UInt (*getHash)(void*, size_t); ///< Custom hash function (optional)
+} Set;
+
+/**
+ * @brief Initialize an empty set.
+ */
+void _set_init(
+  Set* set, ///< "this" pointer.
+  size_t dataSize, ///< Size in bytes of a set element.
+  size_t hashSize, ///< (Maximum) Number of stored hash keys.
+  UInt (*getHash)(void*, size_t) ///< Custom hash function (optional)
+);
+
+/**
+ * @brief Return an allocated and initialized set.
+ */
+Set* _set_new(
+  size_t dataSize, ///< Size in bytes of a set element.
+  size_t hashSize, ///< (Maximum) Number of stored hash keys.
+  UInt (*getHash)(void*, size_t) ///< Custom hash function (nullable)
+);
+
+/**
+ * @brief Return an allocated and initialized set.
+ * @param type Type of a set element (int, char*, ...).
+ * @param hsize Size of the internal pointers array.
+ * @param getHash Custom hash function (nullable)
+ *
+ * Usage: Set* set_new(<Type> type, UInt hash_size, UInt (*getHash)(void*, size_t))
+ */
+#define set_new(type, hsize, getHash) \
+  _set_new(sizeof(type), hsize, getHash)
+
+/**
+ * @brief Copy constructor (shallow copy, ok for basic types).
+ */
+Set* set_copy(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Check if the set is empty.
+ */
+bool set_empty(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Return current size.
+ */
+UInt set_size(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Lookup given element.
+ *
+ * Usage: bool set_has(Set* set, void* item)
+ */
+bool set_has(
+  Set* set, ///< "this" pointer.
+  void* item ///< Element to search.
+);
+
+/**
+ * @brief Add an item to the set.
+ */
+void _set_add(
+  Set* set, ///< "this" pointer.
+  void* item ///< Element to add.
+);
+
+/**
+ * @brief Add an item to the set.
+ * @param set "this" pointer.
+ * @param item Element to add.
+ *
+ * Usage: void set_add(Set* set, void item)
+ */
+#define set_add(set, item) \
+{ \
+  typeof(item) tmp = item; \
+  _set_add(set, &tmp); \
+}
+
+/**
+ * @brief Remove the given item.
+ */
+void _set_delete(
+  Set* set, ///< "this" pointer.
+  void* item ///< Element to delete.
+);
+
+/**
+ * @brief Remove the given item.
+ * @param item Element to remove.
+ *
+ * Usage: void set_delete(Set* set, void item)
+ */
+#define set_delete(set, item) \
+{ \
+  typeof(item) tmp = item; \
+  _set_delete(set, &tmp); \
+}
+
+/**
+ * @brief Initialize a vector with (pointers to) set elements.
+ */
+Vector* set_to_vector(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Clear the entire set.
+ */
+void set_clear(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Destroy the set: clear it, and free hashes array.
+ */
+void set_destroy(
+  Set* set ///< "this" pointer.
+);
+
+#endif
diff --git a/test/main.c b/test/main.c
index 00b957f..fe3b1cf 100644
--- a/test/main.c
+++ b/test/main.c
@@ -23,6 +23,14 @@ int main(int argc, char** argv)
   t_heap_push_pop_evolved();
   t_heap_copy();
 
+  //file ./t.Set.c :
+  t_set_clear();
+  t_set_size();
+  t_set_add_remove_basic();
+  t_set_getnull_modify();
+  t_set_copy();
+  t_set_tovect();
+
   //file ./t.List.c :
   t_list_clear();
   t_list_size();
diff --git a/test/t.HashTable.c b/test/t.HashTable.c
index a77b2ca..e695f57 100644
--- a/test/t.HashTable.c
+++ b/test/t.HashTable.c
@@ -71,7 +71,7 @@ void t_hashtable_set_remove_basic()
     ckValue += 1.0;
   }
 
-  //Remove keys / values
+  // Remove keys / values
   for (int i = 0; i < n; i++)
   {
     key[3] = (char)(48 + i);
@@ -98,7 +98,7 @@ void t_hashtable_getnull_modify()
   }
   for (int i = 0; i < n; i++)
   {
-    //another way to access elements
+    // Another way to access elements
     key[3] = (char)(48 + i);
     StructTest1* st1Cell;
     hashtable_get(h, key, st1Cell);
diff --git a/test/t.Set.c b/test/t.Set.c
new file mode 100644
index 0000000..d85b0c6
--- /dev/null
+++ b/test/t.Set.c
@@ -0,0 +1,142 @@
+#include <stdlib.h>
+#include "cgds/Set.h"
+#include "helpers.h"
+#include "lut.h"
+
+UInt getHash_int(void* item, size_t hashSize) {
+  return *((int*)item) % hashSize;
+}
+
+void t_set_clear()
+{
+  Set* s = set_new(int, 16, getHash_int);
+  lu_assert(set_empty(s));
+
+  set_add(s, 0);
+  set_add(s, 1);
+  set_add(s, 2);
+
+  set_destroy(s);
+  s = set_new(int, 8, getHash_int);
+
+  set_add(s, 1);
+  set_add(s, 2);
+  set_add(s, 3);
+
+  set_clear(s);
+  lu_assert(set_empty(s));
+
+  set_destroy(s);
+}
+
+void t_set_size()
+{
+  Set* s = set_new(int, 16, getHash_int);
+  lu_assert(set_empty(s));
+
+  set_add(s, 0);
+  set_add(s, 1);
+  set_add(s, 2);
+  lu_assert_int_eq(set_size(s), 3);
+
+  set_add(s, 3);
+  set_add(s, 4);
+  lu_assert_int_eq(set_size(s), 5);
+
+  set_add(s, 5);
+  set_add(s, 6);
+  set_add(s, 7);
+  lu_assert_int_eq(set_size(s), 8);
+
+  set_destroy(s);
+}
+
+void t_set_add_remove_basic()
+{
+  int n = 10;
+
+  Set* s = set_new(double, 4, NULL);
+  for (double i = 0.0; i < n; i++)
+    set_add(s, i);
+  lu_assert_int_eq(set_size(s), n);
+
+  // Check values
+  for (double i = 0.0; i < n; i++)
+    lu_assert(set_has(s, &i));
+
+  // Remove items
+  for (double i = 0.0; i < n; i++)
+    set_delete(s, i);
+  lu_assert_int_eq(set_size(s), 0);
+
+  set_destroy(s);
+}
+
+void t_set_getnull_modify()
+{
+  int n = 10;
+
+  Set* s = set_new(StructTest1, 4, NULL);
+  // NOTE: using calloc() because probably StructTest1 is 4 + 8 bytes,
+  // aligned to 8 + 8 ==> 4 are left uninitialized ==> memcmp compares
+  // some junk values. TODO: custom equality function instead.
+  StructTest1* st1 = (StructTest1*) calloc(n , sizeof(StructTest1));
+  for (int i = 0; i < n; i++)
+  {
+    st1[i].a = random() % 42;
+    st1[i].b = (double) random() / RAND_MAX;
+    set_add(s, *(st1 + i));
+  }
+  for (int i = 0; i < n; i++)
+  {
+    // Another way to access elements: rebuild each item and look it up
+    StructTest1* st1Cell = (StructTest1*) calloc(1, sizeof(StructTest1));
+    st1Cell->a = st1[i].a;
+    st1Cell->b = st1[i].b;
+    lu_assert(set_has(s, st1Cell));
+    free(st1Cell);
+  }
+
+  // has / has not:
+  StructTest1* stmp = (StructTest1*) calloc(1, sizeof(StructTest1));
+  stmp->a = 51;
+  stmp->b = 2.0;
+  lu_assert(!set_has(s, stmp));
+  free(stmp);
+  lu_assert(set_has(s, st1 + 4));
+  free(st1);
+
+  set_destroy(s);
+}
+
+void t_set_copy()
+{
+  int n = 10;
+
+  Set* s = set_new(int, 8, getHash_int);
+  for (int i = 0; i < n; i++)
+    set_add(s, i + 1);
+  Set* sc = set_copy(s);
+
+  lu_assert_int_eq(s->size, sc->size);
+  for (int i = 0; i < n; i++) {
+    int item = i + 1;
+    lu_assert(set_has(sc, &item));
+  }
+  set_destroy(s);
+  set_destroy(sc);
+}
+
+void t_set_tovect()
+{
+  int n = 10;
+
+  Set* s = set_new(int, 8, getHash_int);
+  for (int i = 0; i < n; i++)
+    set_add(s, i);
+
+  Vector* v = set_to_vector(s);
+  lu_assert(vector_size(v) == set_size(s));
+  vector_destroy(v);
+  set_destroy(s);
+}
diff --git a/test/t.Vector.c b/test/t.Vector.c
index 1ccf779..7579afb 100644
--- a/test/t.Vector.c
+++ b/test/t.Vector.c
@@ -65,7 +65,7 @@ void t_vector_push_pop_basic()
     vectorI_move_next(vi);
   }
 
-  // same, from end to beginning
+  // Same, from end to beginning
   ckValue = n - 1;
   vectorI_reset_end(vi);
   while (vectorI_has_data(vi))
@@ -94,7 +94,7 @@ void t_vector_push_pop_evolved()
   }
   for (int i = 0; i < n; i++)
   {
-    //another way to access elements
+    // Another way to access elements
     StructTest1 st1Cell;
     vector_get(v, i, st1Cell);
     lu_assert_int_eq(st1Cell.a, st1[i].a);