From 588a2232cf24183218d88c85003f2e6093f942ed Mon Sep 17 00:00:00 2001
From: Benjamin Auder <benjamin.auder@somewhere>
Date: Sun, 7 Feb 2021 13:13:58 +0100
Subject: [PATCH] Add basic Set implementation - TODO: add iterators for Set
 and HashTable

---
 .gitignore         |   1 +
 src/HashTable.c    |  10 +--
 src/HashTable.h    |   4 +-
 src/Set.c          | 177 +++++++++++++++++++++++++++++++++++++++++++++
 src/Set.h          | 156 +++++++++++++++++++++++++++++++++++++++
 test/main.c        |   8 ++
 test/t.HashTable.c |   4 +-
 test/t.Set.c       | 143 ++++++++++++++++++++++++++++++++++++
 test/t.Vector.c    |   4 +-
 9 files changed, 496 insertions(+), 11 deletions(-)
 create mode 100644 src/Set.c
 create mode 100644 src/Set.h
 create mode 100644 test/t.Set.c

diff --git a/.gitignore b/.gitignore
index 941f4fb..7155340 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 *.o
 /src/obj/*.so
+/test/main.c
 /test/test
 /test/vgcore.*
 /doc/html/
diff --git a/src/HashTable.c b/src/HashTable.c
index d54934b..468de81 100644
--- a/src/HashTable.c
+++ b/src/HashTable.c
@@ -85,9 +85,8 @@ void* _hashtable_get(HashTable* hashTable, char* key)
 void _hashtable_set(HashTable* hashTable, char* key, void* data)
 {
   UInt hashIdx = _compute_hash(key, hashTable->hashSize);
-  HashCell
-    *cell = hashTable->head[hashIdx],
-    *prev = NULL;
+  HashCell *cell = hashTable->head[hashIdx],
+           *prev = NULL;
   while (cell != NULL)
   {
     if (strcmp(cell->key, key) == 0)
@@ -116,9 +115,8 @@ void _hashtable_set(HashTable* hashTable, char* key, void* data)
 void hashtable_delete(HashTable* hashTable, char* key)
 {
   UInt hashIdx = _compute_hash(key, hashTable->hashSize);
-  HashCell
-    *cell = hashTable->head[hashIdx],
-    *prev = NULL;
+  HashCell *cell = hashTable->head[hashIdx],
+           *prev = NULL;
   while (cell != NULL)
   {
     if (strcmp(cell->key, key) == 0)
diff --git a/src/HashTable.h b/src/HashTable.h
index 686e2bb..9ed3755 100644
--- a/src/HashTable.h
+++ b/src/HashTable.h
@@ -90,7 +90,7 @@ void* _hashtable_get(
  * @param key Key of the element to retrieve..
  * @param data 'out' variable (ptr) to contain the result.
  *
- * Usage: void hashtable_get(HashTable* hashTable, char* key, void data)
+ * Usage: void hashtable_get(HashTable* hashTable, char* key, void* data)
  */
 #define hashtable_get(hashTable, key, data) \
 { \
@@ -122,6 +122,8 @@ void _hashtable_set(
 
 /**
  * @brief Remove the given key (+ associated value).
+ *
+ * Usage: void hashtable_delete(HashTable* hashTable, char* key)
  */
 void hashtable_delete(
   HashTable* hashTable, ///< "this" pointer.
diff --git a/src/Set.c b/src/Set.c
new file mode 100644
index 0000000..0502bf6
--- /dev/null
+++ b/src/Set.c
@@ -0,0 +1,177 @@
+/**
+ * @file Set.c
+ */
+
+#include "cgds/Set.h"
+
+void _set_init(Set* set, size_t dataSize, size_t hashSize,
+               UInt (*getHash)(void*, size_t))
+{
+  set->dataSize = dataSize;
+  set->hashSize = hashSize;
+  set->getHash = getHash; // NULL => default byte hash (_set_compute_hash)
+  set->size = 0;
+  set->head = safe_malloc(hashSize * sizeof(SetCell*)); // one slot per bucket
+  for (UInt bucket = 0; bucket < hashSize; bucket++)
+    set->head[bucket] = NULL; // every chain starts out empty
+}
+
+Set* _set_new(size_t dataSize, size_t hashSize, UInt (*getHash)(void*, size_t))
+{
+  Set* fresh = safe_malloc(sizeof *fresh); // the handle itself lives on the heap
+  _set_init(fresh, dataSize, hashSize, getHash);
+  return fresh;
+}
+
+Set* set_copy(Set* set)
+{
+  // Clone the chains bucket by bucket, keeping their order. Item buffers are
+  // duplicated bytewise, so pointers stored inside items remain shared.
+  Set* setCopy = _set_new(set->dataSize, set->hashSize, set->getHash);
+  setCopy->size = set->size;
+  for (UInt b = 0; b < set->hashSize; b++)
+  {
+    // 'link' designates the slot receiving the next clone: the bucket head
+    // at first, then the 'next' field of the last cloned cell.
+    SetCell** link = &setCopy->head[b];
+    for (SetCell* src = set->head[b]; src != NULL; src = src->next)
+    {
+      SetCell* clone = (SetCell*) safe_malloc(sizeof(SetCell));
+      clone->item = safe_malloc(set->dataSize);
+      memcpy(clone->item, src->item, set->dataSize);
+      // Terminate the chain here; overwritten if another cell follows
+      clone->next = NULL;
+      *link = clone;
+      link = &clone->next;
+    }
+  }
+  return setCopy;
+}
+
+bool set_empty(Set* set)
+{
+  return (set->size == 0); // true iff no element is currently stored
+}
+
+UInt set_size(Set* set)
+{
+  return set->size; // counter kept up to date by add / delete / clear
+}
+
+// Function (raw bytes of key) --> (integer) hash, reduced modulo hashSize [internal usage]
+// Default function. Can be changed (see set_new())
+UInt _set_compute_hash(void* key, size_t dataSize, size_t hashSize)
+{
+  UInt res = 0;
+  // Interpret the dataSize bytes in key as a piece of string (no terminator needed)
+  unsigned char* keyStr = (unsigned char*)key;
+  for (size_t i = 0; i < dataSize; i++)
+    // NOTE: '31' from here https://stackoverflow.com/a/4384446
+    res = (*(keyStr+i) + 31 * res) % hashSize; // requires hashSize != 0
+  return res;
+}
+
+// Bucket index for key [internal usage]; a custom hash result is used as-is
+UInt _set_get_hindex(Set* set, void* key)
+{
+  return (set->getHash != NULL)
+    ? set->getHash(key, set->hashSize)
+    : _set_compute_hash(key, set->dataSize, set->hashSize);
+}
+
+bool set_has(Set* set, void* item)
+{
+  // Only the bucket selected by the hash can contain the item
+  UInt bucket = _set_get_hindex(set, item);
+  for (SetCell* it = set->head[bucket]; it != NULL; it = it->next)
+  {
+    // Equality is bytewise over the full element size
+    if (memcmp(it->item, item, set->dataSize) == 0)
+      return true;
+  }
+  return false;
+}
+
+void _set_add(Set* set, void* item)
+{
+  // Walk the bucket chain through 'link', the slot holding the pointer to
+  // the current cell; after the loop it is where a new cell gets attached.
+  UInt bucket = _set_get_hindex(set, item);
+  SetCell** link = &set->head[bucket];
+  while (*link != NULL)
+  {
+    if (memcmp((*link)->item, item, set->dataSize) == 0)
+      // Bytewise-equal element already stored: leave the set untouched
+      return;
+    link = &(*link)->next;
+  }
+
+  // Not found: append a cell owning a private copy of the item bytes
+  SetCell* fresh = (SetCell*) safe_malloc(sizeof(SetCell));
+  fresh->item = safe_malloc(set->dataSize);
+  memcpy(fresh->item, item, set->dataSize);
+  fresh->next = NULL;
+
+  // *link is the bucket head (empty chain) or the 'next' field of the tail
+  *link = fresh;
+  set->size++;
+}
+
+void _set_delete(Set* set, void* item)
+{
+  // 'link' is the slot (bucket head or a 'next' field) pointing at the
+  // cell under examination, which makes unlinking a single assignment.
+  UInt bucket = _set_get_hindex(set, item);
+  SetCell** link = &set->head[bucket];
+  while (*link != NULL)
+  {
+    SetCell* victim = *link;
+    if (memcmp(victim->item, item, set->dataSize) != 0)
+    {
+      link = &victim->next;
+      continue;
+    }
+    // Match: bypass the cell, then release its payload and itself
+    *link = victim->next;
+    safe_free(victim->item);
+    safe_free(victim);
+    set->size--;
+    return;
+  }
+}
+
+Vector* set_to_vector(Set* set)
+{
+  // Elements are pushed bucket by bucket, in chain order
+  Vector* vec = _vector_new(set->dataSize);
+  for (UInt b = 0; b < set->hashSize; b++)
+  {
+    for (SetCell* it = set->head[b]; it != NULL; it = it->next)
+      _vector_push(vec, it->item);
+  }
+  return vec;
+}
+
+void set_clear(Set* set)
+{
+  // Pop cells from the front of each bucket until the chain is exhausted;
+  // the loop exit condition leaves every head[] entry equal to NULL.
+  for (UInt b = 0; b < set->hashSize; b++)
+  {
+    while (set->head[b] != NULL)
+    {
+      SetCell* front = set->head[b];
+      set->head[b] = front->next;
+      safe_free(front->item);
+      safe_free(front);
+    }
+  }
+  set->size = 0; // the head[] array itself stays allocated
+}
+
+void set_destroy(Set* set)
+{
+  set_clear(set); // free every cell and its item buffer
+  safe_free(set->head); // then the array of bucket heads
+  safe_free(set); // finally the handle: 'set' must not be used afterwards
+}
diff --git a/src/Set.h b/src/Set.h
new file mode 100644
index 0000000..8fc0663
--- /dev/null
+++ b/src/Set.h
@@ -0,0 +1,156 @@
+/**
+ * @file Set.h
+ */
+
+#ifndef CGDS_SET_H
+#define CGDS_SET_H
+
+#include <stdlib.h>
+#include <string.h>
+#include "cgds/safe_alloc.h"
+#include "cgds/types.h"
+#include "cgds/Vector.h"
+
+/**
+ * @brief Cell of a set: one node of a singly-linked bucket list.
+ */
+typedef struct SetCell {
+  void* item; ///< Generic data (key) contained in this cell (heap copy).
+  struct SetCell* next; ///< Pointer to next cell in the list (NULL at end).
+} SetCell;
+
+/**
+ * @brief Generic set containing any data (of same size).
+ */
+typedef struct Set {
+  UInt size; ///< Count elements in the set.
+  size_t dataSize; ///< Size of a set cell element in bytes.
+  size_t hashSize; ///< Number of buckets, i.e. length of the 'head' array.
+  SetCell** head; ///< Array of hashSize pointers to the first cell of each list.
+  UInt (*getHash)(void*, size_t); ///< Custom hash function (optional, may be NULL)
+} Set;
+
+/**
+ * @brief Initialize an empty set ('set' is caller-provided; buckets are allocated).
+ */
+void _set_init(
+  Set* set, ///< "this" pointer.
+  size_t dataSize, ///< Size in bytes of a set element.
+  size_t hashSize, ///< Number of buckets (length of the internal array).
+  UInt (*getHash)(void*, size_t) ///< Custom hash function (nullable)
+);
+
+/**
+ * @brief Return an allocated and initialized set (release it with set_destroy()).
+ */
+Set* _set_new(
+  size_t dataSize, ///< Size in bytes of a set element.
+  size_t hashSize, ///< Number of buckets (length of the internal array).
+  UInt (*getHash)(void*, size_t) ///< Custom hash function (nullable)
+);
+
+/**
+ * @brief Return an allocated and initialized set.
+ * @param type Type of a set element (int, char*, ...); only its sizeof is used.
+ * @param hsize Size of the internal pointers array (number of buckets).
+ * @param getHash Custom hash function; NULL selects the default byte hash.
+ *
+ * Usage: Set* set_new(<Type> type, UInt hash_size, UInt (*getHash)(void*, size_t))
+ */
+#define set_new(type, hsize, getHash) \
+  _set_new(sizeof(type), hsize, getHash)
+
+/**
+ * @brief Copy constructor: items are duplicated bytewise (shallow copy, ok for basic types).
+ */
+Set* set_copy(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Check if the set is empty (i.e. contains no element).
+ */
+bool set_empty(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Return current size (number of stored elements).
+ */
+UInt set_size(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Lookup given element (bytewise comparison over the element size).
+ *
+ * Usage: bool set_has(Set* set, void* item)
+ */
+bool set_has(
+  Set* set, ///< "this" pointer.
+  void* item ///< Pointer to the element to search.
+);
+
+/**
+ * @brief Add an item to the set (no effect if already present).
+ */
+void _set_add(
+  Set* set, ///< "this" pointer.
+  void* item ///< Pointer to the element to add (its bytes are copied).
+);
+
+/**
+ * @brief Add an item to the set (the value is copied; no effect if already present).
+ * @param set "this" pointer.
+ * @param item Element to add; sizeof(typeof(item)) must equal the set's element size.
+ *
+ * Usage: void set_add(Set* set, <Type> item)
+ */
+#define set_add(set, item) \
+{ \
+  typeof(item) set_add_item_ = (item); \
+  _set_add((set), &set_add_item_); \
+}
+
+/**
+ * @brief Remove the given item (no effect if it is not in the set).
+ */
+void _set_delete(
+  Set* set, ///< "this" pointer.
+  void* item ///< Pointer to the element to delete.
+);
+
+/**
+ * @brief Remove the given item (no effect if it is not in the set).
+ * @param item Element to remove; sizeof(typeof(item)) must equal the set's element size.
+ *
+ * Usage: void set_delete(Set* set, <Type> item)
+ */
+#define set_delete(set, item) \
+{ \
+  typeof(item) set_delete_item_ = (item); \
+  _set_delete((set), &set_delete_item_); \
+}
+
+/**
+ * @brief Return a new vector filled with the set elements (presumably copies, not pointers - TODO confirm in Vector.c).
+ */
+Vector* set_to_vector(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Clear the entire set (all elements are freed; the set remains usable).
+ */
+void set_clear(
+  Set* set ///< "this" pointer.
+);
+
+/**
+ * @brief Destroy the set: clear it, free hashes array, then free the set itself.
+ */
+void set_destroy(
+  Set* set ///< "this" pointer.
+);
+
+#endif
diff --git a/test/main.c b/test/main.c
index 00b957f..fe3b1cf 100644
--- a/test/main.c
+++ b/test/main.c
@@ -23,6 +23,14 @@ int main(int argc, char** argv)
 	t_heap_push_pop_evolved();
 	t_heap_copy();
 
+	//file ./t.Set.c :
+	t_set_clear();
+	t_set_size();
+	t_set_add_remove_basic();
+	t_set_getnull_modify();
+	t_set_copy();
+	t_set_tovect();
+
 	//file ./t.List.c :
 	t_list_clear();
 	t_list_size();
diff --git a/test/t.HashTable.c b/test/t.HashTable.c
index a77b2ca..e695f57 100644
--- a/test/t.HashTable.c
+++ b/test/t.HashTable.c
@@ -71,7 +71,7 @@ void t_hashtable_set_remove_basic()
     ckValue += 1.0;
   }
 
-  //Remove keys / values
+  // Remove keys / values
   for (int i = 0; i < n; i++)
   {
     key[3] = (char)(48 + i);
@@ -98,7 +98,7 @@ void t_hashtable_getnull_modify()
   }
   for (int i = 0; i < n; i++)
   {
-    //another way to access elements
+    // Another way to access elements
     key[3] = (char)(48 + i);
     StructTest1* st1Cell;
     hashtable_get(h, key, st1Cell);
diff --git a/test/t.Set.c b/test/t.Set.c
new file mode 100644
index 0000000..d85b0c6
--- /dev/null
+++ b/test/t.Set.c
@@ -0,0 +1,143 @@
+#include <stdlib.h>
+#include "cgds/Set.h"
+#include "helpers.h"
+#include "lut.h"
+
+UInt getHash_int(void* item, size_t hashSize) { // custom hash used by the int sets below
+  return *((int*)item) % hashSize; // item points to an int; tests only use keys >= 0
+}
+
+void t_set_clear()
+{
+  Set* s = set_new(int, 16, getHash_int);
+  lu_assert(set_empty(s)); // a freshly created set holds nothing
+
+  set_add(s, 0);
+  set_add(s, 1);
+  set_add(s, 2);
+
+  set_destroy(s); // destroy while non-empty, then start over with a new set
+  s = set_new(int, 8, getHash_int);
+
+  set_add(s, 1);
+  set_add(s, 2);
+  set_add(s, 3);
+
+  set_clear(s); // explicit clear: the set must report empty afterwards
+  lu_assert(set_empty(s));
+
+  set_destroy(s);
+}
+
+void t_set_size()
+{
+  Set* s = set_new(int, 16, getHash_int); // keys 0..7 each land in their own bucket
+  lu_assert(set_empty(s));
+
+  set_add(s, 0);
+  set_add(s, 1);
+  set_add(s, 2);
+  lu_assert_int_eq(set_size(s), 3); // size grows by one per distinct element
+
+  set_add(s, 3);
+  set_add(s, 4);
+  lu_assert_int_eq(set_size(s), 5);
+
+  set_add(s, 5);
+  set_add(s, 6);
+  set_add(s, 7);
+  lu_assert_int_eq(set_size(s), 8);
+
+  set_destroy(s);
+}
+
+void t_set_add_remove_basic()
+{
+  int n = 10;
+
+  Set* s = set_new(double, 4, NULL); // NULL => default byte hash; 10 items in 4 buckets must collide
+  for (double i = 0.0; i < n; i++)
+    set_add(s, i);
+  lu_assert_int_eq(set_size(s), n);
+
+  // Check values
+  for (double i = 0.0; i < n; i++)
+    lu_assert(set_has(s, &i)); // set_has() takes a pointer, unlike the set_add() macro
+
+  // Remove items
+  for (double i = 0.0; i < n; i++)
+    set_delete(s, i);
+  lu_assert_int_eq(set_size(s), 0); // every deletion must have found its element
+
+  set_destroy(s);
+}
+
+void t_set_getnull_modify()
+{
+  int n = 10;
+
+  Set* s = set_new(StructTest1, 4, NULL);
+  // NOTE: using calloc() because probably StructTest1 is 4 + 8 bytes,
+  // aligned to 8 + 8 ==> 4 are left uninitialized ==> memcmp compares
+  // some junk values. TODO: custom equality function instead.
+  StructTest1* st1 = (StructTest1*) calloc(n , sizeof(StructTest1));
+  for (int i = 0; i < n; i++)
+  {
+    st1[i].a = random() % 42;
+    st1[i].b = (double) random() / RAND_MAX;
+    set_add(s, *(st1 + i));
+  }
+  for (int i = 0; i < n; i++)
+  {
+    // Look up each stored element through a freshly built, zeroed copy
+    StructTest1* st1Cell = (StructTest1*) calloc(1, sizeof(StructTest1));
+    st1Cell->a = st1[i].a;
+    st1Cell->b = st1[i].b;
+    lu_assert(set_has(s, st1Cell));
+    free(st1Cell);
+  }
+
+  // has / has not:
+  StructTest1* stmp = (StructTest1*) calloc(1, sizeof(StructTest1));
+  stmp->a = 51; // stored 'a' values come from '% 42', so 51 cannot match any of them
+  stmp->b = 2.0;
+  lu_assert(!set_has(s, stmp));
+  free(stmp);
+  lu_assert(set_has(s, st1 + 4)); // lookup directly through the source array
+  free(st1);
+
+  set_destroy(s);
+}
+
+void t_set_copy()
+{
+  int n = 10;
+
+  Set* s = set_new(int, 8, getHash_int);
+  for (int i = 0; i < n; i++)
+    set_add(s, i + 1);
+  Set* sc = set_copy(s);
+
+  lu_assert_int_eq(s->size, sc->size);
+  // Destroy the original first: the copy must own its cells and items
+  set_destroy(s);
+  for (int i = 0; i < n; i++) {
+    int item = i + 1;
+    lu_assert(set_has(sc, &item)); // every original element is found in the copy
+  }
+  set_destroy(sc);
+}
+
+void t_set_tovect()
+{
+  int n = 10;
+
+  Set* s = set_new(int, 8, getHash_int); // 10 keys in 8 buckets: some chains hold 2 cells
+  for (int i = 0; i < n; i++)
+    set_add(s, i);
+
+  Vector* v = set_to_vector(s);
+  lu_assert(vector_size(v) == set_size(s)); // only the element count is checked, not the contents
+  vector_destroy(v); // the returned vector is released by the test itself
+  set_destroy(s);
+}
diff --git a/test/t.Vector.c b/test/t.Vector.c
index 1ccf779..7579afb 100644
--- a/test/t.Vector.c
+++ b/test/t.Vector.c
@@ -65,7 +65,7 @@ void t_vector_push_pop_basic()
     vectorI_move_next(vi);
   }
 
-  // same, from end to beginning
+  // Same, from end to beginning
   ckValue = n - 1;
   vectorI_reset_end(vi);
   while (vectorI_has_data(vi))
@@ -94,7 +94,7 @@ void t_vector_push_pop_evolved()
   }
   for (int i = 0; i < n; i++)
   {
-    //another way to access elements
+    // Another way to access elements
     StructTest1 st1Cell;
     vector_get(v, i, st1Cell);
     lu_assert_int_eq(st1Cell.a, st1[i].a);
-- 
2.44.0