/* Copyright © Charliecloud contributors. */

#define _GNU_SOURCE
#include "config.h"

#include <string.h>

#include "all.h"


/** Functions **/

/** Append a copy of an element to a list.

    @param ar[in,out]  Address of the array to append to. This array must be
                       on the heap, and it may be copied to a new, larger heap
                       array if more space is needed, in which case @c *ar is
                       set to point to the new array. Can be `NULL` to create
                       a new array containing only @p new.

    @param new[in]     Address of the element to append.

    @param size[in]    Size of one list element in bytes.

    Implementation note: We could round up the new size to the next power of
    two for allocation purposes, which would reduce the number of realloc()
    that actually change the size. However, many allocators do this type of
    thing internally already, and that seems a better place for it.

    > ![NOTE]
    >
    > @p ar must be type-cast because C cannot automatically convert to/from
    > double pointer to void. Using `void **` as we do relies on all pointers
    > having the same representation, which is true on most modern machines
    > but not guaranteed by the C standard [1].
    >
    > We could return a pointer to the (possibly new) array instead of using
    > an out parameter, which would avoid the double pointer and associated
    > non-portability but make it easy for callers to create dangling
    > pointers, i.e., after `a = list_append(b, ...)`, `b` will be invalid.
    > The concern here is not memory leaks but rather that `b` points to an
    > invalid buffer that likely *looks* valid.

    > ![WARNING]
    >
    > If list elements are themselves pointers, it is easy to mis-count
    > (de)reference operations and thus create a hard-to-debug junk list. For
    > example, consider this (correct) case:
    >
    >   char *s = "skibidi";
    >   char **list = NULL;
    >   list_append((void **)&list, &s, sizeof(char *));
    >
    > This code maintains an array of strings, which in C means an array of
    > pointers to `char`. That is, on a 64-bit machine, `list` is an array of
    > 8-byte items, each of which is a pointer to `char`. Here, because we
    > want to append the pointer `s` *itself* to `list`, we must pass the
    > *address of* that pointer, i.e. `&s`. On ARM64, a hex memory view of
    > `list` might then look something like:
    >
    >   78 56 34 12 ff 7f 00 00  00 00 00 00 00 00 00 00
    >
    > Here, the first 8 bytes are a stack address (0x00007fff12345678 in
    > little-endian) and the second is the terminating zero.
    >
    > Consider, however, this subtly incorrect variation:
    >
    >   list_append((void **)&list, s, sizeof(char *));
    >
    > Here, instead of appending the address of `s` like we wanted, this
    > appends the *contents* of the string instead. Unfortunately, because
    > `s` is itself a pointer, the line builds fine and runs without error,
    > but the results are incorrect (as is common with memory handling bugs).
    > An ARM64 hex memory view might look like:
    >
    >   73 6b 69 62 69 64 69 00  00 00 00 00 00 00 00 00
    >
    > Note that the first 8 bytes have the odor of ASCII characters rather
    > than a stack (0x7fff ...) or heap (0x5555 ...) pointer.

    [1]: https://c-faq.com/ptrs/genericpp.html */
void list_append(void **ar, void *new, size_t size)
{
   T__ (new != NULL);

   // Number of elements already in the list (zero if *ar is NULL).
   size_t used = list_count(*ar, size);

   // Room for: existing elements + the new one + the terminator.
   char *base = realloc_ch(*ar, (used + 2) * size, true);
   char *slot = base + used * size;  // where the old terminator was

   *ar = base;                       // hand the (possibly new) array back
   memcpy(slot, new, size);          // new element overwrites old terminator
   memset(slot + size, 0, size);     // all-zero element terminates the list
}

/** Copy the contents of list @p src onto the end of @p dst.

    @param dst[in,out]  Address of the list to extend. The list is resized
                        with realloc_ch(), and @c *dst is updated to whatever
                        that returns. @c *dst may be NULL, in which case the
                        result is a new list holding a copy of @p src.

    @param src[in]      List whose elements are appended. Only the elements
                        are copied; @p src itself is not modified.

    @param size[in]     Size of one list element in bytes.

    If @p src is NULL, this is a no-op; specifically, if @p dst is NULL, it
    remains NULL rather than becoming an allocated list with zero items.

    @p src may be the very same list as @c *dst (i.e., append a list to
    itself). Other partial overlaps between the two lists are not supported. */
void list_cat(void **dst, void *src, size_t size)
{
   size_t ct_dst, ct_src;
   bool self;
   char *base;

   if (src == NULL)
      return;

   // Remember aliasing *before* resizing: once the buffer has been passed to
   // realloc_ch(), the old pointer value must no longer be used.
   self = (src == *dst);

   ct_dst = list_count(*dst, size);
   ct_src = list_count(src, size);

   // Room for: both lists' elements + one terminator.
   base = realloc_ch(*dst, (ct_dst+ct_src+1)*size, true);
   *dst = base;
   if (self)
      src = base;  // the buffer may have moved; read from the live copy

   // Byte arithmetic goes through char * because arithmetic on void * is a
   // GNU extension rather than standard C. In the self-append case the source
   // is the first half and the destination the second, so no overlap.
   memcpy(base + ct_dst*size, src, ct_src*size);   // append src
   memset(base + (ct_dst+ct_src)*size, 0, size);   // set new terminator
}

/* Count the elements in list ar, where each element occupies size bytes. The
   count stops at, and does not include, the first element for which
   buf_zero_p() is true (the terminator). A NULL list has zero elements. */
size_t list_count(void *ar, size_t size)
{
   size_t n = 0;
   char *item = ar;  // byte-wise cursor over the elements

   if (item != NULL)
      while (!buf_zero_p(item, size)) {
         n++;
         item += size;  // step to the next element
      }

   return n;
}

/** Remove duplicates from the list of strings (i.e., @c "char *") @p ar.

    @param ar[in,out]  NULL-terminated list of strings to de-duplicate. May
                       be NULL or empty, in which case nothing happens.

    Strings are considered equal if they have the same content (as judged by
    streq()), i.e. we’re not checking pointer values.

    The list may be re-ordered arbitrarily.

    Removal is done in-place and the array is not resized, so it may be
    (harmlessly) too large after this function. The pointer to a removed
    duplicate is simply dropped from the list; this function does not free
    the string it pointed to. */
void list_dedup_strings(char **ar)
{
   int i, imax;

   // Ensure we have at least one element in the array, i.e. both ar[0] and
   // ar[1] must be valid, though the latter might be NULL.
   if (ar == NULL || ar[0] == NULL)
      return;

   // Algorithm: Iterate upward through the array. For each string, compare it
   // to all other strings later in the array. If there is a duplicate,
   // replace that duplicate with the last element and shorten the array by
   // one. Then re-scan for the same ar[i], because the element just moved
   // into position j has not been compared yet.
   i = 0;
   imax = list_count(ar, sizeof(ar[0])) - 1;  // index of last valid element
   while (i < imax) {
      bool dup_found = false;
      // Note the inclusive bound: imax is a valid index, so the last element
      // must be compared too. When j == imax the “replace” below is a
      // self-assignment and the element is then cleared, which is correct.
      for (int j = i+1; j <= imax; j++)
         if (streq(ar[i], ar[j])) {
            dup_found = true;
            ar[j] = ar[imax];   // replace ar[j] with last element
            ar[imax--] = NULL;  // shorten array
            break;
         }
      if (!dup_found)
         i++;                   // ar[i] unique, go to next
   }
}

/* Create a new, empty zero-terminated list whose elements are size bytes
   each, with storage for ct elements plus the terminator already allocated.
   Pre-allocating lets the caller fill up to ct slots directly, without
   growing the list for each one. The storage comes from malloc_zeroed(), so
   every slot not yet filled already reads as a terminator. size must be
   greater than zero. */
void *list_new(size_t ct, size_t size)
{
   size_t bytes;

   T__ (size > 0);

   bytes = (ct+1) * size;  // ct element slots + one terminator slot
   return malloc_zeroed(bytes, true);
}

/* Tokenize str on the single delimiter character delim and return the tokens
   as a new NULL-terminated list of strings. Runs of adjacent delimiters count
   as one, so no empty tokens are produced.

   str itself is not modified: tokenizing is done on a copy made with
   strdup_ch(), and the returned strings point into that one copy.

   Unlike strtok(3), only one delimiter character is accepted. */
void *list_new_strings(char delim, const char *str)
{
   char delims[] = { delim, '\0' };  // strtok_r(3) takes a delimiter string
   size_t delim_ct = 0;
   char **list;
   char *buf, *tok;
   char *tok_state = NULL;
   int i = 0;

   // N delimiters separate at most N+1 tokens, so counting delimiters up
   // front lets us allocate the list once rather than growing it per token.
   // Adjacent delimiters make this an over-estimate, which only costs a few
   // unused slots.
   for (const char *p = str; *p != '\0'; p++)
      if (*p == delim)
         delim_ct++;

   list = list_new(delim_ct + 1, sizeof(char *));

   // strtok_r(3) gets the string on the first call and NULL on every later
   // call; it writes terminators into the buffer, hence the private copy.
   buf = strdup_ch(str);
   tok = strtok_r(buf, delims, &tok_state);
   while (tok != NULL) {
      T__ (i < delim_ct + 1);  // bounds check
      list[i] = tok;
      i++;
      tok = strtok_r(NULL, delims, &tok_state);
   }

   return list;
}