Wednesday, May 8, 2013

Merging two arrays of integers in C

Inspired by TDWTF's Cheaters Never Prosper story, I decided to put together an example in C.

Given that we'll be dealing with arrays of int and we'll have to pass size information around, first, I declare a struct that holds a pointer to the array and the current size:

struct int_array_with_size {
    size_t size; 
    int *ary; 
};

This is mostly to avoid longish argument lists. Besides, it's good to keep related information together.

Next, we need a way of creating such arrays:

static struct int_array_with_size *
new_int_array_with_size(
        size_t n_elem
        )
{
    struct int_array_with_size *p = malloc(sizeof(*p));
    if (!p) {
        return NULL;
    }

    p->ary = malloc(n_elem * (sizeof(*(p->ary))));
    if (!p->ary) {
        return NULL;
    }

    p->size = n_elem;
    return p;
}

We'd end up writing a lot of extra lines without a simple function to free such a struct:

static void free_int_array_with_size(
        struct int_array_with_size **p
        )
{
    free((*p)->ary);
    (*p)->ary = NULL;
    free(*p);
    *p = NULL;
    return;
}

A function to fill such an array with a bunch of integers is useful as well:

static void
fill_int_array(
        struct int_array_with_size *p,
        int start,
        int delta
        )
{
    size_t i;
    size_t k = p->size;

    for (i = 0; i < k; i += 1) {
        (p->ary)[i] = start;
        start += delta;
    }
    return;
}

And, of course, we might want to print the contents, too:

static void
print_int_array(
        const struct int_array_with_size *p
        )
{
    size_t i;
    size_t k = p->size;

    printf("[ ");
    for (i = 0; i < (k - 1); i += 1) {
        printf("%d, ", (p->ary)[i]);
    }
    printf("%d ]\n", (p->ary)[i]);

    return;
}

And, of course, we'll need min and max:

static size_t min_size_t(const size_t x, const size_t y)
{
    return (x < y) ? x : y;
}

static size_t max_size_t(const size_t x, const size_t y)
{
    return (x > y) ? x : y;
}

Sure, you can define a generic macro, but I am allergic to macros that evaluate expressions more than once.

The algorithm for merging is quite simple: Allocate a similar array that can hold all the elements of the arrays to be merged. Then, fill in the even indexed elements from the first array, and the odd-indexed elements from the second array, until you reach the end of the shorter one. Then, fill in the rest of the elements from the longer one:

struct int_array_with_size *
merge_int_arrays(
        const struct int_array_with_size *x,
        const struct int_array_with_size *y
        )
{
    int *end = NULL;
    int *dest = NULL;
    int *src = NULL;
    size_t limit = 0;
    size_t remainder_size = 0;

    struct int_array_with_size *r = malloc(sizeof(*r));
    if (!r) {
        return NULL;
    }

    r->size = x->size + y->size;
    r->ary = malloc(r->size * sizeof(*(r->ary)));

    if (!r->ary) {
        free(r);
        return NULL;
    }

    limit = min_size_t(x->size, y->size);
    src = x->ary;
    end = r->ary + 2 * limit;

    for (dest = r->ary; dest < end; dest += 2) {
        *dest = *src;
        src += 1;
    }

    src = y->ary;
    for (dest = (1 + r->ary); dest < end; dest += 2) {
        *dest = *src;
        src += 1;
    }

    dest = end;
    remainder_size = sizeof(*(r->ary)) *
        (max_size_t(x->size, y->size) - limit);

    src = (x->size > y->size) ? x->ary : y->ary;
    src += limit;
    memcpy(dest, src, remainder_size);

    return r;
}

Note the one bit of premature optimization where instead of having a single for loop that copies elements from both arrays in one go, I first fill in the even-indexed elements and then the odd-indexed elements based on the assumption that not jumping between memory locations is going to be faster. No, I didn't measure anything … just wrote it that way at first and later thought about why I did that ;-)

Finally, here's a short main as proof that code works for some values of "works":

int main(void) {
    struct int_array_with_size *r;
    struct int_array_with_size *x = new_int_array_with_size(3);
    struct int_array_with_size *y = new_int_array_with_size(5);

    if (!x || !y) {
        return EXIT_FAILURE;
    }

    fill_int_array(x, 1, 3);
    print_int_array(x);

    fill_int_array(y, 1000, 10);
    print_int_array(y);

    r = merge_int_arrays(x, y);
    if (!r) {
        return EXIT_FAILURE;
    }

    print_int_array(r);

    free_int_array_with_size(&r);
    free_int_array_with_size(&y);
    free_int_array_with_size(&x);

    return EXIT_SUCCESS;
}

Of course, what would we do without a Perl version?

#!/usr/bin/env perl

use 5.014;
use strict;
use warnings;

sub make_iterator {
    my $array = shift;
    return sub {
        state $i = 0;
        return $i <= $#$array ? $array->[$i++] : ();
    };
}

sub make_merger {
    my $iterators = shift;
    return sub { map $_->(), @$iterators };
}

my @iterators = map make_iterator($_), [1 .. 3], [5 .. 10];
my $merger = make_merger(\@iterators);

my @merged;
while (my @stripe = $merger->()) {
    push @merged, @stripe;
}

say "[ @merged ]";

OK, now I am being silly. That handles an arbitrary number of arrays as well as data types.

No comments:

Post a Comment