Thursday, March 21, 2013

How can I retrieve content from a URL in C?

I saw this question on Stackoverflow.com. Given that the OP had posted only Java code in a C question, and Stackoverflow is not a code translation service, I voted to close the question. However, given that C does not come with standard libraries for network communication, the question is still interesting.

To accomplish this task, one must either dive into socket code, implementing the HTTP protocol by hand and ending up with platform-specific code, or use a portable library built for this purpose. For this example, I chose libcurl. libcurl has an easy mode for making synchronous transfers. We are going to use that and specify a custom CURLOPT_WRITEFUNCTION to save the retrieved content in a buffer. The code just prints out the contents of the buffer at the end. If that is your sole purpose, it would be better not to specify a callback, but just provide a file pointer to which libcurl can send content as it arrives.

I have omitted most error checking in the interest of clarity. In addition, I specify an absurd MY_INITIAL_BUFFER_SIZE to make sure the buffer resizing works. You probably want to change that to (128 * 1024) to minimize memory reallocations. I also set a rather arbitrary cap on how big the buffer can grow.

The part of the code dealing with libcurl is rather straightforward: Just set a bunch of options and execute. An overview of libcurl's easy mode reveals as much.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

/* Size in bytes of the buffer allocated before the transfer starts.
 * Deliberately tiny so that the growth path in the write callback is
 * exercised on every run. */
#define MY_INITIAL_BUFFER_SIZE (1)
/* Upper bound in bytes checked by the write callback when it grows the
 * receive buffer. */
#define MY_MAXIMUM_BUFFER_SIZE (4 * 1024 * 1024)

/* Growable byte buffer that accumulates the data handed to the libcurl
 * write callback. */
struct my_buffer {
    unsigned char *memory; /* heap storage; resized with realloc() */
    size_t size;           /* bytes currently allocated at memory */
    size_t used;           /* bytes of memory already filled with data */
};

/*
 * libcurl write callback: appends the size * nmemb bytes at ptr to the
 * growable buffer passed through userdata (a struct my_buffer *).
 *
 * Returns the number of bytes consumed. Returning a different value (0
 * here) makes libcurl abort the transfer; that happens when the buffer
 * would have to grow beyond MY_MAXIMUM_BUFFER_SIZE, when size * nmemb
 * overflows, or when realloc() fails. On failure the buffer is left
 * unchanged.
 */
static size_t
my_curl_write(char *ptr, size_t size, size_t nmemb, void *userdata){
    struct my_buffer *buffer = userdata;
    const size_t cap = (MY_MAXIMUM_BUFFER_SIZE);
    size_t needed;

    /* Guard the multiplication below against wrap-around. */
    if (size != 0 && nmemb > (size_t)-1 / size) {
        return 0;
    }
    needed = size * nmemb;

    if (needed > (buffer->size - buffer->used)) {
        unsigned char *new_memory;
        size_t required;
        size_t new_size;

        /* Enforce the cap up front, on every growth, without overflowing. */
        if (buffer->used > cap || needed > cap - buffer->used) {
            return 0;
        }
        required = buffer->used + needed;

        /* Start from at least 1 so doubling makes progress for size == 0. */
        new_size = buffer->size ? buffer->size : 1;
        while (new_size < required) {
            new_size *= 2;
        }
        /* required <= cap, so clamping still leaves enough room. */
        if (new_size > cap) {
            new_size = cap;
        }

        new_memory = realloc(buffer->memory, new_size);
        if (!new_memory) {
            return 0;
        }
        buffer->memory = new_memory;
        buffer->size = new_size;
    }

    /* Skip the copy for empty chunks; memcpy with null pointers is undefined. */
    if (needed != 0) {
        memcpy(buffer->memory + buffer->used, ptr, needed);
        buffer->used += needed;
    }
    return needed;
}

static CURLcode get_url(
        const char *url,
        struct my_buffer *buffer,
        char *curl_error
        ) {
    CURLcode result;
    CURL *my_curl = curl_easy_init();

    curl_easy_setopt(my_curl, CURLOPT_ERRORBUFFER, curl_error);
    curl_easy_setopt(my_curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_easy_setopt(my_curl, CURLOPT_URL, url);
    curl_easy_setopt(my_curl, CURLOPT_WRITEFUNCTION, my_curl_write);
    curl_easy_setopt(my_curl, CURLOPT_WRITEDATA, buffer);

    result = curl_easy_perform(my_curl);

    curl_easy_cleanup(my_curl);

    return result;
}

int main(void) {
    CURLcode result;
    struct my_buffer buffer;
    char curl_error[CURL_ERROR_SIZE];

    buffer.memory = malloc(MY_INITIAL_BUFFER_SIZE);
    buffer.size = MY_INITIAL_BUFFER_SIZE;
    buffer.used = 0;

    result = get_url("http://www.example.com/", &buffer, curl_error);

    if (result == 0) {
        fwrite(buffer.memory, 1, buffer.used, stdout);
    }
    else {
        puts(curl_error);
    }

    return 0;
}

1 comment: