C 语言中可运行的 curl_multi_perform() 示例

Working curl_multi_perform() example in C

我花了几个小时才弄清楚如何以通用的方式使用 curl_multi_perform()。这个例子可能对其他人有帮助。

它的基本思路是接收一个结构体对象，并把 curl 的全部输出以字符串形式存入其中。然后程序员可以取出该字符串并进行任意处理。如果有人有任何改进建议，我会非常高兴看到。

这是一个名为“multicurl.h”的头文件,其中包含结构和函数原型。

/* Include guard. The name deliberately does not start with an underscore
   followed by an uppercase letter: such identifiers are reserved for the
   C implementation. */
#ifndef MULTICURL_HEADER_H
#define MULTICURL_HEADER_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>


// A data structure used to hold the result of the cURL request.
// It is a growable heap buffer: write_callback() appends each received chunk to it.
typedef struct{ 
    size_t size;   // Number of response bytes stored so far, not counting the terminating NUL.
    char *memory;  // malloc()/realloc()-managed buffer holding the response; freed by the caller (see main()).
} MemType;

// Function Prototypes

// Initialise *output, create an easy handle that downloads `url` into it and
// attach that handle to the file-scope multi handle. Always returns NULL.
void *SetUpCurlHandle(char *url, MemType *output);

// Initialise libcurl and create the file-scope multi handle. Always returns NULL.
void *SetUpMultiCurlHandle(void);

// Run every attached transfer to completion, then release the easy handles,
// the multi handle and libcurl's global state. Always returns NULL.
void *PerformMultiCurl(void);

#endif

这是包含函数定义的名为 multicurl.c 的源代码文件。


#include "multicurl.h"

/* Upper bound, in milliseconds, for a single curl_multi_wait() call.
   The expansion is parenthesised so the macro behaves as one value inside
   larger expressions (e.g. x / MAX_WAIT_MSECS). */
#define MAX_WAIT_MSECS (5 * 1000) /* Wait max. 5 seconds */

/* Number of transfers curl_multi_perform() reported as still in progress;
   updated by PerformMultiCurl(). */
int still_running = 0;
/* The single multi handle shared by the functions in this file: created by
   SetUpMultiCurlHandle(), filled by SetUpCurlHandle(), driven and released
   by PerformMultiCurl(). */
CURLM *mult_hnd = NULL;

/*
 * libcurl write callback: append one chunk of received data to a MemType.
 *
 * libcurl calls this repeatedly while response data arrives. The chunk is
 * not NUL-terminated, so the buffer is grown by one extra byte and a
 * terminator is written after the data; the next call overwrites it.
 *
 * ptr      - the received bytes (owned by libcurl).
 * size     - size of one element.
 * nmemb    - number of elements; the chunk is size * nmemb bytes long.
 * userdata - the MemType * registered through CURLOPT_WRITEDATA.
 *
 * Returns the number of bytes stored. When the buffer cannot be grown the
 * function returns 0 instead of terminating the process: a return value
 * different from size * nmemb makes libcurl abort this transfer with
 * CURLE_WRITE_ERROR, so the caller can report the failure and other
 * transfers keep running. On failure mem->memory and mem->size are left
 * untouched.
 */
size_t write_callback(char *ptr, size_t size, size_t nmemb, void *userdata){
    size_t chunk_len = size * nmemb;
    MemType *mem = (MemType *)userdata;

    /* +1 leaves room for the NUL terminator. */
    char *grown = realloc(mem->memory, mem->size + chunk_len + 1);
    if (grown == NULL){
        fprintf(stderr, "Not Enough Memory, realloc returned NULL.\n");
        return 0;
    }

    mem->memory = grown;
    /* Copy the chunk over the previous terminator, at offset mem->size. */
    memcpy(mem->memory + mem->size, ptr, chunk_len);
    mem->size += chunk_len;
    mem->memory[mem->size] = '\0';

    return chunk_len;
}

/*
 * Prepare one download and attach it to the file-scope multi handle.
 *
 * url    - the URL to request.
 * output - result structure; it is (re)initialised here to an empty,
 *          NUL-terminated string so it can always be printed, even when the
 *          transfer later fails or delivers no data. The caller must free
 *          output->memory. If the initial allocation fails, output->memory
 *          is NULL and no transfer is set up.
 *
 * Always returns NULL; the result is delivered through *output. Failures
 * are reported on stderr.
 */
void *SetUpCurlHandle(char *url, MemType *output){
    CURL *hnd = NULL;

    output->size = 0;
    output->memory = malloc(1);
    if (output->memory == NULL){
        fprintf(stderr, "error: malloc() failed while preparing the output buffer\n");
        return NULL;
    }
    output->memory[0] = '\0';

    // Initialize the cURL handle.
    hnd = curl_easy_init();
    if (hnd == NULL){
        fprintf(stderr, "error: curl_easy_init() failed\n");
        return NULL;
    }

    // Setup the cURL options.
    curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L);
    curl_easy_setopt(hnd, CURLOPT_URL, url);// Set the request URL
    curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
    curl_easy_setopt(hnd, CURLOPT_USERAGENT, "curl/7.76.0");
    curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L);
    curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS);
    curl_easy_setopt(hnd, CURLOPT_FTP_SKIP_PASV_IP, 1L);
    curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L);
    curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, write_callback);// Received data is handed to this callback.
    curl_easy_setopt(hnd, CURLOPT_WRITEDATA, (void *)output);// Passed to the callback as its userdata argument.
    //curl_easy_setopt(hnd, CURLOPT_VERBOSE, 1);

    // An easy handle that could not be attached would never be cleaned up
    // by PerformMultiCurl(), so release it here.
    CURLMcode add_rc = curl_multi_add_handle(mult_hnd, hnd);
    if (add_rc != CURLM_OK){
        fprintf(stderr, "error: curl_multi_add_handle() returned %d\n", (int)add_rc);
        curl_easy_cleanup(hnd);
    }
    return NULL;// The output struct was passed by reference no need to return anything.
}

void *SetUpMultiCurlHandle(){// set up curl multi handle
    curl_global_init(CURL_GLOBAL_ALL);

    mult_hnd = curl_multi_init();
    return NULL;
}

void *PerformMultiCurl(){// Request data from remote server asynchronously
    CURLMsg *msg=NULL;
    CURL *hnd = NULL;
    CURLcode return_code = 0;
    int msgs_left = 0;

    curl_multi_perform(mult_hnd, &still_running);
    do {
        int numfds=0;
        int res = curl_multi_wait(mult_hnd, NULL, 0, MAX_WAIT_MSECS, &numfds);
        if(res != CURLM_OK) {
            fprintf(stderr, "error: curl_multi_wait() returned %d\n", res);
            return NULL;
        }
        curl_multi_perform(mult_hnd, &still_running);
        /* if there are still transfers, loop! */
    } while(still_running);

    while ((msg = curl_multi_info_read(mult_hnd, &msgs_left))) {
        if (msg->msg == CURLMSG_DONE) {
            hnd = msg->easy_handle;

            return_code = msg->data.result;
            if(return_code!=CURLE_OK) {
                fprintf(stderr, "CURL error code: %d\n", msg->data.result);
                continue;
            }

            curl_multi_remove_handle(mult_hnd, hnd);
            curl_easy_cleanup(hnd);
            hnd = NULL;
        }
        else {
            fprintf(stderr, "error: after curl_multi_info_read(), CURLMsg=%d\n", msg->msg);
        }
    }

    curl_multi_cleanup(mult_hnd);
    curl_global_cleanup();
    return NULL;
}

这是名为 multicurlexample.c 的主要源代码文件。

//gcc -o multicurl multicurlexample.c multicurl.c -lcurl

#include "multicurl.h"

/* Download the latest weather observations for two stations in one multi
   transfer and print both responses. */
int main(){
    /* Filled in by SetUpCurlHandle() and the transfers; freed below. */
    MemType boston_reply;
    MemType nyc_reply;
    char *boston_url = "https://api.weather.gov/stations/KBOS/observations/latest"; //Boston Weather
    char *nyc_url = "https://api.weather.gov/stations/KLGA/observations/latest"; //NYC Weather

    SetUpMultiCurlHandle();

    /* Queue both requests, then run them together. */
    SetUpCurlHandle(boston_url, &boston_reply);
    SetUpCurlHandle(nyc_url, &nyc_reply);
    PerformMultiCurl();

    printf("Output:\n%s\n", boston_reply.memory);
    printf("Output:\n%s\n", nyc_reply.memory);

    free(boston_reply.memory);
    free(nyc_reply.memory);

    return 0;
}

这是上述示例的改进版本：采纳了大家提出的建议（不再使用全局变量），应当对多线程更友好。

名为“multicurl.h”的头文件


#ifndef MULTICURL_HEADER_H
#define MULTICURL_HEADER_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>


// A data structure used to hold the result of the cURL request.
// It is a growable heap buffer: write_callback() appends each received chunk to it.
typedef struct{ 
    size_t size;   // Number of response bytes stored so far, not counting the terminating NUL.
    char *memory;  // malloc()/realloc()-managed buffer holding the response; freed by the caller (see main()).
} MemType;

// Function Prototypes

// Initialise *output, create an easy handle that downloads `url` into it and
// attach that handle to the multi handle `mh`. Always returns NULL.
void *SetUpCurlHandle(CURLM *mh, char *url, MemType *output);

// Initialise libcurl and return a new multi handle (NULL on failure).
CURLM *SetUpMultiCurlHandle(void);

// Run every transfer attached to `mh` to completion, then release the easy
// handles, `mh` itself and libcurl's global state. Always returns NULL.
void *PerformMultiCurl(CURLM *mh);

#endif

包含函数定义的 C 源代码文件，名为“multicurl.c”

#include "multicurl.h"

/* Upper bound, in milliseconds, for a single curl_multi_wait() call.
   The expansion is parenthesised so the macro behaves as one value inside
   larger expressions (e.g. x / MAX_WAIT_MSECS). */
#define MAX_WAIT_MSECS (5 * 1000) /* Wait max. 5 seconds */

/*
 * libcurl write callback: append one chunk of received data to a MemType.
 *
 * libcurl calls this repeatedly while response data arrives. The chunk is
 * not NUL-terminated, so the buffer is grown by one extra byte and a
 * terminator is written after the data; the next call overwrites it.
 *
 * ptr      - the received bytes (owned by libcurl).
 * size     - size of one element.
 * nmemb    - number of elements; the chunk is size * nmemb bytes long.
 * userdata - the MemType * registered through CURLOPT_WRITEDATA.
 *
 * Returns the number of bytes stored. When the buffer cannot be grown the
 * function returns 0 instead of terminating the process: a return value
 * different from size * nmemb makes libcurl abort this transfer with
 * CURLE_WRITE_ERROR, so the caller can report the failure and other
 * transfers keep running. On failure mem->memory and mem->size are left
 * untouched.
 */
size_t write_callback(char *ptr, size_t size, size_t nmemb, void *userdata){
    size_t chunk_len = size * nmemb;
    MemType *mem = (MemType *)userdata;

    /* +1 leaves room for the NUL terminator. */
    char *grown = realloc(mem->memory, mem->size + chunk_len + 1);
    if (grown == NULL){
        fprintf(stderr, "Not Enough Memory, realloc returned NULL.\n");
        return 0;
    }

    mem->memory = grown;
    /* Copy the chunk over the previous terminator, at offset mem->size. */
    memcpy(mem->memory + mem->size, ptr, chunk_len);
    mem->size += chunk_len;
    mem->memory[mem->size] = '\0';

    return chunk_len;
}

/*
 * Prepare one download and attach it to a multi handle.
 *
 * mh     - the multi handle the new easy handle is added to.
 * url    - the URL to request.
 * output - result structure; it is (re)initialised here to an empty,
 *          NUL-terminated string so it can always be printed, even when the
 *          transfer later fails or delivers no data. The caller must free
 *          output->memory. If the initial allocation fails, output->memory
 *          is NULL and no transfer is set up.
 *
 * Always returns NULL; the result is delivered through *output. Failures
 * are reported on stderr.
 */
void *SetUpCurlHandle(CURLM * mh, char *url, MemType *output){
    CURL *hnd = NULL;

    output->size = 0;
    output->memory = malloc(1);
    if (output->memory == NULL){
        fprintf(stderr, "error: malloc() failed while preparing the output buffer\n");
        return NULL;
    }
    output->memory[0] = '\0';

    // Initialize the cURL handle.
    hnd = curl_easy_init();
    if (hnd == NULL){
        fprintf(stderr, "error: curl_easy_init() failed\n");
        return NULL;
    }

    // Setup the cURL options.
    curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L);
    curl_easy_setopt(hnd, CURLOPT_URL, url);// Set the request URL
    curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
    curl_easy_setopt(hnd, CURLOPT_USERAGENT, "curl/7.76.0");
    curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L);
    curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS);
    curl_easy_setopt(hnd, CURLOPT_FTP_SKIP_PASV_IP, 1L);
    curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L);
    curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, write_callback);// Received data is handed to this callback.
    curl_easy_setopt(hnd, CURLOPT_WRITEDATA, (void *)output);// Passed to the callback as its userdata argument.
    //curl_easy_setopt(hnd, CURLOPT_VERBOSE, 1);

    // An easy handle that could not be attached would never be cleaned up
    // by PerformMultiCurl(), so release it here.
    CURLMcode add_rc = curl_multi_add_handle(mh, hnd);
    if (add_rc != CURLM_OK){
        fprintf(stderr, "error: curl_multi_add_handle() returned %d\n", (int)add_rc);
        curl_easy_cleanup(hnd);
    }
    return NULL;// The output struct was passed by reference no need to return anything.
}

CURLM *SetUpMultiCurlHandle(){
    curl_global_init(CURL_GLOBAL_ALL);

    CURLM * mh = curl_multi_init();
    return mh;
}

/*
 * Run every transfer attached to `mh` to completion, then release the
 * finished easy handles, the multi handle and libcurl's global state.
 *
 * The transfers progress concurrently, driven from the calling thread by
 * repeated curl_multi_perform() calls; curl_multi_wait() blocks for at most
 * MAX_WAIT_MSECS between calls.
 *
 * mh - multi handle prepared with SetUpMultiCurlHandle()/SetUpCurlHandle().
 *      It is destroyed by this call and must not be used afterwards.
 *
 * Always returns NULL. Errors are reported on stderr. The multi handle and
 * global state are released on every path. Easy handles of transfers that
 * never completed (only possible after a multi-interface error) cannot be
 * enumerated here and are not released.
 */
void *PerformMultiCurl(CURLM * mh)
{
    CURLMsg *msg = NULL;
    int still_running = 0;
    int msgs_left = 0;

    CURLMcode mc = curl_multi_perform(mh, &still_running);
    if (mc != CURLM_OK) {
        fprintf(stderr, "error: curl_multi_perform() returned %d\n", (int)mc);
    }

    /* Without this loop the function would continue before the responses
       have arrived. Keep driving the transfers until none is running or an
       error occurs. */
    while (mc == CURLM_OK && still_running) {
        int numfds = 0;

        mc = curl_multi_wait(mh, NULL, 0, MAX_WAIT_MSECS, &numfds);
        if (mc != CURLM_OK) {
            fprintf(stderr, "error: curl_multi_wait() returned %d\n", (int)mc);
            break;
        }

        mc = curl_multi_perform(mh, &still_running);
        if (mc != CURLM_OK) {
            fprintf(stderr, "error: curl_multi_perform() returned %d\n", (int)mc);
        }
    }

    /* Release every finished easy handle; change this part to keep the
       handles alive for reuse. */
    while ((msg = curl_multi_info_read(mh, &msgs_left)) != NULL) {
        if (msg->msg != CURLMSG_DONE) {
            fprintf(stderr, "error: after curl_multi_info_read(), CURLMsg=%d\n", msg->msg);
            continue;
        }

        /* Copy what is needed first: the message does not survive
           curl_multi_remove_handle(). */
        CURL *hnd = msg->easy_handle;
        CURLcode result = msg->data.result;

        if (result != CURLE_OK) {
            fprintf(stderr, "CURL error code: %d\n", result);
        }

        /* Failed transfers are released too, otherwise their handle leaks. */
        curl_multi_remove_handle(mh, hnd);
        curl_easy_cleanup(hnd);
    }

    curl_multi_cleanup(mh);
    curl_global_cleanup();
    return NULL;
}

名为“multicurlexample.c”的主要 c 源代码文件

//gcc -o multicurl multicurlexample.c multicurl.c -lcurl
#include "multicurl.h"

/* Download the latest weather observations for two stations through one
   multi handle and print both responses. */
int main(){
    CURLM *multi = SetUpMultiCurlHandle();

    /* The URLs are plain string literals. Heap copies made with the POSIX
       strdup() function (malloc + strcpy) would work as well, but would
       have to be released with free() at the end; nothing here needs that. */
    char *boston_url = "https://api.weather.gov/stations/KBOS/observations/latest"; //Boston Weather
    char *nyc_url = "https://api.weather.gov/stations/KLGA/observations/latest"; //NYC Weather

    /* Filled in by SetUpCurlHandle() and the transfers; freed below. */
    MemType boston_reply;
    MemType nyc_reply;

    /* Queue both requests, then run them together. */
    SetUpCurlHandle(multi, boston_url, &boston_reply);
    SetUpCurlHandle(multi, nyc_url, &nyc_reply);
    PerformMultiCurl(multi);

    printf("Output:\n%s\n", boston_reply.memory);
    printf("Output:\n%s\n", nyc_reply.memory);

    free(boston_reply.memory);
    free(nyc_reply.memory);

    return 0;
}

这是同一个程序的另一个版本，用于处理 UTF-8。下面依次是 multicurlexample.c、multicurl.h 和 multicurl.c。

//gcc -o multicurl multicurlexample.c multicurl.c -lcurl
#include "multicurl.h"

/*
 * Convert a NUL-terminated multibyte string into a newly allocated,
 * NUL-terminated wchar_t string so that each character can be addressed
 * individually.
 *
 * Decoding is done with mbtowc(), so the input is interpreted according to
 * the current LC_CTYPE locale; call setlocale() with a UTF-8 locale first
 * to decode UTF-8.
 *
 * output_stream - receives the allocated wide string on success; the caller
 *                 frees it. On any failure *output_stream is set to NULL
 *                 and nothing is left allocated.
 * input_stream  - the multibyte string to decode.
 *
 * Returns the number of wide characters, not counting the terminator, on
 * success; -1 if the input is NULL or contains an invalid multibyte
 * sequence; -2 if memory cannot be allocated. The length is returned as an
 * int, so inputs longer than INT_MAX characters are not supported.
 */
int parse_UTF8_bitstream(wchar_t **output_stream, const char *input_stream )
{
    if (output_stream == NULL) return -1;
    *output_stream = NULL;
    if (input_stream == NULL) return -1;

    size_t nbytes = strlen( input_stream );
    size_t pos = 0;   /* Byte offset into input_stream. */
    size_t len = 0;   /* Wide characters produced so far. */

    /* Every character occupies at least one byte, so nbytes + 1 elements
       are always enough; allocate once instead of growing per character. */
    if (nbytes >= (size_t)-1 / sizeof( wchar_t )) return -2;
    wchar_t *wide = malloc( (nbytes + 1) * sizeof *wide );
    if (wide == NULL) return -2;

    /* Reset mbtowc()'s conversion state in case an earlier caller left it
       in the middle of a sequence. */
    (void) mbtowc( NULL, NULL, 0 );

    while ( pos < nbytes ){
        /* Let mbtowc() look at everything up to the terminator. The limit
           is a byte count and is unrelated to sizeof(wchar_t). */
        int consumed = mbtowc( &wide[ len ], &input_stream[ pos ], nbytes - pos );
        if (consumed <= 0){
            free( wide );
            return -1;
        }

        pos += (size_t) consumed;
        len++;
    }

    wide[ len ] = L'\0';

    /* Hand back unused space; keeping the larger block is harmless if the
       shrink fails. */
    wchar_t *trimmed = realloc( wide, (len + 1) * sizeof *wide );
    if (trimmed != NULL) wide = trimmed;

    *output_stream = wide;
    return (int) len;
}

/* Download the latest weather observations for four stations through one
   multi handle, convert every response to wide characters and print it
   character by character. */
int main(){
    enum { STATION_COUNT = 4 };

    /*  Boston, NYC, San Francisco, and Chicago weather */
    char *station_urls[ STATION_COUNT ] = {
        "https://api.weather.gov/stations/KBOS/observations/latest",
        "https://api.weather.gov/stations/KLGA/observations/latest",
        "https://api.weather.gov/stations/SFOC1/observations/latest",
        "https://api.weather.gov/stations/KORD/observations/latest"
    };
    MemType replies[ STATION_COUNT ];      /* Raw response bodies. */
    wchar_t *wide_text[ STATION_COUNT ];   /* The same text as wide characters. */

    /* The locale must be set before the multibyte conversion below. */
    setlocale(LC_ALL, "");

    CURLM *multi = SetUpMultiCurlHandle();

    for (int k = 0; k < STATION_COUNT; k++){
        SetUpCurlHandle( multi, station_urls[ k ], &replies[ k ] );
    }

    PerformMultiCurl( multi );

    /* Converting to wide characters is only needed to address individual
       non-ASCII characters; plain ASCII text could be used directly. */
    for (int k = 0; k < STATION_COUNT; k++){
        int status = parse_UTF8_bitstream( &wide_text[ k ], replies[ k ].memory );

        if (status == -1){
            printf("\nThe parser received invalid Unicode.\n");
            exit( EXIT_FAILURE );
        }
        if (status == -2){
            printf("Not Enough Memory, the parser realloc returned NULL.\n");
            exit( EXIT_FAILURE );
        }
    }

    /* Print each response one wide character at a time to show that the
       characters are individually addressable, then release its buffers. */
    for (int k = 0; k < STATION_COUNT; k++){
        printf("Output:\n");
        for (const wchar_t *wc = wide_text[ k ]; *wc; wc++){
            printf("%lc", *wc );
        }
        printf("\n");

        free( replies[ k ].memory );
        free( wide_text[ k ] );
    }

    return 0;
}
#ifndef MULTICURL_HEADER_H
#define MULTICURL_HEADER_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

#include <locale.h>


// A data structure to hold the result of the cURL request.
// It is a growable heap buffer: write_callback() appends each received chunk to it.
typedef struct{ 
    size_t size;   // Number of response bytes stored so far, not counting the terminating NUL.
    char *memory;  // malloc()/realloc()-managed buffer holding the response; freed by the caller (see main()).
} MemType;

// Function Prototypes

// Initialise *output, create an easy handle that downloads `url` into it and
// attach that handle to the multi handle `mh`. Always returns NULL.
void *SetUpCurlHandle(CURLM *mh, char *url, MemType *output);

// Initialise libcurl and return a new multi handle (NULL on failure).
CURLM *SetUpMultiCurlHandle(void);

// Run every transfer attached to `mh` to completion, then release the easy
// handles, `mh` itself and libcurl's global state. Always returns NULL.
void *PerformMultiCurl(CURLM *mh);

#endif
#include "multicurl.h"

/* Upper bound, in milliseconds, for a single curl_multi_wait() call.
   The expansion is parenthesised so the macro behaves as one value inside
   larger expressions (e.g. x / MAX_WAIT_MSECS). */
#define MAX_WAIT_MSECS (5 * 1000) /* Wait max. 5 seconds */

/*
 * libcurl write callback: append one chunk of received data to a MemType.
 *
 * libcurl calls this repeatedly while response data arrives. The chunk is
 * not NUL-terminated, so the buffer is grown by one extra byte and a
 * terminator is written after the data; the next call overwrites it.
 *
 * ptr      - the received bytes (owned by libcurl).
 * size     - size of one element.
 * nmemb    - number of elements; the chunk is size * nmemb bytes long.
 * userdata - the MemType * registered through CURLOPT_WRITEDATA.
 *
 * Returns the number of bytes stored. When the buffer cannot be grown the
 * function returns 0 instead of terminating the process: a return value
 * different from size * nmemb makes libcurl abort this transfer with
 * CURLE_WRITE_ERROR, so the caller can report the failure and other
 * transfers keep running. On failure mem->memory and mem->size are left
 * untouched.
 */
static size_t write_callback(void *ptr, size_t size, size_t nmemb, void *userdata){
    size_t chunk_len = size * nmemb;
    MemType *mem = (MemType *)userdata;

    /* +1 leaves room for the NUL terminator. */
    char *grown = realloc(mem->memory, mem->size + chunk_len + 1);
    if (grown == NULL){
        fprintf(stderr, "Not Enough Memory, realloc returned NULL.\n");
        return 0;
    }

    mem->memory = grown;
    /* Copy the chunk over the previous terminator, at offset mem->size. */
    memcpy(mem->memory + mem->size, ptr, chunk_len);
    mem->size += chunk_len;
    mem->memory[mem->size] = '\0';

    return chunk_len;
}

/*
 * Prepare one download and attach it to a multi handle.
 *
 * mh     - the multi handle the new easy handle is added to.
 * url    - the URL to request.
 * output - result structure; it is (re)initialised here to an empty,
 *          NUL-terminated string so it can always be printed, even when the
 *          transfer later fails or delivers no data. The caller must free
 *          output->memory. If the initial allocation fails, output->memory
 *          is NULL and no transfer is set up.
 *
 * Always returns NULL; the result is delivered through *output. Failures
 * are reported on stderr.
 */
void *SetUpCurlHandle(CURLM * mh, char *url, MemType *output){
    CURL *hnd = NULL;

    output->size = 0;
    output->memory = malloc(1);
    if (output->memory == NULL){
        fprintf(stderr, "error: malloc() failed while preparing the output buffer\n");
        return NULL;
    }
    output->memory[0] = '\0';

    // Initialize the cURL handle.
    hnd = curl_easy_init();
    if (hnd == NULL){
        fprintf(stderr, "error: curl_easy_init() failed\n");
        return NULL;
    }

    // Setup the cURL options.
    curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L);
    curl_easy_setopt(hnd, CURLOPT_URL, url);// Set the request URL
    curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
    curl_easy_setopt(hnd, CURLOPT_USERAGENT, "curl/7.80.0");
    curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L);
    curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS);
    curl_easy_setopt(hnd, CURLOPT_FTP_SKIP_PASV_IP, 1L);
    curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L);
    curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, write_callback);// Received data is handed to this callback.
    curl_easy_setopt(hnd, CURLOPT_WRITEDATA, (void *)output);// Passed to the callback as its userdata argument.
    //curl_easy_setopt(hnd, CURLOPT_VERBOSE, 1);

    // An easy handle that could not be attached would never be cleaned up
    // by PerformMultiCurl(), so release it here.
    CURLMcode add_rc = curl_multi_add_handle(mh, hnd);
    if (add_rc != CURLM_OK){
        fprintf(stderr, "error: curl_multi_add_handle() returned %d\n", (int)add_rc);
        curl_easy_cleanup(hnd);
    }
    return NULL;// The output struct was passed by reference no need to return anything.
}

CURLM *SetUpMultiCurlHandle(){
    curl_global_init(CURL_GLOBAL_ALL);

    CURLM * mh = curl_multi_init();
    return mh;
}

/*
 * Run every transfer attached to `mh` to completion, then release the
 * finished easy handles, the multi handle and libcurl's global state.
 *
 * The transfers progress concurrently, driven from the calling thread by
 * repeated curl_multi_perform() calls; curl_multi_wait() blocks for at most
 * MAX_WAIT_MSECS between calls.
 *
 * mh - multi handle prepared with SetUpMultiCurlHandle()/SetUpCurlHandle().
 *      It is destroyed by this call and must not be used afterwards.
 *
 * Always returns NULL. Errors are reported on stderr. The multi handle and
 * global state are released on every path. Easy handles of transfers that
 * never completed (only possible after a multi-interface error) cannot be
 * enumerated here and are not released.
 */
void *PerformMultiCurl(CURLM * mh) 
{
    CURLMsg *msg = NULL;
    int still_running = 0;
    int msgs_left = 0;

    CURLMcode mc = curl_multi_perform(mh, &still_running);// Start the requests.
    if (mc != CURLM_OK) {
        fprintf(stderr, "error: curl_multi_perform() returned %d\n", (int)mc);
    }

    /* Without this loop the function would continue before the responses
       have arrived. Keep driving the transfers until none is running or an
       error occurs. */
    while (mc == CURLM_OK && still_running) {
        int numfds = 0;

        mc = curl_multi_wait(mh, NULL, 0, MAX_WAIT_MSECS, &numfds);
        if (mc != CURLM_OK) {
            fprintf(stderr, "error: curl_multi_wait() returned %d\n", (int)mc);
            break;
        }

        mc = curl_multi_perform(mh, &still_running);
        if (mc != CURLM_OK) {
            fprintf(stderr, "error: curl_multi_perform() returned %d\n", (int)mc);
        }
    }

    /* Release every finished easy handle; change this part to keep the
       handles alive for reuse. */
    while ((msg = curl_multi_info_read(mh, &msgs_left)) != NULL) {
        if (msg->msg != CURLMSG_DONE) {
            fprintf(stderr, "error: after curl_multi_info_read(), CURLMsg=%d\n", msg->msg);
            continue;
        }

        /* Copy what is needed first: the message does not survive
           curl_multi_remove_handle(). */
        CURL *hnd = msg->easy_handle;
        CURLcode result = msg->data.result;

        if (result != CURLE_OK) {
            fprintf(stderr, "CURL error code: %d\n", result);
        }

        /* Failed transfers are released too, otherwise their handle leaks. */
        curl_multi_remove_handle(mh, hnd);
        curl_easy_cleanup(hnd);
    }

    curl_multi_cleanup(mh);
    curl_global_cleanup();
    return NULL;
}