使用通过 libcurl 检索到的 header 信息来确定文件名

Using header information retrieved with libcurl to determine file name

我正在尝试使用 libcurl 解析 header 信息,以便保留所下载文件的原始文件名。目前我在 header 的每一行中查找子字符串 "Content-Disposition" 来找到文件名。编写下面的代码时,我的想法是:只需编辑查找表(见下文说明),就可以扩展在 header 数据中查找的子字符串的数量。我遇到的问题是:无论 URL 是什么,我的代码检查到的 header 信息中从来不包含我要找的子字符串(目前只有 "Content-Disposition: ")。为什么我在 header 信息中看不到它?

相关代码如下:

/**
 * file_info_container - per-download file metadata
 * @file_name: name of the file
 * @file_url:  URL of the file
 * @file_type: type of the file
 *
 * Groups the pieces of information about one file so that the different
 * CURL operations can share them through a single pointer.
 */
typedef struct file_info_container {
    char *file_name;
    char *file_url;
    char *file_type;
} file_info_container;

/**
 * filename_tags - lookup table of sub-strings that mark a header line which
 *                 may carry the original (remote) file name. Entry i of this
 *                 table always corresponds to entry i of
 *                 filename_denotations.
 *
 * The entries are ordinary C string literals; they are already
 * NUL-terminated, so no explicit terminator is written inside the literal.
 */
static const char *const filename_tags[] = {
    "Content-Disposition: ",
};

/**
 * filename_denotations - lookup table of the patterns that immediately
 *                        precede the file name inside a string already
 *                        known to contain one. Entry i of this table always
 *                        corresponds to entry i of filename_tags.
 */
static const char *const filename_denotations[] = {
    "filename=\"",
};

...

/**
 * header_cb - callback function used by curl to parse header information
 * @headerln: contents of current header line being parsed
 * headerln_data_size: data size of headerln[n]
 * headerln_n: number of elements in headerln[]
 * @parsed_data: structure where file information is stored
 *
 * Populates  parsed_data->file_name with either the filename provided by
 * the file header (if it is available) or NULL if it is not.
 */
static size_t header_cb(char *headerln, size_t headerln_data_size,
                        size_t headerln_n, void *parsed_data)
{
    char *bff = NULL, *filename = NULL;
    file_info_container *file_info = (file_info_container*) parsed_data;
    size_t i, ret = headerln_data_size * headerln_n;

    for(i = 0; i < sizeof(filename_tags) / sizeof(filename_tags[0]); i++) {
        filename = strstr(headerln, filename_tags[i]);
        if(filename) {
            bff = strstr(filename, filename_denotations[i]) +     
                         strlen(filename_denotations[i]);
            if(bff) {
                if(strchr(bff, (int)'.'))
                {
                    filename = malloc((strlen(bff) * sizeof(char)) + 1);
                    if(!filename) {
                        //malloc error
                    }
                    while(*bff != '.')
                        *filename++ = *bff++;
                    bff = realloc(filename, strlen((filename) + 1) * 
                                  sizeof(char));
                    if(!bff) {
                        //realloc error
                    }
                    filename = bff;
                    *filename = '[=11=]';
                }
            } else
                filename = NULL;
        }
    }
    if(filename) {
        file_info->file_name = malloc((strlen(filename) + 1) * 
                                      sizeof(char));
        if(!file_info->file_name) {
            //malloc error
        }
        strcpy(file_info->file_name, filename);
    }
    return ret;
}

...

/*
 * Will comment later when this function is more finalized
 */
void download_url(char* url)
{
    char *bff = NULL, *temp_file;
    CURL *curl_handle;
    CURLcode res;
    file_info_container file_info;
    FILE *fp;

    bff = tmpnam(NULL);
    if(!bff) {
        //error handling for tmpnam failure
    }
    bff++;
    temp_file = malloc((strlen(bff) + 4) * sizeof(char));
    if(!temp_file) {
        //error handling for malloc failure
    }
    strcpy(temp_file, bff);
    free(bff);
    strcat(temp_file, "lok[=11=]");
    fp = fopen(temp_file, "wb");
    curl_handle = curl_easy_init();
    curl_easy_setopt(curl_handle, CURLOPT_URL, url);
    curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, header_cb);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_cb);
    curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, fp);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, fp);
    res = curl_easy_perform(curl_handle);
    fclose(fp);
    curl_easy_getinfo(curl_handle, CURLINFO_EFFECTIVE_URL,
                      &file_info.file_url);
    curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_TYPE,
                      &file_info.file_type);
    if(!file_info.file_type) {
        file_info.file_type = malloc(8 * sizeof(char));
        if(!file_info.file_type) {
            //error handling for memory allocation failure
        }
        strcpy(file_info.file_type, "bin/unk[=11=]");
    }
    //build_filename(&file_info);
    //rename the lok file with whatever we get later
    free(temp_file);
    curl_easy_cleanup(curl_handle);
    return 0;
}

主函数只是调用了download_url(url).....

TL;DR:无论我传给 download_url 的 URL 是什么,我在 header 中从来没有看到包含子字符串 "Content-Disposition: " 的行,这是为什么?

您请求的是一个特定的文件。
服务器正在向您提供您所请求的文件。
您已经知道所请求文件的名称!

您请求:http://foobar.com/Example.pdf
服务器正在向您发送 Example.pdf 的内容。

你怎么能说你不知道文件名呢?


如果您依赖的是 Content-Disposition header:服务器并不一定要发送它,事实上也很少发送。

Content-Disposition documentation

The Content-Disposition response-header field has been proposed as a means for the origin server to suggest a default filename if the user requests that the content is saved to a file.