使用通过 libcurl 检索到的 header 信息来确定文件名
Using header information retrieved with libcurl to determine file name
我正在尝试使用 libcurl 解析 header 信息,以便保留所下载文件的原始文件名。目前我在 header 的每一行中查找子字符串 "Content-Disposition" 来定位文件名。编写下面的代码时,我的想法是:以后只需编辑查找表(见下文说明),就能增加要在 header 数据中查找的子字符串。我遇到的问题是:无论 URL 是什么,我用来测试代码的 header 信息中都从不包含我要查找的子字符串(目前只有 "Content-Disposition: ")。为什么我在 header 信息中看不到它?
相关代码如下:
/**
 * file_info_container - information about one file
 * @file_name: name of the file
 * @file_url:  URL of the file
 * @file_type: type of the file
 *
 * Groups the file data that the different CURL operations need to
 * read or fill in, so it can be handed around as a single object.
 */
typedef struct {
	char *file_name;
	char *file_url;
	char *file_type;
} file_info_container;
/**
 * filename_tags - lookup table of sub-strings that mark a string which may
 *                 contain the original (remote) file name.
 *
 * Entry i of this table always corresponds to entry i of
 * filename_denotations; keep both tables the same length and order.
 *
 * A string literal is already NUL-terminated, so no explicit terminator is
 * written inside the literal: any extra characters there become part of the
 * pattern and prevent it from ever matching a real header line.
 */
static const char *filename_tags[] = {
	"Content-Disposition: ",
};
/**
 * filename_denotations - lookup table of the patterns that sit directly in
 *                        front of a file name inside a string known to
 *                        contain one.
 *
 * Entry i of this table always corresponds to entry i of filename_tags.
 */
static const char *filename_denotations[] = { "filename=\"" };
...
/**
* header_cb - callback function used by curl to parse header information
* @headerln: contents of current header line being parsed
* headerln_data_size: data size of headerln[n]
* headerln_n: number of elements in headerln[]
* @parsed_data: structure where file information is stored
*
* Populates parsed_data->file_name with either the filename provided by
* the file header (if it is available) or NULL if it is not.
*/
static size_t header_cb(char *headerln, size_t headerln_data_size,
size_t headerln_n, void *parsed_data)
{
char *bff = NULL, *filename = NULL;
file_info_container *file_info = (file_info_container*) parsed_data;
size_t i, ret = headerln_data_size * headerln_n;
for(i = 0; i < sizeof(filename_tags) / sizeof(filename_tags[0]); i++) {
filename = strstr(headerln, filename_tags[i]);
if(filename) {
bff = strstr(filename, filename_denotations[i]) +
strlen(filename_denotations[i]);
if(bff) {
if(strchr(bff, (int)'.'))
{
filename = malloc((strlen(bff) * sizeof(char)) + 1);
if(!filename) {
//malloc error
}
while(*bff != '.')
*filename++ = *bff++;
bff = realloc(filename, strlen((filename) + 1) *
sizeof(char));
if(!bff) {
//realloc error
}
filename = bff;
*filename = '[=11=]';
}
} else
filename = NULL;
}
}
if(filename) {
file_info->file_name = malloc((strlen(filename) + 1) *
sizeof(char));
if(!file_info->file_name) {
//malloc error
}
strcpy(file_info->file_name, filename);
}
return ret;
}
...
/*
* Will comment later when this function is more finalized
*/
void download_url(char* url)
{
char *bff = NULL, *temp_file;
CURL *curl_handle;
CURLcode res;
file_info_container file_info;
FILE *fp;
bff = tmpnam(NULL);
if(!bff) {
//error handling for tmpnam failure
}
bff++;
temp_file = malloc((strlen(bff) + 4) * sizeof(char));
if(!temp_file) {
//error handling for malloc failure
}
strcpy(temp_file, bff);
free(bff);
strcat(temp_file, "lok[=11=]");
fp = fopen(temp_file, "wb");
curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, header_cb);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, fp);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, fp);
res = curl_easy_perform(curl_handle);
fclose(fp);
curl_easy_getinfo(curl_handle, CURLINFO_EFFECTIVE_URL,
&file_info.file_url);
curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_TYPE,
&file_info.file_type);
if(!file_info.file_type) {
file_info.file_type = malloc(8 * sizeof(char));
if(!file_info.file_type) {
//error handling for memory allocation failure
}
strcpy(file_info.file_type, "bin/unk[=11=]");
}
//build_filename(&file_info);
//rename the lok file with whatever we get later
free(temp_file);
curl_easy_cleanup(curl_handle);
return 0;
}
主函数只是调用了download_url(url).....
TL;DR 我的问题是:无论我传递给 download_url 的 URL 是什么,我从来没有在 header 中看到包含 "Content-Disposition: " 的行。这是为什么?
您请求一个特定文件。
服务器正在为您提供您请求的文件。
您已经知道您请求的文件的名称!
您请求:http://foobar.com/Example.pdf
服务器正在向您发送 Example.pdf 的内容。
你怎么能说你不知道文件名呢?
如果您依赖的是 Content-Disposition header:服务器并不需要发送它,事实上也很少发送。
Content-Disposition documentation
The Content-Disposition response-header field has been proposed as a means for the origin server to suggest a default filename if the user requests that the content is saved to a file.
我正在尝试使用 libcurl 解析 header 信息,以便保留所下载文件的原始文件名。目前我在 header 的每一行中查找子字符串 "Content-Disposition" 来定位文件名。编写下面的代码时,我的想法是:以后只需编辑查找表(见下文说明),就能增加要在 header 数据中查找的子字符串。我遇到的问题是:无论 URL 是什么,我用来测试代码的 header 信息中都从不包含我要查找的子字符串(目前只有 "Content-Disposition: ")。为什么我在 header 信息中看不到它?
相关代码如下:
/**
 * file_info_container - information about one file
 * @file_name: name of the file
 * @file_url:  URL of the file
 * @file_type: type of the file
 *
 * Groups the file data that the different CURL operations need to
 * read or fill in, so it can be handed around as a single object.
 */
typedef struct {
	char *file_name;
	char *file_url;
	char *file_type;
} file_info_container;
/**
 * filename_tags - lookup table of sub-strings that mark a string which may
 *                 contain the original (remote) file name.
 *
 * Entry i of this table always corresponds to entry i of
 * filename_denotations; keep both tables the same length and order.
 *
 * A string literal is already NUL-terminated, so no explicit terminator is
 * written inside the literal: any extra characters there become part of the
 * pattern and prevent it from ever matching a real header line.
 */
static const char *filename_tags[] = {
	"Content-Disposition: ",
};
/**
 * filename_denotations - lookup table of the patterns that sit directly in
 *                        front of a file name inside a string known to
 *                        contain one.
 *
 * Entry i of this table always corresponds to entry i of filename_tags.
 */
static const char *filename_denotations[] = { "filename=\"" };
...
/**
* header_cb - callback function used by curl to parse header information
* @headerln: contents of current header line being parsed
* headerln_data_size: data size of headerln[n]
* headerln_n: number of elements in headerln[]
* @parsed_data: structure where file information is stored
*
* Populates parsed_data->file_name with either the filename provided by
* the file header (if it is available) or NULL if it is not.
*/
static size_t header_cb(char *headerln, size_t headerln_data_size,
size_t headerln_n, void *parsed_data)
{
char *bff = NULL, *filename = NULL;
file_info_container *file_info = (file_info_container*) parsed_data;
size_t i, ret = headerln_data_size * headerln_n;
for(i = 0; i < sizeof(filename_tags) / sizeof(filename_tags[0]); i++) {
filename = strstr(headerln, filename_tags[i]);
if(filename) {
bff = strstr(filename, filename_denotations[i]) +
strlen(filename_denotations[i]);
if(bff) {
if(strchr(bff, (int)'.'))
{
filename = malloc((strlen(bff) * sizeof(char)) + 1);
if(!filename) {
//malloc error
}
while(*bff != '.')
*filename++ = *bff++;
bff = realloc(filename, strlen((filename) + 1) *
sizeof(char));
if(!bff) {
//realloc error
}
filename = bff;
*filename = '[=11=]';
}
} else
filename = NULL;
}
}
if(filename) {
file_info->file_name = malloc((strlen(filename) + 1) *
sizeof(char));
if(!file_info->file_name) {
//malloc error
}
strcpy(file_info->file_name, filename);
}
return ret;
}
...
/*
* Will comment later when this function is more finalized
*/
void download_url(char* url)
{
char *bff = NULL, *temp_file;
CURL *curl_handle;
CURLcode res;
file_info_container file_info;
FILE *fp;
bff = tmpnam(NULL);
if(!bff) {
//error handling for tmpnam failure
}
bff++;
temp_file = malloc((strlen(bff) + 4) * sizeof(char));
if(!temp_file) {
//error handling for malloc failure
}
strcpy(temp_file, bff);
free(bff);
strcat(temp_file, "lok[=11=]");
fp = fopen(temp_file, "wb");
curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_HEADERFUNCTION, header_cb);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_cb);
curl_easy_setopt(curl_handle, CURLOPT_HEADERDATA, fp);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, fp);
res = curl_easy_perform(curl_handle);
fclose(fp);
curl_easy_getinfo(curl_handle, CURLINFO_EFFECTIVE_URL,
&file_info.file_url);
curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_TYPE,
&file_info.file_type);
if(!file_info.file_type) {
file_info.file_type = malloc(8 * sizeof(char));
if(!file_info.file_type) {
//error handling for memory allocation failure
}
strcpy(file_info.file_type, "bin/unk[=11=]");
}
//build_filename(&file_info);
//rename the lok file with whatever we get later
free(temp_file);
curl_easy_cleanup(curl_handle);
return 0;
}
主函数只是调用了download_url(url).....
TL;DR 我的问题是:无论我传递给 download_url 的 URL 是什么,我从来没有在 header 中看到包含 "Content-Disposition: " 的行。这是为什么?
您请求一个特定文件。
服务器正在为您提供您请求的文件。
您已经知道您请求的文件的名称!
您请求:http://foobar.com/Example.pdf
服务器正在向您发送 Example.pdf 的内容。
你怎么能说你不知道文件名呢?
如果您依赖的是 Content-Disposition header:服务器并不需要发送它,事实上也很少发送。
Content-Disposition documentation
The Content-Disposition response-header field has been proposed as a means for the origin server to suggest a default filename if the user requests that the content is saved to a file.