为什么 MySQL 会导致 wget_iri_relative_to_abs 出错?
Why would MySQL break wget_iri_relative_to_abs?
请看下面这个可以复现问题的示例程序:
#include <wget.h>
#include <string.h>
#include <mysql.h>
/* MySQL connection handle: assigned in mysql_start(), closed in mysql_stop(). */
MYSQL *mysql_con;
/*
 * Closes the global MySQL connection.
 *
 * The handle is reset to NULL afterwards so that no stale pointer to the
 * closed connection remains in mysql_con.
 */
void
mysql_stop(void)
{
    mysql_close(mysql_con);
    mysql_con = NULL;
}
/*
 * Opens the global MySQL connection.
 *
 * Allocates the handle with mysql_init() and connects to the "crawl"
 * database on localhost as user "crawler". On any failure a diagnostic is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
void
mysql_start(void)
{
    mysql_con = mysql_init(NULL);
    if (mysql_con == NULL)
    {
        /* There is no handle to query, so report the failure directly. */
        fprintf(stderr, "mysql_init() failed: insufficient memory\n");
        exit(EXIT_FAILURE);
    }

    if (mysql_real_connect(mysql_con, "localhost", "crawler", "password", "crawl", 0, NULL, 0) == NULL)
    {
        /* Read the error text before the handle is released. */
        fprintf(stderr, "%s\n", mysql_error(mysql_con));
        mysql_close(mysql_con);
        mysql_con = NULL;
        exit(EXIT_FAILURE);
    }
}
/*
 * Resolves url against base_url with wget_iri_relative_to_abs(), using a
 * temporary wget_buffer allocated with size 8192, and returns the result.
 *
 * NOTE(review): the returned pointer is obtained before wget_buffer_free()
 * runs. It presumably points into the buffer's own storage, in which case it
 * dangles as soon as the buffer is freed and callers read freed memory --
 * confirm against the libwget documentation.
 */
const char * parseURI (char *base_url, char *url)
{
wget_iri *base = wget_iri_parse(base_url, NULL);
wget_buffer *buf = wget_buffer_alloc(8192);
const char *uri = wget_iri_relative_to_abs(base, url, strlen(url), buf);
/* Both temporaries are released here; uri is returned afterwards. */
wget_buffer_free(&buf);
wget_iri_free(&base);
return uri;
}
/*
 * Resolves the path "/intl/en/policies/privacy/" against
 * "http://www.google.com/" through parseURI() and prints the returned
 * string on its own line on stdout.
 */
void crawler_init()
{
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));
}
/*
 * Program entry point.
 *
 * Prints one parseURI() result before any MySQL call, then opens the MySQL
 * connection, runs crawler_init() (which prints a second result) and closes
 * the connection again.
 */
int main (int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void) argc;
    (void) argv;

    /* First resolution, performed before mysql_start(). */
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));

    mysql_start();
    crawler_init();
    mysql_stop();

    return 0;
}
这个程序运行的输出是:
http://www.google.com/intl/en/policies/privacy/
��7�
main 中的第一个 printf 按预期打印出绝对 url。
crawler_init 中的第二个 printf 打印出的却是乱码,而不是同一个绝对 URL。
这似乎与 mysql_real_connect 有关:把这个调用注释掉之后,crawler_init 中的第二个 printf 就能打印出预期的绝对 URL。
为什么?
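原因很可能是:parseURI 返回的指针指向 buf 内部的存储区,而这块内存已经被 wget_buffer_free 释放(释放后使用)。被释放的内存是否还保留原来的内容取决于堆的状态;mysql_real_connect 期间的内存分配改变了堆的状态,于是第二次打印出了乱码。
在调用 wget_buffer_free 之前添加 buf->data = NULL; 可以避免这块存储区被释放,从而解决这个问题(此后返回的字符串应由调用者负责释放):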
#include <wget.h>
#include <string.h>
#include <mysql.h>
/* MySQL connection handle: assigned in mysql_start(), closed in mysql_stop(). */
MYSQL *mysql_con;
/*
 * Closes the global MySQL connection.
 *
 * The handle is reset to NULL afterwards so that no stale pointer to the
 * closed connection remains in mysql_con.
 */
void
mysql_stop(void)
{
    mysql_close(mysql_con);
    mysql_con = NULL;
}
/*
 * Opens the global MySQL connection.
 *
 * Allocates the handle with mysql_init() and connects to the "crawl"
 * database on localhost as user "crawler". On any failure a diagnostic is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
void
mysql_start(void)
{
    mysql_con = mysql_init(NULL);
    if (mysql_con == NULL)
    {
        /* There is no handle to query, so report the failure directly. */
        fprintf(stderr, "mysql_init() failed: insufficient memory\n");
        exit(EXIT_FAILURE);
    }

    if (mysql_real_connect(mysql_con, "localhost", "crawler", "1q2w3e4r", "crawl", 0, NULL, 0) == NULL)
    {
        /* Read the error text before the handle is released. */
        fprintf(stderr, "%s\n", mysql_error(mysql_con));
        mysql_close(mysql_con);
        mysql_con = NULL;
        exit(EXIT_FAILURE);
    }
}
/*
 * Resolves url against base_url with wget_iri_relative_to_abs(), using a
 * temporary wget_buffer allocated with size 8192, and returns the result.
 *
 * NOTE(review): because buf->data is cleared before wget_buffer_free(), the
 * storage holding the result is presumably not released with the buffer, so
 * the caller would own the returned string. Nothing visible here frees it --
 * confirm the ownership rules against the libwget documentation.
 */
const char * parseURI (char *base_url, char *url)
{
wget_iri *base = wget_iri_parse(base_url, NULL);
wget_buffer *buf = wget_buffer_alloc(8192);
const char *uri = wget_iri_relative_to_abs(base, url, strlen(url), buf);
/* Clear the buffer's data pointer before freeing the buffer; uri still holds the result. */
buf->data = NULL;
wget_buffer_free(&buf);
wget_iri_free(&base);
return uri;
}
/*
 * Resolves the path "/intl/en/policies/privacy/" against
 * "http://www.google.com/" through parseURI() and prints the returned
 * string on its own line on stdout.
 */
void crawler_init()
{
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));
}
/*
 * Program entry point.
 *
 * Prints one parseURI() result before any MySQL call, then opens the MySQL
 * connection, runs crawler_init() (which prints a second result) and closes
 * the connection again.
 */
int main (int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void) argc;
    (void) argv;

    /* First resolution, performed before mysql_start(). */
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));

    mysql_start();
    crawler_init();
    mysql_stop();

    return 0;
}
请看下面这个可以复现问题的示例程序:
#include <wget.h>
#include <string.h>
#include <mysql.h>
/* MySQL connection handle: assigned in mysql_start(), closed in mysql_stop(). */
MYSQL *mysql_con;
/*
 * Closes the global MySQL connection.
 *
 * The handle is reset to NULL afterwards so that no stale pointer to the
 * closed connection remains in mysql_con.
 */
void
mysql_stop(void)
{
    mysql_close(mysql_con);
    mysql_con = NULL;
}
/*
 * Opens the global MySQL connection.
 *
 * Allocates the handle with mysql_init() and connects to the "crawl"
 * database on localhost as user "crawler". On any failure a diagnostic is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
void
mysql_start(void)
{
    mysql_con = mysql_init(NULL);
    if (mysql_con == NULL)
    {
        /* There is no handle to query, so report the failure directly. */
        fprintf(stderr, "mysql_init() failed: insufficient memory\n");
        exit(EXIT_FAILURE);
    }

    if (mysql_real_connect(mysql_con, "localhost", "crawler", "password", "crawl", 0, NULL, 0) == NULL)
    {
        /* Read the error text before the handle is released. */
        fprintf(stderr, "%s\n", mysql_error(mysql_con));
        mysql_close(mysql_con);
        mysql_con = NULL;
        exit(EXIT_FAILURE);
    }
}
/*
 * Resolves url against base_url with wget_iri_relative_to_abs(), using a
 * temporary wget_buffer allocated with size 8192, and returns the result.
 *
 * NOTE(review): the returned pointer is obtained before wget_buffer_free()
 * runs. It presumably points into the buffer's own storage, in which case it
 * dangles as soon as the buffer is freed and callers read freed memory --
 * confirm against the libwget documentation.
 */
const char * parseURI (char *base_url, char *url)
{
wget_iri *base = wget_iri_parse(base_url, NULL);
wget_buffer *buf = wget_buffer_alloc(8192);
const char *uri = wget_iri_relative_to_abs(base, url, strlen(url), buf);
/* Both temporaries are released here; uri is returned afterwards. */
wget_buffer_free(&buf);
wget_iri_free(&base);
return uri;
}
/*
 * Resolves the path "/intl/en/policies/privacy/" against
 * "http://www.google.com/" through parseURI() and prints the returned
 * string on its own line on stdout.
 */
void crawler_init()
{
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));
}
/*
 * Program entry point.
 *
 * Prints one parseURI() result before any MySQL call, then opens the MySQL
 * connection, runs crawler_init() (which prints a second result) and closes
 * the connection again.
 */
int main (int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void) argc;
    (void) argv;

    /* First resolution, performed before mysql_start(). */
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));

    mysql_start();
    crawler_init();
    mysql_stop();

    return 0;
}
这个程序运行的输出是:
http://www.google.com/intl/en/policies/privacy/
��7�
main 中的第一个 printf 按预期打印出绝对 URL,而 crawler_init 中的第二个 printf 打印出的却是乱码,而不是同一个绝对 URL。
这似乎与 mysql_real_connect 有关:把这个调用注释掉之后,crawler_init 中的第二个 printf 就能打印出预期的绝对 URL。
为什么?
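原因很可能是:parseURI 返回的指针指向 buf 内部的存储区,而这块内存已经被 wget_buffer_free 释放(释放后使用)。被释放的内存是否还保留原来的内容取决于堆的状态;mysql_real_connect 期间的内存分配改变了堆的状态,于是第二次打印出了乱码。
在调用 wget_buffer_free 之前添加 buf->data = NULL; 可以避免这块存储区被释放,从而解决这个问题(此后返回的字符串应由调用者负责释放):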
#include <wget.h>
#include <string.h>
#include <mysql.h>
/* MySQL connection handle: assigned in mysql_start(), closed in mysql_stop(). */
MYSQL *mysql_con;
/*
 * Closes the global MySQL connection.
 *
 * The handle is reset to NULL afterwards so that no stale pointer to the
 * closed connection remains in mysql_con.
 */
void
mysql_stop(void)
{
    mysql_close(mysql_con);
    mysql_con = NULL;
}
/*
 * Opens the global MySQL connection.
 *
 * Allocates the handle with mysql_init() and connects to the "crawl"
 * database on localhost as user "crawler". On any failure a diagnostic is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
void
mysql_start(void)
{
    mysql_con = mysql_init(NULL);
    if (mysql_con == NULL)
    {
        /* There is no handle to query, so report the failure directly. */
        fprintf(stderr, "mysql_init() failed: insufficient memory\n");
        exit(EXIT_FAILURE);
    }

    if (mysql_real_connect(mysql_con, "localhost", "crawler", "1q2w3e4r", "crawl", 0, NULL, 0) == NULL)
    {
        /* Read the error text before the handle is released. */
        fprintf(stderr, "%s\n", mysql_error(mysql_con));
        mysql_close(mysql_con);
        mysql_con = NULL;
        exit(EXIT_FAILURE);
    }
}
/*
 * Resolves url against base_url with wget_iri_relative_to_abs(), using a
 * temporary wget_buffer allocated with size 8192, and returns the result.
 *
 * NOTE(review): because buf->data is cleared before wget_buffer_free(), the
 * storage holding the result is presumably not released with the buffer, so
 * the caller would own the returned string. Nothing visible here frees it --
 * confirm the ownership rules against the libwget documentation.
 */
const char * parseURI (char *base_url, char *url)
{
wget_iri *base = wget_iri_parse(base_url, NULL);
wget_buffer *buf = wget_buffer_alloc(8192);
const char *uri = wget_iri_relative_to_abs(base, url, strlen(url), buf);
/* Clear the buffer's data pointer before freeing the buffer; uri still holds the result. */
buf->data = NULL;
wget_buffer_free(&buf);
wget_iri_free(&base);
return uri;
}
/*
 * Resolves the path "/intl/en/policies/privacy/" against
 * "http://www.google.com/" through parseURI() and prints the returned
 * string on its own line on stdout.
 */
void crawler_init()
{
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));
}
/*
 * Program entry point.
 *
 * Prints one parseURI() result before any MySQL call, then opens the MySQL
 * connection, runs crawler_init() (which prints a second result) and closes
 * the connection again.
 */
int main (int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void) argc;
    (void) argv;

    /* First resolution, performed before mysql_start(). */
    printf("%s\n",
           parseURI("http://www.google.com/", "/intl/en/policies/privacy/"));

    mysql_start();
    crawler_init();
    mysql_stop();

    return 0;
}