helgrind 中的 libxml2 多线程错误
libxml2 multithreading errors in helgrind
我有以下非常简单的程序:
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <libxml/parser.h>
#include <libxml/catalog.h>
#include <libxml/tree.h>
#include <libxml/HTMLparser.h>
/* Parser option flags passed to htmlReadFile by test1 and test2. */
static const int kHTMLParseFlags =
HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
/*
 * Thread entry point: parses "http://www.google.com" with htmlReadFile
 * and releases the resulting document right away without inspecting it.
 *
 * ptr: thread argument, unused.
 * Returns NULL; the thread produces no result.
 */
void* test1(void* ptr)
{
    (void)ptr; /* unused */

    htmlDocPtr doc = htmlReadFile("http://www.google.com", NULL, kHTMLParseFlags);
    xmlFreeDoc(doc);

    /* A pthread start routine must return a value; the original fell off
     * the end of a non-void function. */
    return NULL;
}
/*
 * Thread entry point: parses "http://www.lenta.ru" with htmlReadFile
 * and releases the resulting document right away without inspecting it.
 *
 * ptr: thread argument, unused.
 * Returns NULL; the thread produces no result.
 */
void* test2(void* ptr)
{
    (void)ptr; /* unused */

    htmlDocPtr doc = htmlReadFile("http://www.lenta.ru", NULL, kHTMLParseFlags);
    xmlFreeDoc(doc);

    /* A pthread start routine must return a value; the original fell off
     * the end of a non-void function. */
    return NULL;
}
/*
 * Initializes the libxml2 parser and catalog, runs test1 and test2 on two
 * concurrent threads, waits for both, then tears the library state down.
 *
 * Returns EXIT_SUCCESS when both threads were created and joined,
 * EXIT_FAILURE otherwise.
 */
int main(void)
{
    pthread_t thread1, thread2;
    int status = EXIT_SUCCESS;
    int err;

    xmlInitParser();
    xmlInitializeCatalog();

    err = pthread_create(&thread1, NULL, &test1, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create for thread1 failed: error %d\n", err);
        status = EXIT_FAILURE;
        goto cleanup;
    }

    err = pthread_create(&thread2, NULL, &test2, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create for thread2 failed: error %d\n", err);
        status = EXIT_FAILURE;
        /* thread1 is already running: wait for it before cleaning up the
         * library state it is using. thread2 must not be joined because
         * its handle was never initialized. */
        pthread_join(thread1, NULL);
        goto cleanup;
    }

    if (pthread_join(thread1, NULL) != 0) {
        status = EXIT_FAILURE;
    }
    if (pthread_join(thread2, NULL) != 0) {
        status = EXIT_FAILURE;
    }

cleanup:
    xmlCatalogCleanup();
    xmlCleanupParser();
    return status;
}
当我在 valgrind --tool=helgrind 下运行它时,它报告了很多竞争条件(ns_makecanon 等等)。这是什么原因?libxml2 似乎应该是线程安全的。我使用 gcc -I/usr/include/libxml2 temp.c -lxml2 -pthread 编译它。
valgrind 输出示例:
==2276== Possible data race during read of size 4 at 0x5EC2020 by thread #3
==2276== Locks held: none
==2276== at 0x4EE5117: xmlCatalogXMLResolve (catalog.c:1637)
==2276== by 0x4EE4F15: xmlCatalogListXMLResolve (catalog.c:2064)
==2276== by 0x4EE5FBE: xmlACatalogResolve (catalog.c:2875)
==2276== by 0x4EA21EA: xmlResolveResourceFromCatalog (xmlIO.c:4000)
==2276== by 0x4EA46C3: xmlDefaultExternalEntityLoader (xmlIO.c:4065)
==2276== by 0x4EA452E: xmlLoadExternalEntity (xmlIO.c:4133)
==2276== by 0x4EB9D88: htmlCreateFileParserCtxt (HTMLparser.c:6314)
==2276== by 0x4EBA97C: htmlReadFile (HTMLparser.c:6796)
==2276== by 0x400A18: test2(void*) (in /home/a.out)
==2276== by 0x4C32DF6: ??? (in /usr/lib/valgrind/vgpreload_helgrind-amd64-linux.so)
==2276== by 0x51AC6A9: start_thread (pthread_create.c:333)
==2276==
==2276== This conflicts with a previous write of size 4 by thread #2
==2276== Locks held: none
==2276== at 0x4EE5131: xmlCatalogXMLResolve (catalog.c:1643)
==2276== by 0x4EE4F15: xmlCatalogListXMLResolve (catalog.c:2064)
==2276== by 0x4EE5FBE: xmlACatalogResolve (catalog.c:2875)
==2276== by 0x4EA21EA: xmlResolveResourceFromCatalog (xmlIO.c:4000)
==2276== by 0x4EA46C3: xmlDefaultExternalEntityLoader (xmlIO.c:4065)
==2276== by 0x4EA452E: xmlLoadExternalEntity (xmlIO.c:4133)
==2276== by 0x4EB9D88: htmlCreateFileParserCtxt (HTMLparser.c:6314)
==2276== by 0x4EBA97C: htmlReadFile (HTMLparser.c:6796)
==2276== Address 0x5ec2020 is 64 bytes inside a block of size 80 alloc'd
==2276== at 0x4C2CFEF: malloc (in /usr/lib/valgrind/vgpreload_helgrind-amd64-linux.so)
==2276== by 0x4EE2D3F: xmlNewCatalogEntry (catalog.c:280)
==2276== by 0x4EE3340: xmlParseXMLCatalogOneNode (catalog.c:1187)
==2276== by 0x4EE3871: xmlParseXMLCatalogNode (catalog.c:1256)
==2276== by 0x4EE3871: xmlParseXMLCatalogNodeList (catalog.c:1323)
==2276== by 0x4EE4C3E: xmlParseXMLCatalogFile (catalog.c:1388)
==2276== by 0x4EE4C3E: xmlFetchXMLCatalogFile (catalog.c:1453)
==2276== by 0x4EE4FEF: xmlCatalogListXMLResolve (catalog.c:2061)
==2276== by 0x4EE5FBE: xmlACatalogResolve (catalog.c:2875)
==2276== by 0x4EA21EA: xmlResolveResourceFromCatalog (xmlIO.c:4000)
==2276== by 0x4EA46C3: xmlDefaultExternalEntityLoader (xmlIO.c:4065)
==2276== by 0x4EA452E: xmlLoadExternalEntity (xmlIO.c:4133)
==2276== by 0x4EB9D88: htmlCreateFileParserCtxt (HTMLparser.c:6314)
==2276== by 0x4EBA97C: htmlReadFile (HTMLparser.c:6796)
==2276== Block was alloc'd by thread #2
这确实是 libxml2 中的一个错误。xmlCatalogXMLResolve 中的以下代码不是线程安全的(应该使用原子增量或锁):
if (catal->depth > MAX_CATAL_DEPTH) {
xmlCatalogErr(catal, NULL, XML_CATALOG_RECURSION,
"Detected recursion in catalog %s\n",
catal->name, NULL, NULL);
return(NULL);
}
catal->depth++;
libxml2 文档指出并发加载应该是线程安全的。我建议你提交一个 bug 报告,和/或在邮件列表上提出这个问题。
我有以下非常简单的程序:
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>
#include <libxml/parser.h>
#include <libxml/catalog.h>
#include <libxml/tree.h>
#include <libxml/HTMLparser.h>
/* Parser option flags passed to htmlReadFile by test1 and test2. */
static const int kHTMLParseFlags =
HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET;
/*
 * Thread entry point: parses "http://www.google.com" with htmlReadFile
 * and releases the resulting document right away without inspecting it.
 *
 * ptr: thread argument, unused.
 * Returns NULL; the thread produces no result.
 */
void* test1(void* ptr)
{
    (void)ptr; /* unused */

    htmlDocPtr doc = htmlReadFile("http://www.google.com", NULL, kHTMLParseFlags);
    xmlFreeDoc(doc);

    /* A pthread start routine must return a value; the original fell off
     * the end of a non-void function. */
    return NULL;
}
/*
 * Thread entry point: parses "http://www.lenta.ru" with htmlReadFile
 * and releases the resulting document right away without inspecting it.
 *
 * ptr: thread argument, unused.
 * Returns NULL; the thread produces no result.
 */
void* test2(void* ptr)
{
    (void)ptr; /* unused */

    htmlDocPtr doc = htmlReadFile("http://www.lenta.ru", NULL, kHTMLParseFlags);
    xmlFreeDoc(doc);

    /* A pthread start routine must return a value; the original fell off
     * the end of a non-void function. */
    return NULL;
}
/*
 * Initializes the libxml2 parser and catalog, runs test1 and test2 on two
 * concurrent threads, waits for both, then tears the library state down.
 *
 * Returns EXIT_SUCCESS when both threads were created and joined,
 * EXIT_FAILURE otherwise.
 */
int main(void)
{
    pthread_t thread1, thread2;
    int status = EXIT_SUCCESS;
    int err;

    xmlInitParser();
    xmlInitializeCatalog();

    err = pthread_create(&thread1, NULL, &test1, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create for thread1 failed: error %d\n", err);
        status = EXIT_FAILURE;
        goto cleanup;
    }

    err = pthread_create(&thread2, NULL, &test2, NULL);
    if (err != 0) {
        fprintf(stderr, "pthread_create for thread2 failed: error %d\n", err);
        status = EXIT_FAILURE;
        /* thread1 is already running: wait for it before cleaning up the
         * library state it is using. thread2 must not be joined because
         * its handle was never initialized. */
        pthread_join(thread1, NULL);
        goto cleanup;
    }

    if (pthread_join(thread1, NULL) != 0) {
        status = EXIT_FAILURE;
    }
    if (pthread_join(thread2, NULL) != 0) {
        status = EXIT_FAILURE;
    }

cleanup:
    xmlCatalogCleanup();
    xmlCleanupParser();
    return status;
}
当我在 valgrind --tool=helgrind 下运行它时,它报告了很多竞争条件(ns_makecanon 等等)。这是什么原因?libxml2 似乎应该是线程安全的。我使用 gcc -I/usr/include/libxml2 temp.c -lxml2 -pthread 编译它。
valgrind 输出示例:
==2276== Possible data race during read of size 4 at 0x5EC2020 by thread #3
==2276== Locks held: none
==2276== at 0x4EE5117: xmlCatalogXMLResolve (catalog.c:1637)
==2276== by 0x4EE4F15: xmlCatalogListXMLResolve (catalog.c:2064)
==2276== by 0x4EE5FBE: xmlACatalogResolve (catalog.c:2875)
==2276== by 0x4EA21EA: xmlResolveResourceFromCatalog (xmlIO.c:4000)
==2276== by 0x4EA46C3: xmlDefaultExternalEntityLoader (xmlIO.c:4065)
==2276== by 0x4EA452E: xmlLoadExternalEntity (xmlIO.c:4133)
==2276== by 0x4EB9D88: htmlCreateFileParserCtxt (HTMLparser.c:6314)
==2276== by 0x4EBA97C: htmlReadFile (HTMLparser.c:6796)
==2276== by 0x400A18: test2(void*) (in /home/a.out)
==2276== by 0x4C32DF6: ??? (in /usr/lib/valgrind/vgpreload_helgrind-amd64-linux.so)
==2276== by 0x51AC6A9: start_thread (pthread_create.c:333)
==2276==
==2276== This conflicts with a previous write of size 4 by thread #2
==2276== Locks held: none
==2276== at 0x4EE5131: xmlCatalogXMLResolve (catalog.c:1643)
==2276== by 0x4EE4F15: xmlCatalogListXMLResolve (catalog.c:2064)
==2276== by 0x4EE5FBE: xmlACatalogResolve (catalog.c:2875)
==2276== by 0x4EA21EA: xmlResolveResourceFromCatalog (xmlIO.c:4000)
==2276== by 0x4EA46C3: xmlDefaultExternalEntityLoader (xmlIO.c:4065)
==2276== by 0x4EA452E: xmlLoadExternalEntity (xmlIO.c:4133)
==2276== by 0x4EB9D88: htmlCreateFileParserCtxt (HTMLparser.c:6314)
==2276== by 0x4EBA97C: htmlReadFile (HTMLparser.c:6796)
==2276== Address 0x5ec2020 is 64 bytes inside a block of size 80 alloc'd
==2276== at 0x4C2CFEF: malloc (in /usr/lib/valgrind/vgpreload_helgrind-amd64-linux.so)
==2276== by 0x4EE2D3F: xmlNewCatalogEntry (catalog.c:280)
==2276== by 0x4EE3340: xmlParseXMLCatalogOneNode (catalog.c:1187)
==2276== by 0x4EE3871: xmlParseXMLCatalogNode (catalog.c:1256)
==2276== by 0x4EE3871: xmlParseXMLCatalogNodeList (catalog.c:1323)
==2276== by 0x4EE4C3E: xmlParseXMLCatalogFile (catalog.c:1388)
==2276== by 0x4EE4C3E: xmlFetchXMLCatalogFile (catalog.c:1453)
==2276== by 0x4EE4FEF: xmlCatalogListXMLResolve (catalog.c:2061)
==2276== by 0x4EE5FBE: xmlACatalogResolve (catalog.c:2875)
==2276== by 0x4EA21EA: xmlResolveResourceFromCatalog (xmlIO.c:4000)
==2276== by 0x4EA46C3: xmlDefaultExternalEntityLoader (xmlIO.c:4065)
==2276== by 0x4EA452E: xmlLoadExternalEntity (xmlIO.c:4133)
==2276== by 0x4EB9D88: htmlCreateFileParserCtxt (HTMLparser.c:6314)
==2276== by 0x4EBA97C: htmlReadFile (HTMLparser.c:6796)
==2276== Block was alloc'd by thread #2
这确实是 libxml2 中的一个错误。xmlCatalogXMLResolve 中的以下代码不是线程安全的(应该使用原子增量或锁):
if (catal->depth > MAX_CATAL_DEPTH) {
xmlCatalogErr(catal, NULL, XML_CATALOG_RECURSION,
"Detected recursion in catalog %s\n",
catal->name, NULL, NULL);
return(NULL);
}
catal->depth++;
libxml2 文档指出并发加载应该是线程安全的。我建议你提交一个 bug 报告,和/或在邮件列表上提出这个问题。