段错误,但不在 valgrind 或 gdb 中
segfault, but not in valgrind or gdb
在我的项目中,有一个库包含使用 Autodesk 的 FBX SDK 2017.1 加载 fbx 的代码。
加载 fbx 在调试和发布时崩溃。崩溃以两种不同的方式发生,并且似乎是随机的:
- 崩溃要么只是 "Segmentation fault"(大部分时间)
- 崩溃是可能涉及崩溃的所有库的转储,暗示 realloc() 调用有问题。 (每隔一段时间)从消息的上下文中,我无法确定可能是哪个 realloc(消息后跟所有链接的库的转储)。
该代码确实包含 realloc() 调用,特别是在 FbxStream 的自定义实现中使用的缓冲区分配中
大多数代码路径与 windows 完全相同,仅重新实现了一些特定于平台的部分。在 windows 上,它 运行 符合预期。
令我震惊的是,如果我 运行 在 gdb 或 valgrind 中运行程序,崩溃就会消失!所以我着手寻找未初始化的members/values,但到目前为止我找不到任何可疑的东西。我使用了 CppDepend/CppCheck 和 VS2012 代码分析,但在未初始化的 variables/members
上都出现了空洞
提供一些 FBX 加载的背景知识; FBX SDK 有多种方法来处理不同类型的资源(obj、3ds、fbx 等)。它们可以从文件或流中加载。为了支持大文件,stream 选项是更相关的选项。下面的代码远非完美,但目前我最感兴趣的是 valgrind/gdb 不会崩溃的原因。我将 SDK 文档放在 ReadString 之上,因为它是最复杂的文档。
class MyFbxStream : public FbxStream{
uint32 m_FormatID;
uint32 m_Error;
EState m_State;
size_t m_Pos;
size_t m_Size;
const Engine::Buffer* const m_Buffer;
MyFbxStream& operator = (const MyFbxStream& other) const;
public:
MyFbxStream(const Engine::Buffer* const buffer)
: m_FormatID(0)
, m_Error(0)
, m_State(eClosed)
, m_Pos(0)
, m_Size(0)
, m_Buffer(buffer) {};
virtual ~MyFbxStream() {};
virtual bool Open(void* pStreamData) {
m_FormatID = *(uint32*)pStreamData;
m_Pos = 0;
m_State = eOpen;
m_Size = m_Buffer->GetSize();
return true;
}
virtual bool Close() {
m_Pos = m_Size = 0;
m_State = eClosed;
return true;
}
virtual int Read(void* pData, int pSize) const {
const unsigned char* data = (m_Buffer->GetBase(m_Pos));
const size_t bytesRead = m_Pos + pSize > m_Buffer->GetSize() ? (m_Buffer->GetSize() - m_Pos) : pSize;
const_cast<MyFbxStream*>(this)->m_Pos += bytesRead;
memcpy(pData, data, bytesRead);
return (int)bytesRead;
}
/** Read a string from the stream.
* The default implementation is written in terms of Read() but does not cope with DOS line endings.
* Subclasses may need to override this if DOS line endings are to be supported.
* \param pBuffer Pointer to the memory block where the read bytes are stored.
* \param pMaxSize Maximum number of bytes to be read from the stream.
* \param pStopAtFirstWhiteSpace Stop reading when any whitespace is encountered. Otherwise read to end of line (like fgets()).
* \return pBuffer, if successful, else NULL.
* \remark The default implementation terminates the \e pBuffer with a null character and assumes there is enough room for it.
* For example, a call with \e pMaxSize = 1 will fill \e pBuffer with the null character only. */
virtual char* ReadString(char* pBuffer, int pMaxSize, bool pStopAtFirstWhiteSpace = false) {
assert(!pStopAtFirstWhiteSpace); // "Not supported"
const size_t pSize = pMaxSize - 1;
if (pSize) {
const char* const base = (const char* const)m_Buffer->GetBase();
char* cBuffer = pBuffer;
const size_t totalSize = std::min(m_Buffer->GetSize(), (m_Pos + pSize));
const char* const maxSize = base + totalSize;
const char* sum = base + m_Pos;
bool done = false;
// first align the copy on alignment boundary (4byte)
while ((((size_t)sum & 0x3) != 0) && (sum < maxSize)) {
const unsigned char c = *sum++;
*cBuffer++ = c;
if ((c == '\n') || (c == '\r')) {
done = true;
break;
} }
// copy from alignment boundary to boundary (4byte)
if (!done) {
int64 newBytesRead = 0;
uint32* dBuffer = (uint32*)cBuffer;
const uint32* dBase = (uint32*)sum;
const uint32* const dmaxSize = ((uint32*)maxSize) - 1;
while (dBase < dmaxSize) {
const uint32 data = *(const uint32*const)dBase++;
*dBuffer++ = data;
if (((data & 0xff) == 0x0a) || ((data & 0xff) == 0x0d)) { // third bytes, 4 bytes read..
newBytesRead -= 3;
done = true;
break;
} else {
const uint32 shiftedData8 = data & 0xff00;
if ((shiftedData8 == 0x0a00) || (shiftedData8 == 0x0d00)) { // third bytes, 3 bytes read..
newBytesRead -= 2;
done = true;
break;
} else {
const uint32 shiftedData16 = data & 0xff0000;
if ((shiftedData16 == 0x0a0000) || (shiftedData16 == 0x0d0000)) { // second byte, 2 bytes read..
newBytesRead -= 1;
done = true;
break;
} else {
const uint32 shiftedData24 = data & 0xff000000;
if ((shiftedData24 == 0x0a000000) || (shiftedData24 == 0x0d000000)) { // first byte, 1 bytes read..
done = true;
break;
} } } } }
newBytesRead += (int64)dBuffer - (int64)cBuffer;
if (newBytesRead) {
sum += newBytesRead;
cBuffer += newBytesRead;
} }
// copy anything beyond the last alignment boundary (4byte)
if (!done) {
while (sum < maxSize) {
const unsigned char c = *sum++;
*cBuffer++ = c;
if ((c == '\n') || (c == '\r')) {
done = true;
break;
} } }
const size_t bytesRead = cBuffer - pBuffer;
if (bytesRead) {
const_cast<MyFbxStream*>(this)->m_Pos += bytesRead;
pBuffer[bytesRead] = 0;
return pBuffer;
} }
pBuffer = NULL;
return NULL;
}
virtual void Seek(const FbxInt64& pOffset, const FbxFile::ESeekPos& pSeekPos) {
switch (pSeekPos) {
case FbxFile::ESeekPos::eBegin: m_Pos = pOffset; break;
case FbxFile::ESeekPos::eCurrent: m_Pos += pOffset; break;
case FbxFile::ESeekPos::eEnd: m_Pos = m_Size - pOffset; break;
}
}
virtual long GetPosition() const { return (long)m_Pos; }
virtual void SetPosition(long position) { m_Pos = position; }
virtual void ClearError() { m_Error = 0; }
virtual int GetError() const { return m_Error; }
virtual EState GetState() { return m_State; }
virtual int GetReaderID() const { return m_FormatID; }
virtual int GetWriterID() const { return -1; } // readonly stream
virtual bool Flush() { return true; } // readonly stream
virtual int Write(const void* /*d*/, int /*s*/) { assert(false); return 0; } // readonly stream
};
我假设可能存在与 malloc/free/realloc 操作相关的未定义行为,而这些行为在 gdb 中不会以某种方式发生。但如果是这样的话,我也希望 Windows 二进制文件有问题。
此外,我不知道这是否相关,但是当我跟踪到 Open() 函数并打印 "m_Buffer" 指针的值(或 "this")时,我得到一个以 0xfffffff 开头的指针值。对于 Windows 程序员来说这看起来像个问题。但是,我能否在 linux 中得出相同的结论,因为我也看到这种情况发生在静态函数调用等中
if I run the program in either gdb or valgrind, the crash disappears!
有两种可能的解释:
- 有多个线程,代码表现出数据竞争,GDB 和 Valgrind 都会显着影响执行时间。
- GDB 禁用地址随机化; Valgrind 显着影响程序布局,崩溃对精确布局敏感。
我将采取的步骤:
在我的项目中,有一个库包含使用 Autodesk 的 FBX SDK 2017.1 加载 fbx 的代码。
加载 fbx 在调试和发布时崩溃。崩溃以两种不同的方式发生,并且似乎是随机的:
- 崩溃要么只是 "Segmentation fault"(大部分时间)
- 崩溃是可能涉及崩溃的所有库的转储,暗示 realloc() 调用有问题。 (每隔一段时间)从消息的上下文中,我无法确定可能是哪个 realloc(消息后跟所有链接的库的转储)。
该代码确实包含 realloc() 调用,特别是在 FbxStream 的自定义实现中使用的缓冲区分配中
大多数代码路径与 windows 完全相同,仅重新实现了一些特定于平台的部分。在 windows 上,它 运行 符合预期。
令我震惊的是,如果我 运行 在 gdb 或 valgrind 中运行程序,崩溃就会消失!所以我着手寻找未初始化的members/values,但到目前为止我找不到任何可疑的东西。我使用了 CppDepend/CppCheck 和 VS2012 代码分析,但在未初始化的 variables/members
上都出现了空洞提供一些 FBX 加载的背景知识; FBX SDK 有多种方法来处理不同类型的资源(obj、3ds、fbx 等)。它们可以从文件或流中加载。为了支持大文件,stream 选项是更相关的选项。下面的代码远非完美,但目前我最感兴趣的是 valgrind/gdb 不会崩溃的原因。我将 SDK 文档放在 ReadString 之上,因为它是最复杂的文档。
class MyFbxStream : public FbxStream{
uint32 m_FormatID;
uint32 m_Error;
EState m_State;
size_t m_Pos;
size_t m_Size;
const Engine::Buffer* const m_Buffer;
MyFbxStream& operator = (const MyFbxStream& other) const;
public:
MyFbxStream(const Engine::Buffer* const buffer)
: m_FormatID(0)
, m_Error(0)
, m_State(eClosed)
, m_Pos(0)
, m_Size(0)
, m_Buffer(buffer) {};
virtual ~MyFbxStream() {};
virtual bool Open(void* pStreamData) {
m_FormatID = *(uint32*)pStreamData;
m_Pos = 0;
m_State = eOpen;
m_Size = m_Buffer->GetSize();
return true;
}
virtual bool Close() {
m_Pos = m_Size = 0;
m_State = eClosed;
return true;
}
virtual int Read(void* pData, int pSize) const {
const unsigned char* data = (m_Buffer->GetBase(m_Pos));
const size_t bytesRead = m_Pos + pSize > m_Buffer->GetSize() ? (m_Buffer->GetSize() - m_Pos) : pSize;
const_cast<MyFbxStream*>(this)->m_Pos += bytesRead;
memcpy(pData, data, bytesRead);
return (int)bytesRead;
}
/** Read a string from the stream.
* The default implementation is written in terms of Read() but does not cope with DOS line endings.
* Subclasses may need to override this if DOS line endings are to be supported.
* \param pBuffer Pointer to the memory block where the read bytes are stored.
* \param pMaxSize Maximum number of bytes to be read from the stream.
* \param pStopAtFirstWhiteSpace Stop reading when any whitespace is encountered. Otherwise read to end of line (like fgets()).
* \return pBuffer, if successful, else NULL.
* \remark The default implementation terminates the \e pBuffer with a null character and assumes there is enough room for it.
* For example, a call with \e pMaxSize = 1 will fill \e pBuffer with the null character only. */
virtual char* ReadString(char* pBuffer, int pMaxSize, bool pStopAtFirstWhiteSpace = false) {
assert(!pStopAtFirstWhiteSpace); // "Not supported"
const size_t pSize = pMaxSize - 1;
if (pSize) {
const char* const base = (const char* const)m_Buffer->GetBase();
char* cBuffer = pBuffer;
const size_t totalSize = std::min(m_Buffer->GetSize(), (m_Pos + pSize));
const char* const maxSize = base + totalSize;
const char* sum = base + m_Pos;
bool done = false;
// first align the copy on alignment boundary (4byte)
while ((((size_t)sum & 0x3) != 0) && (sum < maxSize)) {
const unsigned char c = *sum++;
*cBuffer++ = c;
if ((c == '\n') || (c == '\r')) {
done = true;
break;
} }
// copy from alignment boundary to boundary (4byte)
if (!done) {
int64 newBytesRead = 0;
uint32* dBuffer = (uint32*)cBuffer;
const uint32* dBase = (uint32*)sum;
const uint32* const dmaxSize = ((uint32*)maxSize) - 1;
while (dBase < dmaxSize) {
const uint32 data = *(const uint32*const)dBase++;
*dBuffer++ = data;
if (((data & 0xff) == 0x0a) || ((data & 0xff) == 0x0d)) { // third bytes, 4 bytes read..
newBytesRead -= 3;
done = true;
break;
} else {
const uint32 shiftedData8 = data & 0xff00;
if ((shiftedData8 == 0x0a00) || (shiftedData8 == 0x0d00)) { // third bytes, 3 bytes read..
newBytesRead -= 2;
done = true;
break;
} else {
const uint32 shiftedData16 = data & 0xff0000;
if ((shiftedData16 == 0x0a0000) || (shiftedData16 == 0x0d0000)) { // second byte, 2 bytes read..
newBytesRead -= 1;
done = true;
break;
} else {
const uint32 shiftedData24 = data & 0xff000000;
if ((shiftedData24 == 0x0a000000) || (shiftedData24 == 0x0d000000)) { // first byte, 1 bytes read..
done = true;
break;
} } } } }
newBytesRead += (int64)dBuffer - (int64)cBuffer;
if (newBytesRead) {
sum += newBytesRead;
cBuffer += newBytesRead;
} }
// copy anything beyond the last alignment boundary (4byte)
if (!done) {
while (sum < maxSize) {
const unsigned char c = *sum++;
*cBuffer++ = c;
if ((c == '\n') || (c == '\r')) {
done = true;
break;
} } }
const size_t bytesRead = cBuffer - pBuffer;
if (bytesRead) {
const_cast<MyFbxStream*>(this)->m_Pos += bytesRead;
pBuffer[bytesRead] = 0;
return pBuffer;
} }
pBuffer = NULL;
return NULL;
}
virtual void Seek(const FbxInt64& pOffset, const FbxFile::ESeekPos& pSeekPos) {
switch (pSeekPos) {
case FbxFile::ESeekPos::eBegin: m_Pos = pOffset; break;
case FbxFile::ESeekPos::eCurrent: m_Pos += pOffset; break;
case FbxFile::ESeekPos::eEnd: m_Pos = m_Size - pOffset; break;
}
}
virtual long GetPosition() const { return (long)m_Pos; }
virtual void SetPosition(long position) { m_Pos = position; }
virtual void ClearError() { m_Error = 0; }
virtual int GetError() const { return m_Error; }
virtual EState GetState() { return m_State; }
virtual int GetReaderID() const { return m_FormatID; }
virtual int GetWriterID() const { return -1; } // readonly stream
virtual bool Flush() { return true; } // readonly stream
virtual int Write(const void* /*d*/, int /*s*/) { assert(false); return 0; } // readonly stream
};
我假设可能存在与 malloc/free/realloc 操作相关的未定义行为,而这些行为在 gdb 中不会以某种方式发生。但如果是这样的话,我也希望 Windows 二进制文件有问题。
此外,我不知道这是否相关,但是当我跟踪到 Open() 函数并打印 "m_Buffer" 指针的值(或 "this")时,我得到一个以 0xfffffff 开头的指针值。对于 Windows 程序员来说这看起来像个问题。但是,我能否在 linux 中得出相同的结论,因为我也看到这种情况发生在静态函数调用等中
if I run the program in either gdb or valgrind, the crash disappears!
有两种可能的解释:
- 有多个线程,代码表现出数据竞争,GDB 和 Valgrind 都会显着影响执行时间。
- GDB 禁用地址随机化; Valgrind 显着影响程序布局,崩溃对精确布局敏感。
我将采取的步骤: