Unix 套接字设置 O_NONBLOCK 与 php 通信 -fpm get segmentfault

Unix socket set O_NONBLOCK to communication with php-fpm get segmentfault

我正在编写一个 Web 服务器来支持 FastCGI。使用Unix socket与php-fpm通信,不能设置non-block选项,会导致php-fpm响应解析器访问非法内存

我使用socket() 设置非阻塞选项,使用fcntl() 设置非阻塞选项会导致非法内存访问。取消非阻塞选项后,一切正常。但是我的Web Server是一个非阻塞的事件驱动模型,所以我必须使用Unix套接字进行非阻塞通信。

test.cc

/**
 * Created by Crow on 12/27/18.
 * Copyright (c) 2018 Crow All rights reserved.
 * @author Crow
 * @brief  This file is test the ResponseParser
 * @details construct the request, send/write it to the peer endpoint.
 *          use tcpdump can get the result [if php-fpm listened on TCP socket]
 *          $ sudo tcpdump port xxxx -i lo -vv -w a.cap
 *          $ wireshark a.cap
 */

#include <fcntl.h>
#include <unistd.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <arpa/inet.h>

#include <cstdio>
#include <iostream>
#include <fstream>

#include "protocol/fastCGI/request_builder.h"
#include "protocol/fastCGI/response_parser.h"

int main()
{
  std::map<std::string, std::string> param_map;
  param_map.insert({"REMOTE_PORT", "80"});
  param_map.insert({"REMOTE_ADDR", "127.0.0.1"});
  param_map.insert({"REQUEST_METHOD", "POST"});
  param_map.insert({"SERVER_PROTOCOL", "HTTP/1.1"});
  param_map.insert({"SCRIPT_FILENAME", "/home/Crow/1.php"});
  param_map.insert({"CONTENT_LENGTH", "11"});
  std::string in_str("a=b&c=d&e=f");
  platinum::fcgi::RequestBuilder builder(3, 11, in_str, param_map);

  builder.Build();

  auto b = builder.begin_requset();
  auto p = builder.fcgi_params();
  auto i = builder.fcgi_in();

  errno = 0;
//  ssize_t ret{};
//  int fd = ::socket(AF_INET, SOCK_STREAM, 0);
//  struct sockaddr_in addr{};
//  addr.sin_family = AF_INET;
//  addr.sin_port = ::htons(9000);
//  addr.sin_addr.s_addr = ::inet_addr("127.0.0.1");
  int fd = ::socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC , 0);   <= Here! ! !
  auto flag = ::fcntl(fd, F_GETFL);
  flag |= O_NONBLOCK;
  if (::fcntl(fd, F_SETFL, flag)) {
    perror("fcntl");
    std::abort();
  }
  struct sockaddr_un addr{};
  addr.sun_family = AF_UNIX;
  ::strcpy(addr.sun_path, "/home/Crow/xfc.sock");
  auto ret = ::connect(fd, (const struct sockaddr *) &addr, sizeof(addr));
  if (ret < 0)
    perror("connect");

 ::write(fd, reinterpret_cast<void *>(&b), sizeof(b));
  for (const auto &var : p) {
    ::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
    perror("write");
  }
  for (const auto &var : i) {
    ::write(fd, reinterpret_cast<void *>(var.first.get()), static_cast<size_t>(var.second));
    perror("write");
  }

  char buf[10000];
  std::vector<unsigned char> data(1024);
  platinum::fcgi::ResponseParser parser;
  while (!parser.Complete()) {
    ret = ::read(fd, data.data(), 1024);
    parser.feed(data.cbegin(), static_cast<int>(ret));
    auto stdout_ = parser.transform_data();
    std::string str(stdout_.cbegin(), stdout_.cend());
    std::cout << str << std::endl;
  }
  close(fd);

  return 0;
}

fastCGI/response_parser.h

/**
 * Created on 12/26/18.
 * Copyright (c) 2018 Crow All rights reserved.
 * @author Crow
 * @brief
 */

#ifndef PLATINUM_RESPONSE_PARSER_H
#define PLATINUM_RESPONSE_PARSER_H

#include "base.h"
#include "protocol/fastCGI/component.h"
#include "protocol/parser.hpp"

namespace platinum {
namespace fcgi {

enum State : int {
  COMPLETED,
  UNCOMPLETED,
  FAULT,
};

 class ResponseParser : public platinum::Parser {
 public:
  using const_iter = std::vector<FCGIData>::const_iterator;
  ResponseParser();
  ~ResponseParser() override = default;

  long feed(const_iter iter, long length);

  auto transform_data() -> const std::vector<FCGIData> & {
    return transform_data_;
  }
  int request_id() { return request_id_; }
  long long app_status() { return app_status_; }
  State state() { return static_cast<State>(state_); }
  Status status() { return static_cast<Status>(status_); }
  bool Complete() { return complete_; }
  void Reset();

 private:
  void ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len);
  void ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len);
  void ParseEndRequest(const_iter &iter);

  std::vector<FCGIData> transform_data_;
  std::vector<FCGIData> name_value_data_;
  int request_id_;
  long transform_len_;
  long padding_len_;
  long long app_status_;

  bool complete_;
  bool in_content_;
  State state_;
  Status status_;
};

}
}

#endif //PLATINUM_RESPONSE_PARSER_H

fastCGI/response_parser.cc

/**
 * Created by Crow on 12/26/18.
 * Copyright (c) 2018 Crow All rights reserved.
 * @author Crow
 * @brief This file is Class ResponseParser. It can be reentrant
 */

#include "response_parser.h"

#include <cstring>
#include <string>

using namespace platinum::fcgi;

ResponseParser::ResponseParser()
    : request_id_(-1),
      transform_len_(0),
      padding_len_(0),
      app_status_(-1),
      complete_(false),
      in_content_(false),
      state_(State::UNCOMPLETED),
      status_(Status::FCGI_UNKNOWN_ROLE)
{
  transform_data_.reserve(1024);         // make sure reserve space for transform_data_
}

/**
 * @brief feed() the core to parse the FCGI response
 * @param iter  Buffer's cosnt iterator
 * @param length Buffer's length this time
 * @return parse result
 */

long ResponseParser::feed(ResponseParser::const_iter iter, long length)
{
  auto len_temp(length);
  transform_data_.clear();
  // To ensure the last parsing result is complete
  if (transform_len_) {
    auto len = transform_len_ > length ? length : transform_len_;
    transform_data_.insert(transform_data_.cend(), iter, iter + len);
    length -= len;                                                     // reduce the length
    iter += len;                                                       // move the iter
    transform_len_ -= len;
  }

  if (length == 0) {
    return (len_temp - length);
  } else if (padding_len_) {
    auto len = padding_len_ > length ? length : padding_len_;
    length -= len;
    iter += len;
    padding_len_ -= len;
  }

  while (length) {    // the whole parsing process continus utils length < 0
    if (state_ == State::COMPLETED
        || state_ == State::FAULT
        || length < sizeof(Header))
    {
      return (len_temp - length);
    }

    Header header(iter);                                                           // Construct a header
    iter += sizeof(Header);
    length -= sizeof(Header);
    request_id_ = header.request_id();
    auto ct_len = header.content_length();
    auto pd_len = header.padding_length();
    switch (header.type()) {
      case Type::FCGI_STDOUT: ParseStdout(iter, length, ct_len, pd_len); break;
      case Type::FCGI_STDERR: ParseStderr(iter, length, ct_len, pd_len); break;
      case Type::FCGI_END_REQUEST: ParseEndRequest(iter); break;
      default: break;
    }
  }

  return len_temp - length;
}

/**
 * @brief To parse the STDOUT part
 * @param iter buffer's iterator (ref)
 * @param length buffer's length (ref)
 * @param ct_len the content length of FCGI_STDOUT
 * @param pd_len the padding length of FCGI_STDOUT
 */
void ResponseParser::ParseStdout(const_iter &iter, long &length, long ct_len, long pd_len)
{
  if (ct_len == 0 && pd_len == 0)
    return ;

  auto len1 = ct_len > length ? length : ct_len;                   // judge if we have enough space to deal with

  std::string str(iter, iter + len1);
  std::string::size_type pos;
  if (!in_content_) {
    if ((pos = str.find("\r\n\r\n")) != std::string::npos) {
      name_value_data_.insert(name_value_data_.cend(), iter, iter + pos);
      iter += pos + 4;
      length -= pos + 4;
      ct_len -= pos + 4;
      len1 -= pos + 4;
    } else {
      state_ = State::FAULT;
      complete_ = true;
      return ;
    }
    in_content_ = true;
  }

  transform_data_.insert(transform_data_.cend(), iter, iter + len1);
  iter += len1;
  length -= len1;
  ct_len -= len1;

  if (length == 0) {
    transform_len_ += ct_len;
    padding_len_ = pd_len;
    return ;
  }

  auto len2 = pd_len > length ? length : pd_len;
  iter += len2;
  length -= len2;
  pd_len -= len2;

  if (length == 0) {
    padding_len_ = pd_len;
    return ;
  }
}

/**
 * @beief To parse the STDERR part
 * @param iter buffer's iterator (ref)
 * @param length buffer's length (ref)
 * @param ct_len the conten length of FCGI_STDERR
 * @param pd_len the padding length of FCGI_STDERR
 */

void ResponseParser::ParseStderr(const_iter &iter, long &length, long ct_len, long pd_len)
{
  if (ct_len == 0 && pd_len == 0)
    return ;

  auto len1 = ct_len > length ? length : ct_len;                   // judge if we have enough space to deal with

  transform_data_.insert(transform_data_.cend(), iter, iter + len1);
  iter += len1;
  length -= len1;
  ct_len -= len1;

  if (length == 0) {
    transform_len_ += ct_len;
    padding_len_ = pd_len;
    return ;
  }

  auto len2 = pd_len > length ? length : pd_len;
  iter += len2;
  length -= len2;
  pd_len -= len2;

  if (length == 0) {
    padding_len_ = pd_len;
    return ;
  }
}

/**
 * @brief To parse the EndRequestRecord part
 * @param iter Buffer's iterator
 */
void ResponseParser::ParseEndRequest(const_iter &iter)
{
  iter -= sizeof(Header);                               // back to the Header's start to constrcut the EndRequestRecord

  EndRequestRocord end_request_record(iter);

  app_status_ = end_request_record.app_status();
  status_ = end_request_record.protocol_status();

  complete_ = true;
  state_ = State::COMPLETED;
}

void ResponseParser::Reset()
{
  request_id_ = -1;
  transform_len_ = 0;
  padding_len_ = 0;
  app_status_ = -1;
  complete_ = false;
  in_content_ = false;
  state_ = State::UNCOMPLETED;
  status_ = Status::FCGI_UNKNOWN_ROLE;

  transform_data_.clear();
  name_value_data_.clear();
}

我觉得关键是ResponseParser,所以RequestBuilder就不贴出来了,有需要的可以补上

gdb 回溯

(gdb) r
Starting program: /home/Crow/CLionProjects/platinum/test/bin/response_parser 
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success

Program received signal SIGSEGV, Segmentation fault.
0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
    at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
30          : version_(*iter),
(gdb) bt
#0  0x0000000000405f40 in platinum::fcgi::Header::Header (this=0x7fffffffb788, iter=<error reading variable: Cannot access memory at address 0x638000>)
    at /home/Crow/CLionProjects/platinum/protocol/fastCGI/component.cc:30
#1  0x0000000000408c70 in platinum::fcgi::ResponseParser::feed (this=0x7fffffffb830, 
    iter=<error reading variable: Cannot access memory at address 0x638000>, length=-58305)
    at /home/Crow/CLionProjects/platinum/protocol/fastCGI/response_parser.cc:65
#2  0x0000000000402ac7 in main () at /home/Crow/CLionProjects/platinum/test/response_parser_test.cc:81
(gdb) 

没有 O_NONBLOCK

[Crow@EvilCrow bin]$ sudo ./response_parser 
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
write: Success
hello

请求PHP文件1.php

<?php
    echo hello;
?>

我希望在设置非阻塞IO后让Unix Socket正常工作。据我了解,Unix 套接字和 INET 套接字在阻塞时的行为应该相同。为什么会这样?非常感谢你。

  while (!parser.Complete()) {
    ret = ::read(fd, data.data(), 1024);
    parser.feed(data.cbegin(), static_cast<int>(ret));

如果您使用的是非阻塞套接字,::read 可能会失败并返回 EAGAIN。在这种情况下 ret 将是 -1。您的代码没有正确处理这种情况,即您实际上只是调用 parser.feed(data.cbegin(),-1) 而不是重试读取。

-1 也不会在 parser.feed 内部进行专门处理,但它只是假设长度为正。这最终会导致访问一些不存在的内存,从而导致分段错误。

请注意,您也没有正确处理写入:您的代码只是假设所有写入都会成功并写入完整的缓冲区。在您的测试中出现这种情况只是纯粹的运气,因为您没有写入太多数据 - 如果您写入更多数据,则可能会发生写入完全失败(仅使用非阻塞套接字)或仅写入部分数据(也带阻塞套接字)。