使用 Backblaze B2 原生 API(PHP)分段上传大文件时,出现“校验和与接收到的数据不匹配”错误
Checksum did not match data received error on uploading large file in parts using b2 backblaze native php API
我正在使用以下文档从 PHP 调用 Backblaze B2 API:https://www.backblaze.com/b2/docs/b2_upload_part.html
其中有以下代码:
<?php
/**
 * Read callback registered with curl through CURLOPT_READFUNCTION.
 *
 * Returns up to $length bytes from the current position of $file_pointer.
 * It keeps no record of where the current part ends.
 */
function myReadFile($curl_rsrc, $file_pointer, $length) {
    $bytes = fread($file_pointer, $length);
    return $bytes;
}

// Upload the file part by part.
$minimum_part_size = 100 * (1000 * 1000); // Obtained from b2_authorize_account. The default is 100 MB
$local_file = "<path to large file>";
$local_file_size = filesize($local_file);
$total_bytes_sent = 0;
$bytes_sent_for_part = $minimum_part_size;
$sha1_of_parts = [];
$part_no = 1;
$file_handle = fopen($local_file, "r");

while ($total_bytes_sent < $local_file_size) {
    // The last part may be shorter than the minimum part size.
    $bytes_sent_for_part = min($bytes_sent_for_part, $local_file_size - $total_bytes_sent);

    // Hash the bytes of this part, then rewind to the start of the part
    // so that curl reads from the same offset.
    fseek($file_handle, $total_bytes_sent);
    $data_part = fread($file_handle, $bytes_sent_for_part);
    $sha1_of_parts[] = sha1($data_part);
    fseek($file_handle, $total_bytes_sent);

    // Request headers for this part.
    $headers = [
        "Accept: application/json",
        "Authorization: " . $large_file_auth_token,
        "Content-Length: " . $bytes_sent_for_part,
        "X-Bz-Part-Number: " . $part_no,
        "X-Bz-Content-Sha1: " . $sha1_of_parts[$part_no - 1],
    ];

    // Send the part over the wire.
    $session = curl_init($upload_url);
    curl_setopt_array($session, [
        CURLOPT_POST => true,
        CURLOPT_HTTPHEADER => $headers,
        CURLOPT_INFILE => $file_handle,
        CURLOPT_INFILESIZE => (int)$bytes_sent_for_part,
        CURLOPT_RETURNTRANSFER => true, // Receive server response
        CURLOPT_READFUNCTION => "myReadFile",
    ]);
    $server_output = curl_exec($session);
    curl_close($session);
    echo $server_output . "\n";

    // Advance to the next part.
    $part_no += 1;
    $total_bytes_sent += $bytes_sent_for_part;
}

fclose($file_handle);
?>
这给了我回应:
{
"code": "bad_request",
"message": "Checksum did not match data received",
"status": 400
}
文档中写道:
CURLOPT_INFILESIZE
The expected size, in bytes, of the file when uploading a file to a remote site. Note that using this option will not stop libcurl from sending more data, as exactly what is sent depends on CURLOPT_READFUNCTION.
这似乎暗示 curl 每次只是读取到文件末尾。
您需要做的是让提供给 CURLOPT_READFUNCTION 的函数更“聪明”一些,使它知道应该在何处停止读取当前的分块。
为此,我写了一个 CurlFileChunker
类,用来封装这项任务及其所有相关的函数和变量。
/**
 * Serves a file to curl one fixed-size chunk at a time.
 *
 * The object wraps an open, seekable file handle. For every chunk the caller
 * asks for the hash and length (getChunkInfo), lets curl pull the bytes via
 * curlReadFunction (which stops at the chunk boundary), and then calls next()
 * to open the window for the following chunk.
 */
class CurlFileChunker {
    /** @var resource File handle the chunks are read from. */
    private $fp;
    /** @var int Maximum number of bytes in one chunk. */
    protected $chunkSize;
    /** @var int File position at which the next read for the current chunk starts. */
    protected $offset;
    /** @var int File position at which the current chunk ends (exclusive). */
    protected $nextStop;

    /**
     * @param resource $fp        Open file handle; chunking starts at its current position.
     * @param int      $chunkSize Chunk size in bytes, must be at least 1.
     *
     * @throws \InvalidArgumentException When $chunkSize is not positive.
     */
    public function __construct($fp, int $chunkSize) {
        if ($chunkSize < 1) {
            throw new \InvalidArgumentException('Chunk size must be a positive number of bytes.');
        }
        $this->fp = $fp;
        $this->chunkSize = $chunkSize;
        $this->offset = ftell($fp);
        $this->nextStop = $this->offset + $this->chunkSize;
    }

    /**
     * Reads up to one chunk from the current file position.
     *
     * @return string|false Whatever fread() returns.
     */
    protected function getChunk() {
        return fread($this->fp, $this->chunkSize);
    }

    /**
     * Moves the file pointer back to the recorded offset.
     */
    protected function reset() {
        fseek($this->fp, $this->offset);
    }

    /**
     * Tells whether there is no more data to chunk.
     *
     * feof() alone only becomes true after a read has hit end-of-file. When
     * the file size is an exact multiple of the chunk size no such read
     * happens, and the caller would be handed one extra, empty chunk. A
     * one-byte probe that restores the position afterwards avoids that.
     *
     * @return bool True when no further bytes can be read.
     */
    public function eof() {
        if (feof($this->fp)) {
            return true;
        }
        $position = ftell($this->fp);
        if ($position === false) {
            // Position unknown, so the probe cannot be undone; rely on feof().
            return false;
        }
        $probe = fread($this->fp, 1);
        fseek($this->fp, $position);
        return $probe === false || $probe === '';
    }

    /**
     * Computes the hash and byte length of the upcoming chunk, then rewinds
     * so the same bytes can be read again by curlReadFunction().
     *
     * @param string $hashMethod Algorithm name accepted by hash(), e.g. 'sha1'.
     *
     * @return array ['hash' => string, 'length' => int]
     */
    public function getChunkInfo(string $hashMethod) {
        // A failed read is treated as an empty chunk.
        $chunk = (string) $this->getChunk();
        $info = [
            'hash' => hash($hashMethod, $chunk),
            'length' => strlen($chunk)
        ];
        $this->reset();
        return $info;
    }

    /**
     * Opens the window for the next chunk, starting at the current offset.
     */
    public function next() {
        $this->nextStop = $this->offset + $this->chunkSize;
    }

    /**
     * Read callback for CURLOPT_READFUNCTION that never reads past the end
     * of the current chunk.
     *
     * @param mixed    $ch     Curl handle (unused).
     * @param resource $fp     Handle curl passes in; must be the wrapped handle.
     * @param int      $length Maximum number of bytes curl asks for.
     *
     * @return string|false Data read, or '' once the chunk is exhausted.
     *
     * @throws \Exception When $fp is not the handle given to the constructor.
     */
    public function curlReadFunction($ch, $fp, int $length) {
        if( $fp !== $this->fp ) {
            throw new \Exception('File handle supplied differs from expected.');
        }
        $remaining = $this->nextStop - $this->offset;
        if( $remaining > 0 ) {
            // Serve the requested amount, clipped to the chunk boundary.
            $out = fread($this->fp, min($length, $remaining));
        } else {
            // Chunk exhausted: an empty string signals EOF to curl.
            $out = '';
        }
        $this->offset = ftell($this->fp);
        return $out;
    }
}
下面是一个用法示例,模拟 curl 每次读取的片段比分块更小的情况:
// Demo: drive CurlFileChunker by hand, reading 7 bytes at a time the way
// curl might, against an in-memory stream split into 10-byte chunks.
$fp = fopen('php://memory', 'w+b'); // 'w+b' is the documented read/write mode
fwrite($fp, 'lorem ipsum dolor sit amet');
rewind($fp);
$c = new CurlFileChunker($fp, 10);
while( ! $c->eof() ) {
    // Hash and length announced for the upcoming chunk.
    $info = $c->getChunkInfo('sha1');
    var_dump($info);
    // Collect the chunk through the read callback until it reports EOF ('').
    $chunk = '';
    while( $part = $c->curlReadFunction(NULL, $fp, 7) ) {
        $chunk .= $part;
    }
    var_dump($chunk);
    // Print the hash of what was actually read, so it can be compared
    // with the hash announced by getChunkInfo().
    var_dump(hash('sha1', $chunk));
    $c->next();
}
输出:
array(2) {
["hash"]=>
string(40) "94ae3406c7e5e2ba31208dc623c20d2a107bfec2"
["length"]=>
int(10)
}
string(10) "lorem ipsu"
string(40) "94ae3406c7e5e2ba31208dc623c20d2a107bfec2"
array(2) {
["hash"]=>
string(40) "aebf816b6e13941737d5045c294ffe785ca55733"
["length"]=>
int(10)
}
string(10) "m dolor si"
string(40) "aebf816b6e13941737d5045c294ffe785ca55733"
array(2) {
["hash"]=>
string(40) "21d8e40707fa773b532ae892f82c057e92764f3a"
["length"]=>
int(6)
}
string(6) "t amet"
string(40) "21d8e40707fa773b532ae892f82c057e92764f3a"
你的代码大致变成:
// Upload the file in parts, letting CurlFileChunker stop curl at each part
// boundary. $local_file, $upload_url and $large_file_auth_token are expected
// to be set by the surrounding code.
$file_handle = fopen($local_file, "rb");
$c = new CurlFileChunker($file_handle, 10 * 1024 * 1024);
$part_no = 1; // B2 part numbers start at 1
while( ! $c->eof() ) {
    // Hash and length of the part about to be sent.
    $info = $c->getChunkInfo('sha1');
    $session = curl_init($upload_url);
    $headers = array();
    $headers[] = "Accept: application/json";
    $headers[] = "Authorization: " . $large_file_auth_token;
    $headers[] = "Content-Length: " . $info['length'];
    $headers[] = "X-Bz-Part-Number: " . $part_no;
    $headers[] = "X-Bz-Content-Sha1: " . $info['hash'];
    curl_setopt($session, CURLOPT_POST, true);
    curl_setopt($session, CURLOPT_HTTPHEADER, $headers); // Add headers
    curl_setopt($session, CURLOPT_INFILE, $file_handle);
    curl_setopt($session, CURLOPT_INFILESIZE, $info['length']);
    curl_setopt($session, CURLOPT_RETURNTRANSFER, true); // Receive server response
    curl_setopt($session, CURLOPT_READFUNCTION, [$c, 'curlReadFunction']);
    $server_output = curl_exec($session);
    curl_close ($session);
    // Move on to the next part.
    $part_no++;
    $c->next();
}
fclose($file_handle);
在尝试重现此问题时,我对观察到的行为做了一番研究,最终追查到 PHP 的 curl 实现处理 POST 请求的方式。
PHP 的 curl 默认为 chunked
transfer encoding,B2 Native API 不支持。对我来说,这引发了一个 'missing Content-Length header' 错误,但它也可能是您所看到的错误的原因,具体取决于您的代码。
我的解决方法是在 header 数组中包含一个空的 Transfer-Encoding
header:
$headers[] = "Transfer-Encoding:";
我将各种必需的 B2 Native API 调用组合成一个完整的工作 PHP B2 分段上传示例,该示例从命令行运行:https://gist.github.com/metadaddy/23e75d00b21bc63576f0ba317ad43709
希望,即使空 Transfer-Encoding
header 不能为您解决问题,示例代码也能帮助您继续前进。
我正在使用以下文档从 PHP 调用 Backblaze B2 API:https://www.backblaze.com/b2/docs/b2_upload_part.html
其中有以下代码:
<?php
/**
 * Read callback registered with curl through CURLOPT_READFUNCTION.
 *
 * Returns up to $length bytes from the current position of $file_pointer.
 * It keeps no record of where the current part ends.
 */
function myReadFile($curl_rsrc, $file_pointer, $length) {
    $bytes = fread($file_pointer, $length);
    return $bytes;
}

// Upload the file part by part.
$minimum_part_size = 100 * (1000 * 1000); // Obtained from b2_authorize_account. The default is 100 MB
$local_file = "<path to large file>";
$local_file_size = filesize($local_file);
$total_bytes_sent = 0;
$bytes_sent_for_part = $minimum_part_size;
$sha1_of_parts = [];
$part_no = 1;
$file_handle = fopen($local_file, "r");

while ($total_bytes_sent < $local_file_size) {
    // The last part may be shorter than the minimum part size.
    $bytes_sent_for_part = min($bytes_sent_for_part, $local_file_size - $total_bytes_sent);

    // Hash the bytes of this part, then rewind to the start of the part
    // so that curl reads from the same offset.
    fseek($file_handle, $total_bytes_sent);
    $data_part = fread($file_handle, $bytes_sent_for_part);
    $sha1_of_parts[] = sha1($data_part);
    fseek($file_handle, $total_bytes_sent);

    // Request headers for this part.
    $headers = [
        "Accept: application/json",
        "Authorization: " . $large_file_auth_token,
        "Content-Length: " . $bytes_sent_for_part,
        "X-Bz-Part-Number: " . $part_no,
        "X-Bz-Content-Sha1: " . $sha1_of_parts[$part_no - 1],
    ];

    // Send the part over the wire.
    $session = curl_init($upload_url);
    curl_setopt_array($session, [
        CURLOPT_POST => true,
        CURLOPT_HTTPHEADER => $headers,
        CURLOPT_INFILE => $file_handle,
        CURLOPT_INFILESIZE => (int)$bytes_sent_for_part,
        CURLOPT_RETURNTRANSFER => true, // Receive server response
        CURLOPT_READFUNCTION => "myReadFile",
    ]);
    $server_output = curl_exec($session);
    curl_close($session);
    echo $server_output . "\n";

    // Advance to the next part.
    $part_no += 1;
    $total_bytes_sent += $bytes_sent_for_part;
}

fclose($file_handle);
?>
这给了我回应:
{
"code": "bad_request",
"message": "Checksum did not match data received",
"status": 400
}
文档中写道:
CURLOPT_INFILESIZE
The expected size, in bytes, of the file when uploading a file to a remote site. Note that using this option will not stop libcurl from sending more data, as exactly what is sent depends on CURLOPT_READFUNCTION.
这似乎暗示 curl 每次只是读取到文件末尾。
您需要做的是让提供给 CURLOPT_READFUNCTION 的函数更“聪明”一些,使它知道应该在何处停止读取当前的分块。
为此,我写了一个 CurlFileChunker
类,用来封装这项任务及其所有相关的函数和变量。
/**
 * Serves a file to curl one fixed-size chunk at a time.
 *
 * The object wraps an open, seekable file handle. For every chunk the caller
 * asks for the hash and length (getChunkInfo), lets curl pull the bytes via
 * curlReadFunction (which stops at the chunk boundary), and then calls next()
 * to open the window for the following chunk.
 */
class CurlFileChunker {
    /** @var resource File handle the chunks are read from. */
    private $fp;
    /** @var int Maximum number of bytes in one chunk. */
    protected $chunkSize;
    /** @var int File position at which the next read for the current chunk starts. */
    protected $offset;
    /** @var int File position at which the current chunk ends (exclusive). */
    protected $nextStop;

    /**
     * @param resource $fp        Open file handle; chunking starts at its current position.
     * @param int      $chunkSize Chunk size in bytes, must be at least 1.
     *
     * @throws \InvalidArgumentException When $chunkSize is not positive.
     */
    public function __construct($fp, int $chunkSize) {
        if ($chunkSize < 1) {
            throw new \InvalidArgumentException('Chunk size must be a positive number of bytes.');
        }
        $this->fp = $fp;
        $this->chunkSize = $chunkSize;
        $this->offset = ftell($fp);
        $this->nextStop = $this->offset + $this->chunkSize;
    }

    /**
     * Reads up to one chunk from the current file position.
     *
     * @return string|false Whatever fread() returns.
     */
    protected function getChunk() {
        return fread($this->fp, $this->chunkSize);
    }

    /**
     * Moves the file pointer back to the recorded offset.
     */
    protected function reset() {
        fseek($this->fp, $this->offset);
    }

    /**
     * Tells whether there is no more data to chunk.
     *
     * feof() alone only becomes true after a read has hit end-of-file. When
     * the file size is an exact multiple of the chunk size no such read
     * happens, and the caller would be handed one extra, empty chunk. A
     * one-byte probe that restores the position afterwards avoids that.
     *
     * @return bool True when no further bytes can be read.
     */
    public function eof() {
        if (feof($this->fp)) {
            return true;
        }
        $position = ftell($this->fp);
        if ($position === false) {
            // Position unknown, so the probe cannot be undone; rely on feof().
            return false;
        }
        $probe = fread($this->fp, 1);
        fseek($this->fp, $position);
        return $probe === false || $probe === '';
    }

    /**
     * Computes the hash and byte length of the upcoming chunk, then rewinds
     * so the same bytes can be read again by curlReadFunction().
     *
     * @param string $hashMethod Algorithm name accepted by hash(), e.g. 'sha1'.
     *
     * @return array ['hash' => string, 'length' => int]
     */
    public function getChunkInfo(string $hashMethod) {
        // A failed read is treated as an empty chunk.
        $chunk = (string) $this->getChunk();
        $info = [
            'hash' => hash($hashMethod, $chunk),
            'length' => strlen($chunk)
        ];
        $this->reset();
        return $info;
    }

    /**
     * Opens the window for the next chunk, starting at the current offset.
     */
    public function next() {
        $this->nextStop = $this->offset + $this->chunkSize;
    }

    /**
     * Read callback for CURLOPT_READFUNCTION that never reads past the end
     * of the current chunk.
     *
     * @param mixed    $ch     Curl handle (unused).
     * @param resource $fp     Handle curl passes in; must be the wrapped handle.
     * @param int      $length Maximum number of bytes curl asks for.
     *
     * @return string|false Data read, or '' once the chunk is exhausted.
     *
     * @throws \Exception When $fp is not the handle given to the constructor.
     */
    public function curlReadFunction($ch, $fp, int $length) {
        if( $fp !== $this->fp ) {
            throw new \Exception('File handle supplied differs from expected.');
        }
        $remaining = $this->nextStop - $this->offset;
        if( $remaining > 0 ) {
            // Serve the requested amount, clipped to the chunk boundary.
            $out = fread($this->fp, min($length, $remaining));
        } else {
            // Chunk exhausted: an empty string signals EOF to curl.
            $out = '';
        }
        $this->offset = ftell($this->fp);
        return $out;
    }
}
下面是一个用法示例,模拟 curl 每次读取的片段比分块更小的情况:
// Demo: drive CurlFileChunker by hand, reading 7 bytes at a time the way
// curl might, against an in-memory stream split into 10-byte chunks.
$fp = fopen('php://memory', 'w+b'); // 'w+b' is the documented read/write mode
fwrite($fp, 'lorem ipsum dolor sit amet');
rewind($fp);
$c = new CurlFileChunker($fp, 10);
while( ! $c->eof() ) {
    // Hash and length announced for the upcoming chunk.
    $info = $c->getChunkInfo('sha1');
    var_dump($info);
    // Collect the chunk through the read callback until it reports EOF ('').
    $chunk = '';
    while( $part = $c->curlReadFunction(NULL, $fp, 7) ) {
        $chunk .= $part;
    }
    var_dump($chunk);
    // Print the hash of what was actually read, so it can be compared
    // with the hash announced by getChunkInfo().
    var_dump(hash('sha1', $chunk));
    $c->next();
}
输出:
array(2) {
["hash"]=>
string(40) "94ae3406c7e5e2ba31208dc623c20d2a107bfec2"
["length"]=>
int(10)
}
string(10) "lorem ipsu"
string(40) "94ae3406c7e5e2ba31208dc623c20d2a107bfec2"
array(2) {
["hash"]=>
string(40) "aebf816b6e13941737d5045c294ffe785ca55733"
["length"]=>
int(10)
}
string(10) "m dolor si"
string(40) "aebf816b6e13941737d5045c294ffe785ca55733"
array(2) {
["hash"]=>
string(40) "21d8e40707fa773b532ae892f82c057e92764f3a"
["length"]=>
int(6)
}
string(6) "t amet"
string(40) "21d8e40707fa773b532ae892f82c057e92764f3a"
你的代码大致变成:
// Upload the file in parts, letting CurlFileChunker stop curl at each part
// boundary. $local_file, $upload_url and $large_file_auth_token are expected
// to be set by the surrounding code.
$file_handle = fopen($local_file, "rb");
$c = new CurlFileChunker($file_handle, 10 * 1024 * 1024);
$part_no = 1; // B2 part numbers start at 1
while( ! $c->eof() ) {
    // Hash and length of the part about to be sent.
    $info = $c->getChunkInfo('sha1');
    $session = curl_init($upload_url);
    $headers = array();
    $headers[] = "Accept: application/json";
    $headers[] = "Authorization: " . $large_file_auth_token;
    $headers[] = "Content-Length: " . $info['length'];
    $headers[] = "X-Bz-Part-Number: " . $part_no;
    $headers[] = "X-Bz-Content-Sha1: " . $info['hash'];
    curl_setopt($session, CURLOPT_POST, true);
    curl_setopt($session, CURLOPT_HTTPHEADER, $headers); // Add headers
    curl_setopt($session, CURLOPT_INFILE, $file_handle);
    curl_setopt($session, CURLOPT_INFILESIZE, $info['length']);
    curl_setopt($session, CURLOPT_RETURNTRANSFER, true); // Receive server response
    curl_setopt($session, CURLOPT_READFUNCTION, [$c, 'curlReadFunction']);
    $server_output = curl_exec($session);
    curl_close ($session);
    // Move on to the next part.
    $part_no++;
    $c->next();
}
fclose($file_handle);
在尝试重现此问题时,我对观察到的行为做了一番研究,最终追查到 PHP 的 curl 实现处理 POST 请求的方式。
PHP 的 curl 默认为 chunked
transfer encoding,B2 Native API 不支持。对我来说,这引发了一个 'missing Content-Length header' 错误,但它也可能是您所看到的错误的原因,具体取决于您的代码。
我的解决方法是在 header 数组中包含一个空的 Transfer-Encoding
header:
$headers[] = "Transfer-Encoding:";
我将各种必需的 B2 Native API 调用组合成一个完整的工作 PHP B2 分段上传示例,该示例从命令行运行:https://gist.github.com/metadaddy/23e75d00b21bc63576f0ba317ad43709
希望,即使空 Transfer-Encoding
header 不能为您解决问题,示例代码也能帮助您继续前进。