Generating a JavaScript file hash value from part of a file
I am using JavaScript to generate a file hash value as a unique file identifier. Please check the code below for the hash generation mechanism.
<script type="text/javascript">
// Reference: https://code.google.com/p/crypto-js/#MD5
function handleFileSelect(evt)
{
var files = evt.target.files; // FileList object
// Loop through the FileList.
for (var i = 0, f; f = files[i]; i++)
{
var reader = new FileReader();
// Closure to capture the file information.
reader.onload = (function(theFile)
{
return function(e)
{
var span = document.createElement('span');
var test = e.target.result;
//var hash = hex_md5(test);
var hash = CryptoJS.MD5(test);
var elem = document.getElementById("hashValue");
elem.value = hash;
};
})(f);
// Read in the file as a binary string.
reader.readAsBinaryString(f);
}
}
document.getElementById('videoupload').addEventListener('change', handleFileSelect, false);
</script>
However, I am having trouble generating hash values for large files, because the browser crashes on the client side.
Hashing works fine up to 30 MB, but if I try to upload a file larger than 30 MB, the process crashes.
My questions are:
- Can I generate a hash value for a part of the file, rather than reading the whole large file and crashing? If yes, how can I do that with FileReader?
- Can I specify an amount of bytes, such as the first 2000 characters of a file, to generate the hash value, so that it also works for large files?
Hopefully both of the above options work for large as well as small files. Are there any other options?
- Can I generate a hash value for a part of the file, rather than reading the whole large file and crashing? If yes, how can I do that with FileReader?
Yes, you can do that, and it is called progressive hashing:
var md5 = CryptoJS.algo.MD5.create();
md5.update("file part 1");
md5.update("file part 2");
md5.update("file part 3");
var hash = md5.finalize();
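The hash here is a CryptoJS WordArray rather than a plain string; if you need the usual hex digest you can stringify it, for example:
var digest = hash.toString(CryptoJS.enc.Hex); // 32-character hex string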
- Can I specify an amount of bytes, such as the first 2000 characters of a file, to generate the hash value, so that it also works for large files?
There is an HTML5Rocks article on how File.slice can be used to pass a sliced file to the FileReader:
var blob = file.slice(startingByte, endingByte);
reader.readAsArrayBuffer(blob);
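As a direct answer to the second question, a minimal sketch along these lines could hash only the first 2000 bytes of a file (the helper name hashFirstBytes and the callback are placeholders of mine; it assumes CryptoJS is loaded):
function hashFirstBytes(file, maxBytes, callback) {
    // Only read the first maxBytes bytes of the file.
    var blob = file.slice(0, Math.min(maxBytes, file.size));
    var reader = new FileReader();
    reader.onload = function (e) {
        // Parse the binary string as Latin1 so bytes above 127 are not
        // mangled by CryptoJS's default UTF-8 string handling.
        var wordArray = CryptoJS.enc.Latin1.parse(e.target.result);
        callback(CryptoJS.MD5(wordArray).toString());
    };
    reader.readAsBinaryString(blob);
}
// e.g. hashFirstBytes(files[0], 2000, function (hash) { alert(hash); });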
Full solution
I have combined both. The tricky part was synchronizing the file reads, because FileReader.readAsArrayBuffer() is asynchronous. I've written a small series function which is modeled after the series function of async.js. The chunks have to be processed one after the other, because there is no way to get at the internal state of CryptoJS's hashing function.
Additionally, CryptoJS doesn't understand what an ArrayBuffer is, so it has to be converted to its native data representation, the so-called WordArray:
function arrayBufferToWordArray(ab) {
var i8a = new Uint8Array(ab);
var a = [];
for (var i = 0; i < i8a.length; i += 4) {
a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
}
return CryptoJS.lib.WordArray.create(a, i8a.length);
}
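For illustration, a chunk that has been read into an ArrayBuffer (here the hypothetical variable buffer from a FileReader onload handler) would then be hashed like this:
var hash = CryptoJS.MD5(arrayBufferToWordArray(buffer)).toString();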
Another thing is that hashing is a synchronous operation where there is no yield to continue execution elsewhere. Because of this, the browser would freeze, since JavaScript is single-threaded. The solution is to use Web Workers to offload the hashing to a different thread, so that the UI thread stays responsive.
Web Workers expect the script file in their constructor, so I used this solution by Rob W to have an inline script.
function series(tasks, done){
if(!tasks || tasks.length === 0) {
done();
} else {
tasks[0](function(){
series(tasks.slice(1), done);
});
}
}
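// Example usage (sketch, names are illustrative): run two asynchronous tasks
// one after the other and report when both have finished:
// series([
//     function(next){ setTimeout(next, 100); },
//     function(next){ setTimeout(next, 100); }
// ], function(){ console.log("all tasks done"); });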
function webWorkerOnMessage(e){
if (e.data.type === "create") {
md5 = CryptoJS.algo.MD5.create();
postMessage({type: "create"});
} else if (e.data.type === "update") {
function arrayBufferToWordArray(ab) {
var i8a = new Uint8Array(ab);
var a = [];
for (var i = 0; i < i8a.length; i += 4) {
a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
}
return CryptoJS.lib.WordArray.create(a, i8a.length);
}
md5.update(arrayBufferToWordArray(e.data.chunk));
postMessage({type: "update"});
} else if (e.data.type === "finish") {
postMessage({type: "finish", hash: ""+md5.finalize()});
}
}
// URL.createObjectURL
window.URL = window.URL || window.webkitURL;
// "Server response", used in all examples
var response =
"importScripts('https://cdn.rawgit.com/CryptoStore/crypto-js/3.1.2/build/rollups/md5.js');"+
"var md5;"+
"self.onmessage = "+webWorkerOnMessage.toString();
var blob;
try {
blob = new Blob([response], {type: 'application/javascript'});
} catch (e) { // Backwards-compatibility
window.BlobBuilder = window.BlobBuilder || window.WebKitBlobBuilder || window.MozBlobBuilder;
blob = new BlobBuilder();
blob.append(response);
blob = blob.getBlob();
}
var worker = new Worker(URL.createObjectURL(blob));
var files = evt.target.files; // FileList object
var chunksize = 1000000; // the chunk size doesn't make a difference
var i = 0,
f = files[i],
chunks = Math.ceil(f.size / chunksize),
chunkTasks = [],
startTime = (new Date()).getTime();
worker.onmessage = function(e) {
// create callback
for(var j = 0; j < chunks; j++){
(function(j, f){
chunkTasks.push(function(next){
var blob = f.slice(j * chunksize, Math.min((j+1) * chunksize, f.size));
var reader = new FileReader();
reader.onload = function(e) {
var chunk = e.target.result;
worker.onmessage = function(e) {
// update callback
document.getElementById('num').innerHTML = ""+(j+1)+"/"+chunks;
next();
};
worker.postMessage({type: "update", chunk: chunk});
};
reader.readAsArrayBuffer(blob);
});
})(j, f);
}
series(chunkTasks, function(){
var elem = document.getElementById("hashValueSplit");
var telem = document.getElementById("time");
worker.onmessage = function(e) {
// finish callback
elem.value = e.data.hash;
telem.innerHTML = "in " + Math.ceil(((new Date()).getTime() - startTime) / 1000) + " seconds";
};
worker.postMessage({type: "finish"});
});
// blocking way ahead...
if (document.getElementById("singleHash").checked) {
var reader = new FileReader();
// Closure to capture the file information.
reader.onloadend = (function(theFile) {
function arrayBufferToWordArray(ab) {
var i8a = new Uint8Array(ab);
var a = [];
for (var i = 0; i < i8a.length; i += 4) {
a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
}
return CryptoJS.lib.WordArray.create(a, i8a.length);
}
return function(e) {
var test = e.target.result;
var hash = CryptoJS.MD5(arrayBufferToWordArray(test));
//var hash = "none";
var elem = document.getElementById("hashValue");
elem.value = hash;
};
})(f);
// Read in the file as an ArrayBuffer.
reader.readAsArrayBuffer(f);
}
};
worker.postMessage({type: "create"});
The DEMO seems to work for large files, but it takes quite a lot of time. Maybe this can be improved by using a faster MD5 implementation. It took around 23 minutes to hash a 3 GB file.
There is also an example of SHA-256 hashing without Web Workers.