找出这两张图片之间的区别
Spot the difference between these two images
以编程方式,我的代码正在检测两个 class 图像之间的差异,并且始终拒绝一个 class,同时始终允许另一个。
我还没有发现产生错误的图像和没有产生错误的图像之间的任何区别。但是必须有一些区别,因为产生错误的是 100% 的时间,而其他的是 100% 的时间按预期工作。
特别是,我检查了颜色格式:两组中的 RGB;尺寸:无显着差异;数据类型:两者均为 uint8;像素值的大小:两者相似。
下面是两张永远不起作用的图像,后面是两张总是有效的图像:
这张图片从来没有用过:https://www.colourbox.com/preview/11906131-maple-tree-and-grass-silhouette.jpg
这张图片总是有效:http://www.modernhouse.co/wp-content/uploads/2017/07/1024px-RoseSeidlerHouseSulmanPrize.jpg
如何找出不同之处?
场景是我使用带有 Swift iOS 前端的 Firebase 将这些图像发送到 Google Cloud ML-engine 托管的 convnet。有些图像一直有效,而另一些图像则永远不会像上面那样工作。此外,当我使用 gcloud 版本预测 CLI 时,所有图像都有效。对我来说,问题必然是图像中的某些东西。因此,我在这里为成像组发帖。为了完整性,代码已按要求包含在内。
包含 index.js 个文件的代码:
'use strict';
const functions = require('firebase-functions');
const gcs = require('@google-cloud/storage');
const admin = require('firebase-admin');
const exec = require('child_process').exec;
const path = require('path');
const fs = require('fs');
const google = require('googleapis');
const sizeOf = require('image-size');
admin.initializeApp(functions.config().firebase);
const db = admin.firestore();
const rtdb = admin.database();
const dbRef = rtdb.ref();
function cmlePredict(b64img) {
return new Promise((resolve, reject) => {
google.auth.getApplicationDefault(function (err, authClient) {
if (err) {
reject(err);
}
if (authClient.createScopedRequired && authClient.createScopedRequired()) {
authClient = authClient.createScoped([
'https://www.googleapis.com/auth/cloud-platform'
]);
}
var ml = google.ml({
version: 'v1'
});
const params = {
auth: authClient,
name: 'projects/myproject-18865/models/my_model',
resource: {
instances: [
{
"image_bytes": {
"b64": b64img
}
}
]
}
};
ml.projects.predict(params, (err, result) => {
if (err) {
reject(err);
} else {
resolve(result);
}
});
});
});
}
function resizeImg(filepath) {
return new Promise((resolve, reject) => {
exec(`convert ${filepath} -resize 224x ${filepath}`, (err) => {
if (err) {
console.error('Failed to resize image', err);
reject(err);
} else {
console.log('resized image successfully');
resolve(filepath);
}
});
});
}
exports.runPrediction = functions.storage.object().onChange((event) => {
fs.rmdir('./tmp/', (err) => {
if (err) {
console.log('error deleting tmp/ dir');
}
});
const object = event.data;
const fileBucket = object.bucket;
const filePath = object.name;
const bucket = gcs().bucket(fileBucket);
const fileName = path.basename(filePath);
const file = bucket.file(filePath);
if (filePath.startsWith('images/')) {
const destination = '/tmp/' + fileName;
console.log('got a new image', filePath);
return file.download({
destination: destination
}).then(() => {
if(sizeOf(destination).width > 224) {
console.log('scaling image down...');
return resizeImg(destination);
} else {
return destination;
}
}).then(() => {
console.log('base64 encoding image...');
let bitmap = fs.readFileSync(destination);
return new Buffer(bitmap).toString('base64');
}).then((b64string) => {
console.log('sending image to CMLE...');
return cmlePredict(b64string);
}).then((result) => {
console.log(`results just returned and is: ${result}`);
let predict_proba = result.predictions[0]
const res_pred_val = Object.keys(predict_proba).map(k => predict_proba[k])
const res_val = Object.keys(result).map(k => result[k])
const class_proba = [1-res_pred_val,res_pred_val]
const opera_proba_init = 1-res_pred_val
const capitol_proba_init = res_pred_val-0
// convert fraction double to percentage int
let opera_proba = (Math.floor((opera_proba_init.toFixed(2))*100))|0
let capitol_proba = (Math.floor((capitol_proba_init.toFixed(2))*100))|0
let feature_list = ["houses", "trees"]
let outlinedImgPath = '';
let imageRef = db.collection('predicted_images').doc(filePath.slice(7));
outlinedImgPath = `outlined_img/${filePath.slice(7)}`;
imageRef.set({
image_path: outlinedImgPath,
opera_proba: opera_proba,
capitol_proba: capitol_proba
});
let predRef = dbRef.child("prediction_categories");
let arrayRef = dbRef.child("prediction_array");
predRef.set({
opera_proba: opera_proba,
capitol_proba: capitol_proba,
});
arrayRef.set({first: {
array_proba: [opera_proba,capitol_proba],
brief_description: ["a","b"],
more_details: ["aaaa","bbbb"],
feature_list: feature_list},
zummy1: "",
zummy2: ""});
return bucket.upload(destination, {destination: outlinedImgPath});
});
} else {
return 'not a new image';
}
});
问题是不良图像是灰度图像,而不是我的模型预期的 RGB。我最初是通过查看形状来检查的。但是 'bad' 图像有 3 个颜色通道,这 3 个通道中的每一个都存储相同的数字 --- 所以我的模型拒绝接受它们。此外,正如预期的那样,与我最初认为观察到的相反,事实证明 gcloud ML-engine predict CLI 对于这些图像实际上也失败了。我花了 2 天时间才弄明白!
以编程方式,我的代码正在检测两个 class 图像之间的差异,并且始终拒绝一个 class,同时始终允许另一个。
我还没有发现产生错误的图像和没有产生错误的图像之间的任何区别。但是必须有一些区别,因为产生错误的是 100% 的时间,而其他的是 100% 的时间按预期工作。
特别是,我检查了颜色格式:两组中的 RGB;尺寸:无显着差异;数据类型:两者均为 uint8;像素值的大小:两者相似。
下面是两张永远不起作用的图像,后面是两张总是有效的图像:
这张图片从来没有用过:https://www.colourbox.com/preview/11906131-maple-tree-and-grass-silhouette.jpg
这张图片总是有效:http://www.modernhouse.co/wp-content/uploads/2017/07/1024px-RoseSeidlerHouseSulmanPrize.jpg
如何找出不同之处?
场景是我使用带有 Swift iOS 前端的 Firebase 将这些图像发送到 Google Cloud ML-engine 托管的 convnet。有些图像一直有效,而另一些图像则永远不会像上面那样工作。此外,当我使用 gcloud 版本预测 CLI 时,所有图像都有效。对我来说,问题必然是图像中的某些东西。因此,我在这里为成像组发帖。为了完整性,代码已按要求包含在内。
包含 index.js 个文件的代码:
'use strict';
const functions = require('firebase-functions');
const gcs = require('@google-cloud/storage');
const admin = require('firebase-admin');
const exec = require('child_process').exec;
const path = require('path');
const fs = require('fs');
const google = require('googleapis');
const sizeOf = require('image-size');
admin.initializeApp(functions.config().firebase);
const db = admin.firestore();
const rtdb = admin.database();
const dbRef = rtdb.ref();
function cmlePredict(b64img) {
return new Promise((resolve, reject) => {
google.auth.getApplicationDefault(function (err, authClient) {
if (err) {
reject(err);
}
if (authClient.createScopedRequired && authClient.createScopedRequired()) {
authClient = authClient.createScoped([
'https://www.googleapis.com/auth/cloud-platform'
]);
}
var ml = google.ml({
version: 'v1'
});
const params = {
auth: authClient,
name: 'projects/myproject-18865/models/my_model',
resource: {
instances: [
{
"image_bytes": {
"b64": b64img
}
}
]
}
};
ml.projects.predict(params, (err, result) => {
if (err) {
reject(err);
} else {
resolve(result);
}
});
});
});
}
function resizeImg(filepath) {
return new Promise((resolve, reject) => {
exec(`convert ${filepath} -resize 224x ${filepath}`, (err) => {
if (err) {
console.error('Failed to resize image', err);
reject(err);
} else {
console.log('resized image successfully');
resolve(filepath);
}
});
});
}
exports.runPrediction = functions.storage.object().onChange((event) => {
fs.rmdir('./tmp/', (err) => {
if (err) {
console.log('error deleting tmp/ dir');
}
});
const object = event.data;
const fileBucket = object.bucket;
const filePath = object.name;
const bucket = gcs().bucket(fileBucket);
const fileName = path.basename(filePath);
const file = bucket.file(filePath);
if (filePath.startsWith('images/')) {
const destination = '/tmp/' + fileName;
console.log('got a new image', filePath);
return file.download({
destination: destination
}).then(() => {
if(sizeOf(destination).width > 224) {
console.log('scaling image down...');
return resizeImg(destination);
} else {
return destination;
}
}).then(() => {
console.log('base64 encoding image...');
let bitmap = fs.readFileSync(destination);
return new Buffer(bitmap).toString('base64');
}).then((b64string) => {
console.log('sending image to CMLE...');
return cmlePredict(b64string);
}).then((result) => {
console.log(`results just returned and is: ${result}`);
let predict_proba = result.predictions[0]
const res_pred_val = Object.keys(predict_proba).map(k => predict_proba[k])
const res_val = Object.keys(result).map(k => result[k])
const class_proba = [1-res_pred_val,res_pred_val]
const opera_proba_init = 1-res_pred_val
const capitol_proba_init = res_pred_val-0
// convert fraction double to percentage int
let opera_proba = (Math.floor((opera_proba_init.toFixed(2))*100))|0
let capitol_proba = (Math.floor((capitol_proba_init.toFixed(2))*100))|0
let feature_list = ["houses", "trees"]
let outlinedImgPath = '';
let imageRef = db.collection('predicted_images').doc(filePath.slice(7));
outlinedImgPath = `outlined_img/${filePath.slice(7)}`;
imageRef.set({
image_path: outlinedImgPath,
opera_proba: opera_proba,
capitol_proba: capitol_proba
});
let predRef = dbRef.child("prediction_categories");
let arrayRef = dbRef.child("prediction_array");
predRef.set({
opera_proba: opera_proba,
capitol_proba: capitol_proba,
});
arrayRef.set({first: {
array_proba: [opera_proba,capitol_proba],
brief_description: ["a","b"],
more_details: ["aaaa","bbbb"],
feature_list: feature_list},
zummy1: "",
zummy2: ""});
return bucket.upload(destination, {destination: outlinedImgPath});
});
} else {
return 'not a new image';
}
});
问题是不良图像是灰度图像,而不是我的模型预期的 RGB。我最初是通过查看形状来检查的。但是 'bad' 图像有 3 个颜色通道,这 3 个通道中的每一个都存储相同的数字 --- 所以我的模型拒绝接受它们。此外,正如预期的那样,与我最初认为观察到的相反,事实证明 gcloud ML-engine predict CLI 对于这些图像实际上也失败了。我花了 2 天时间才弄明白!