BigQuery UDF 减少所有行
BigQuery UDF reducing all rows
我定义了以下 UDF(注意我的表有一个 'Id' 字段和一个带有子字段 'Raw' 的 'Reading' 对象):
// UDF definition
/**
 * Row-level UDF body: produces exactly one output record per input row.
 *
 * @param {Object} row - Input record; row.Id and row.Reading.Raw are read.
 * @param {Function} emit - Callback invoked once with the output record.
 */
function hexdecode(row, emit) {
  var output = {};
  output.Id = row.Id;
  // The hex-to-integer conversion is delegated to decodeHelper.
  output.converted = decodeHelper(row.Reading.Raw);
  emit(output);
}
// Helper function with error handling
/**
 * Converts a hexadecimal string to an integer.
 *
 * parseInt() never throws on bad input; it reports failure by returning NaN.
 * A try/catch around it is therefore dead code, and NaN would leak into an
 * INTEGER output column. The result is checked explicitly instead.
 *
 * @param {string} s - Hexadecimal text, e.g. "000b".
 * @returns {?number} The parsed integer, or null when s cannot be parsed.
 */
function decodeHelper(s) {
  var value = parseInt(s, 16);
  // value is always a number here, so the global isNaN does no coercion.
  return isNaN(value) ? null : value;
}
// UDF registration: binds the SQL-visible name, the input columns and the
// output schema to the hexdecode function defined above.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['Id', 'Reading.Raw'], // Input column names (nested field given by its dotted path)
// JSON representation of the output schema; the field names match the keys
// of the object that hexdecode passes to emit()
[{name: 'Id', type: 'STRING'},
{name: 'converted', type: 'INTEGER'}],
hexdecode // The function reference
);
我的查询是:
select Id, converted from
hexdecode(
select r.Id, r.Reading.Raw from Example.TagRaw2 r
)
通过将以下 JSON 上传到 table Example.TagRaw2
{"Id":"ABC","Reading":{"Raw":"0004"}}
{"Id":"CDE","Reading":{"Raw":"000b"}}
我希望这会将我的列从十六进制转换为整数,但它却只返回了一个几乎没有意义的条目。
知道我做错了什么吗?
编辑: 我添加了一个示例 JSON 来上传以尝试重现该问题.. 但它现在似乎有效。如果我能找出原来的错误,我会再次更新,但上面的 UDF 似乎完全符合我的要求。
问题 - 您的 Reading 对象是否是一个重复的列?
如果是这样,您需要执行以下操作:
/**
 * UDF body for a repeated `reading` field: emits one output record per
 * element of r.reading.
 *
 * @param {Object} r - Input record; r.Id and r.reading[*].Raw are read.
 * @param {Function} emit - Callback invoked once per element of r.reading.
 */
function hexdecode(r, emit) {
  var index = 0;
  while (index < r.reading.length) {
    var entry = r.reading[index];
    // Each Raw value is parsed as base-16 text.
    emit({ tag: r.Id, num: parseInt(entry.Raw, 16) });
    index += 1;
  }
}
// UDF registration for the repeated-field variant of hexdecode shown above.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['Id', 'reading.Raw'], // Input column names
// Output schema: one entry per key of the emitted object (tag, num)
[{name: 'tag', type: 'string'},
{name: 'num', type: 'integer'}],
hexdecode // The function reference
);
如果您尝试访问 r.reading.Raw,这将尝试访问 JavaScript 数组的 Raw 属性。这是完全有效的 JavaScript,但它肯定不是您想要的,因为该值是 undefined(即空值)。
一个方便的技巧是打印出输入记录的 JSON 字符串,以查看 UDF 将什么视为输入:
// Debugging UDF: registers 'jsonifyObj', which emits a single string column
// 'obj' holding the JSON serialization of the input record.
bigquery.defineFunction(
'jsonifyObj', // Name used to call the function from SQL
['Id', 'reading.Raw'], // Input column names
[{name: 'obj', type: 'string'}], // Output schema
// Serialize the whole input record so its actual shape can be inspected
function(r, emit) { emit({obj: JSON.stringify(r)}); }
);
这有助于调试问题。我怀疑这种情况下的问题是您的数据看起来更像
[{"Id":"ABC","Reading":[{"Raw":"0004"}, {"Raw": "00ff"}]},
{"Id":"CDE","Reading":[{"Raw":"000b"}, {"Raw": "0012"}]}]
-------- 2015-11-17 更新--------
您的代码有一些问题,请在下方注释:
// Asker's original version, reproduced unchanged so that the two problems
// flagged by the NOTE comments below stay visible. Do not use as-is.
function hexdecode(row, emit) {
for (var i = 0; i < row.reading.length; ++i) {
// NOTE: `tag` and `row.Id` are wrong; this must be `TagId` and `row.TagId` to match the registered input columns and output schema.
emit({ tag: row.Id,
times: row.reading[i].Timestamp,
// NOTE: This is a recursive call! It should call decodeHelper(), not hexdecode().
convert: hexdecode(row.reading[i].RawCO) });
}
}
// Helper function with error handling
/**
 * Converts a hexadecimal string to an integer.
 *
 * parseInt() never throws on bad input; it reports failure by returning NaN.
 * A try/catch around it is therefore dead code, and NaN would leak into an
 * INTEGER output column. The result is checked explicitly instead.
 *
 * @param {string} s - Hexadecimal text, e.g. "000b".
 * @returns {?number} The parsed integer, or null when s cannot be parsed.
 */
function decodeHelper(s) {
  var value = parseInt(s, 16);
  // value is always a number here, so the global isNaN does no coercion.
  return isNaN(value) ? null : value;
}
// UDF registration: binds the SQL-visible name, the input columns and the
// output schema to hexdecode.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['TagId', 'reading.Timestamp', 'reading.RawCO'], // Input column names
// JSON representation of the output schema (TagId, times, convert); the
// keys of every emitted object have to use these names
[{name: 'TagId', type: 'STRING'},
{name: 'times', type: 'INTEGER'},
{name: 'convert', type: 'INTEGER'}],
hexdecode // The function reference
);
您的嵌套 select 返回 0 行,所以我将 SQL 更新如下:
select
TagID, times, convert
from hexdecode(
select r.TagId, r.Reading.Timestamp, r.Reading.RawCO from [table.Readings] r where r.Reading.RawCO is not NULL and r.Reading.PPM is
not NULL
and r.TagId = 'Tag_00000000'
)
这里是更正后的代码:
/**
 * Corrected UDF body: emits one output record per element of the repeated
 * `reading` field.
 *
 * @param {Object} row - Input record; row.TagId and, per element of
 *     row.reading, Timestamp and RawCO are read.
 * @param {Function} emit - Callback invoked once per element of row.reading.
 */
function hexdecode(row, emit) {
  var index = 0;
  while (index < row.reading.length) {
    var reading = row.reading[index];
    emit({
      TagId: row.TagId,
      times: reading.Timestamp,
      // The hex-to-integer conversion is delegated to decodeHelper.
      convert: decodeHelper(reading.RawCO)
    });
    index += 1;
  }
}
// Helper function with error handling
/**
 * Converts a hexadecimal string to an integer.
 *
 * parseInt() never throws on bad input; it reports failure by returning NaN.
 * A try/catch around it is therefore dead code, and NaN would leak into an
 * INTEGER output column. The result is checked explicitly instead.
 *
 * @param {string} s - Hexadecimal text, e.g. "000b".
 * @returns {?number} The parsed integer, or null when s cannot be parsed.
 */
function decodeHelper(s) {
  var value = parseInt(s, 16);
  // value is always a number here, so the global isNaN does no coercion.
  return isNaN(value) ? null : value;
}
// UDF registration: binds the SQL-visible name, the input columns and the
// output schema to the corrected hexdecode above.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['TagId', 'reading.Timestamp', 'reading.RawCO'], // Input column names
// JSON representation of the output schema; the field names match the keys
// of the object that hexdecode passes to emit()
[{name: 'TagId', type: 'STRING'},
{name: 'times', type: 'INTEGER'},
{name: 'convert', type: 'INTEGER'}],
hexdecode // The function reference
);
我定义了以下 UDF(注意我的表有一个 'Id' 字段和一个带有子字段 'Raw' 的 'Reading' 对象):
// UDF definition
/**
 * Row-level UDF body: produces exactly one output record per input row.
 *
 * @param {Object} row - Input record; row.Id and row.Reading.Raw are read.
 * @param {Function} emit - Callback invoked once with the output record.
 */
function hexdecode(row, emit) {
  var output = {};
  output.Id = row.Id;
  // The hex-to-integer conversion is delegated to decodeHelper.
  output.converted = decodeHelper(row.Reading.Raw);
  emit(output);
}
// Helper function with error handling
/**
 * Converts a hexadecimal string to an integer.
 *
 * parseInt() never throws on bad input; it reports failure by returning NaN.
 * A try/catch around it is therefore dead code, and NaN would leak into an
 * INTEGER output column. The result is checked explicitly instead.
 *
 * @param {string} s - Hexadecimal text, e.g. "000b".
 * @returns {?number} The parsed integer, or null when s cannot be parsed.
 */
function decodeHelper(s) {
  var value = parseInt(s, 16);
  // value is always a number here, so the global isNaN does no coercion.
  return isNaN(value) ? null : value;
}
// UDF registration: binds the SQL-visible name, the input columns and the
// output schema to the hexdecode function defined above.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['Id', 'Reading.Raw'], // Input column names (nested field given by its dotted path)
// JSON representation of the output schema; the field names match the keys
// of the object that hexdecode passes to emit()
[{name: 'Id', type: 'STRING'},
{name: 'converted', type: 'INTEGER'}],
hexdecode // The function reference
);
我的查询是:
select Id, converted from
hexdecode(
select r.Id, r.Reading.Raw from Example.TagRaw2 r
)
通过将以下 JSON 上传到 table Example.TagRaw2
{"Id":"ABC","Reading":{"Raw":"0004"}}
{"Id":"CDE","Reading":{"Raw":"000b"}}
我希望这会将我的列从十六进制转换为整数,但它却只返回了一个几乎没有意义的条目。
知道我做错了什么吗?
编辑: 我添加了一个示例 JSON 来上传以尝试重现该问题.. 但它现在似乎有效。如果我能找出原来的错误,我会再次更新,但上面的 UDF 似乎完全符合我的要求。
问题 - 您的 Reading 对象是否是一个重复的列?
如果是这样,您需要执行以下操作:
/**
 * UDF body for a repeated `reading` field: emits one output record per
 * element of r.reading.
 *
 * @param {Object} r - Input record; r.Id and r.reading[*].Raw are read.
 * @param {Function} emit - Callback invoked once per element of r.reading.
 */
function hexdecode(r, emit) {
  var index = 0;
  while (index < r.reading.length) {
    var entry = r.reading[index];
    // Each Raw value is parsed as base-16 text.
    emit({ tag: r.Id, num: parseInt(entry.Raw, 16) });
    index += 1;
  }
}
// UDF registration for the repeated-field variant of hexdecode shown above.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['Id', 'reading.Raw'], // Input column names
// Output schema: one entry per key of the emitted object (tag, num)
[{name: 'tag', type: 'string'},
{name: 'num', type: 'integer'}],
hexdecode // The function reference
);
如果您尝试访问 r.reading.Raw,这将尝试访问 JavaScript 数组的 Raw 属性。这是完全有效的 JavaScript,但它肯定不是您想要的,因为该值是 undefined(即空值)。
一个方便的技巧是打印出输入记录的 JSON 字符串,以查看 UDF 将什么视为输入:
// Debugging UDF: registers 'jsonifyObj', which emits a single string column
// 'obj' holding the JSON serialization of the input record.
bigquery.defineFunction(
'jsonifyObj', // Name used to call the function from SQL
['Id', 'reading.Raw'], // Input column names
[{name: 'obj', type: 'string'}], // Output schema
// Serialize the whole input record so its actual shape can be inspected
function(r, emit) { emit({obj: JSON.stringify(r)}); }
);
这有助于调试问题。我怀疑这种情况下的问题是您的数据看起来更像
[{"Id":"ABC","Reading":[{"Raw":"0004"}, {"Raw": "00ff"}]},
{"Id":"CDE","Reading":[{"Raw":"000b"}, {"Raw": "0012"}]}]
-------- 2015-11-17 更新--------
您的代码有一些问题,请在下方注释:
// Asker's original version, reproduced unchanged so that the two problems
// flagged by the NOTE comments below stay visible. Do not use as-is.
function hexdecode(row, emit) {
for (var i = 0; i < row.reading.length; ++i) {
// NOTE: `tag` and `row.Id` are wrong; this must be `TagId` and `row.TagId` to match the registered input columns and output schema.
emit({ tag: row.Id,
times: row.reading[i].Timestamp,
// NOTE: This is a recursive call! It should call decodeHelper(), not hexdecode().
convert: hexdecode(row.reading[i].RawCO) });
}
}
// Helper function with error handling
/**
 * Converts a hexadecimal string to an integer.
 *
 * parseInt() never throws on bad input; it reports failure by returning NaN.
 * A try/catch around it is therefore dead code, and NaN would leak into an
 * INTEGER output column. The result is checked explicitly instead.
 *
 * @param {string} s - Hexadecimal text, e.g. "000b".
 * @returns {?number} The parsed integer, or null when s cannot be parsed.
 */
function decodeHelper(s) {
  var value = parseInt(s, 16);
  // value is always a number here, so the global isNaN does no coercion.
  return isNaN(value) ? null : value;
}
// UDF registration: binds the SQL-visible name, the input columns and the
// output schema to hexdecode.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['TagId', 'reading.Timestamp', 'reading.RawCO'], // Input column names
// JSON representation of the output schema (TagId, times, convert); the
// keys of every emitted object have to use these names
[{name: 'TagId', type: 'STRING'},
{name: 'times', type: 'INTEGER'},
{name: 'convert', type: 'INTEGER'}],
hexdecode // The function reference
);
您的嵌套 select 返回 0 行,所以我将 SQL 更新如下:
select
TagID, times, convert
from hexdecode(
select r.TagId, r.Reading.Timestamp, r.Reading.RawCO from [table.Readings] r where r.Reading.RawCO is not NULL and r.Reading.PPM is
not NULL
and r.TagId = 'Tag_00000000'
)
这里是更正后的代码:
/**
 * Corrected UDF body: emits one output record per element of the repeated
 * `reading` field.
 *
 * @param {Object} row - Input record; row.TagId and, per element of
 *     row.reading, Timestamp and RawCO are read.
 * @param {Function} emit - Callback invoked once per element of row.reading.
 */
function hexdecode(row, emit) {
  var index = 0;
  while (index < row.reading.length) {
    var reading = row.reading[index];
    emit({
      TagId: row.TagId,
      times: reading.Timestamp,
      // The hex-to-integer conversion is delegated to decodeHelper.
      convert: decodeHelper(reading.RawCO)
    });
    index += 1;
  }
}
// Helper function with error handling
/**
 * Converts a hexadecimal string to an integer.
 *
 * parseInt() never throws on bad input; it reports failure by returning NaN.
 * A try/catch around it is therefore dead code, and NaN would leak into an
 * INTEGER output column. The result is checked explicitly instead.
 *
 * @param {string} s - Hexadecimal text, e.g. "000b".
 * @returns {?number} The parsed integer, or null when s cannot be parsed.
 */
function decodeHelper(s) {
  var value = parseInt(s, 16);
  // value is always a number here, so the global isNaN does no coercion.
  return isNaN(value) ? null : value;
}
// UDF registration: binds the SQL-visible name, the input columns and the
// output schema to the corrected hexdecode above.
bigquery.defineFunction(
'hexdecode', // Name used to call the function from SQL
['TagId', 'reading.Timestamp', 'reading.RawCO'], // Input column names
// JSON representation of the output schema; the field names match the keys
// of the object that hexdecode passes to emit()
[{name: 'TagId', type: 'STRING'},
{name: 'times', type: 'INTEGER'},
{name: 'convert', type: 'INTEGER'}],
hexdecode // The function reference
);