Google BigQuery 交叉联接
Google BigQuery cross join
我正在使用交叉连接(CROSS JOIN)访问两个表的数据。但是使用交叉连接时,我得到错误:"d.DebugData not found in table 'bigdata:RawDebug.CarrierDetails'"。任何帮助将不胜感激!!
// Question's original attempt: pair each filtered debug row with one carrier
// Network. The joined subquery reads d.DebugData and d.Reason, which are not
// columns of CarrierDetails; this is the query that produces the
// "d.DebugData not found in table" error quoted in the question.
SELECT
  HardwareId,
  DebugReason,
  DebugData,
  CASE
    WHEN REGEXP_MATCH(DebugData, '\d+') THEN c.Network
    ELSE REGEXP_REPLACE(DebugData, '\?', ' ')
  END AS ActualDebugData
FROM (
  SELECT
    HardwareId,
    DebugReason,
    DebugData
  FROM TABLE_DATE_RANGE(
    [bigdata:RawDebug.T],
    TIMESTAMP('2016-05-15'),
    TIMESTAMP('2016-05-15')
  )
  WHERE Reason = 500
) AS d
CROSS JOIN (
  SELECT Network
  FROM [bigdata:RawDebug.CarrierDetails]
  WHERE Mcc = SUBSTR(d.DebugData, 0, 3)
    AND Mnc = SUBSTR(d.DebugData, 4, LENGTH(d.Reason - 1))
  LIMIT 1
) AS c
试过了,但我得到这个错误:“ON 子句必须是若干 = 比较的 AND 组合,每个比较各取每个表中的一个字段名,且所有字段名都要以表名作为前缀。”
%%sql --module Test2
DEFINE QUERY Test2
SELECT
  HardwareId,
  DebugReason,
  DebugData,
  CASE
    WHEN REGEXP_MATCH(DebugData, '\d+') THEN c.Network
    ELSE REGEXP_REPLACE(DebugData, '\?', ' ')
  END AS ActualDebugData
FROM (
  SELECT
    HardwareId,
    DebugReason,
    DebugData,
    SUBSTR(DebugData, 0, 3) AS d1,
    REGEXP_REPLACE(
      SUBSTR(DebugData, 3, LENGTH(DebugData) - 1),
      '%[^a-zA-Z0-9, ]%',
      ' '
    ) AS d2
  FROM TABLE_DATE_RANGE(
    [bigdata:RawDebug.T],
    TIMESTAMP('2016-05-15'),
    TIMESTAMP('2016-05-15')
  )
  WHERE DebugReason = 500
) AS d
LEFT JOIN (
  SELECT
    Network,
    Mcc,
    Mnc,
    ROW_NUMBER() OVER (PARTITION BY Mcc, Mnc) AS pos
  FROM [bigdata:RawDebug.CarrierDetails]
) AS c
  ON c.Mcc = INTEGER(d.d1)
  AND c.Mnc = INTEGER(d.d2)
WHERE c.pos = 1
我正在添加以下结构:
RawDebug:
HardwareId DebugReason DebugData
550029358 50013 VER%
550029359 50013 RO%
550029360 50013 34020?
550029361 50013 34021?
当 DebugData 包含字母时,我用 CASE 语句来处理它;当它包含数字时,我必须取前 3 个字符的子串并将其与 CarrierDetails 中的 Mcc 匹配,其余字符则与 CarrierDetails 中的 Mnc 匹配。
最近的查询没有考虑所有的情况。相反,它只取一个特定的数字,并把对应的 ActualDebugData 用于所有行。
// Answer's rewrite: derive the two join keys (d1, d2) inside d, then LEFT JOIN
// CarrierDetails using plain field-to-field equalities in the ON clause.
// NOTE(review): LENGTH(Reason - 1) measures an expression built from Reason,
// not DebugData - confirm this is the intended max length for d2.
SELECT
  HardwareId,
  DebugReason,
  DebugData,
  CASE
    WHEN REGEXP_MATCH(DebugData, '\d+') THEN c.Network
    ELSE REGEXP_REPLACE(DebugData, '\?', ' ')
  END AS ActualDebugData
FROM (
  SELECT
    HardwareId,
    DebugReason,
    DebugData,
    SUBSTR(DebugData, 0, 3) AS d1,
    SUBSTR(DebugData, 4, LENGTH(Reason - 1)) AS d2
  FROM TABLE_DATE_RANGE(
    [bigdata:RawDebug.T],
    TIMESTAMP('2016-05-15'),
    TIMESTAMP('2016-05-15')
  )
  WHERE Reason = 500
) AS d
LEFT JOIN (
  SELECT
    Network,
    Mcc,
    Mnc
//,ROW_NUMBER() OVER(PARTITION BY Mcc, Mnc) AS pos
  FROM [bigdata:RawDebug.CarrierDetails]
) AS c
  ON c.Mcc = d.d1
  AND c.Mnc = d.d2
//WHERE c.pos = 1
如果能保证 d 中的每个条目只对应唯一的 Network,您可以删除被注释掉的行;
否则您应该取消这些行的注释。
我正在使用交叉连接(CROSS JOIN)访问两个表的数据。但是使用交叉连接时,我得到错误:"d.DebugData not found in table 'bigdata:RawDebug.CarrierDetails'"。任何帮助将不胜感激!!
// Question's original attempt: pair each filtered debug row with one carrier
// Network. The joined subquery reads d.DebugData and d.Reason, which are not
// columns of CarrierDetails; this is the query that produces the
// "d.DebugData not found in table" error quoted in the question.
SELECT
  HardwareId,
  DebugReason,
  DebugData,
  CASE
    WHEN REGEXP_MATCH(DebugData, '\d+') THEN c.Network
    ELSE REGEXP_REPLACE(DebugData, '\?', ' ')
  END AS ActualDebugData
FROM (
  SELECT
    HardwareId,
    DebugReason,
    DebugData
  FROM TABLE_DATE_RANGE(
    [bigdata:RawDebug.T],
    TIMESTAMP('2016-05-15'),
    TIMESTAMP('2016-05-15')
  )
  WHERE Reason = 500
) AS d
CROSS JOIN (
  SELECT Network
  FROM [bigdata:RawDebug.CarrierDetails]
  WHERE Mcc = SUBSTR(d.DebugData, 0, 3)
    AND Mnc = SUBSTR(d.DebugData, 4, LENGTH(d.Reason - 1))
  LIMIT 1
) AS c
试过了,但我得到这个错误:“ON 子句必须是若干 = 比较的 AND 组合,每个比较各取每个表中的一个字段名,且所有字段名都要以表名作为前缀。”
%%sql --module Test2
DEFINE QUERY Test2
SELECT
  HardwareId,
  DebugReason,
  DebugData,
  CASE
    WHEN REGEXP_MATCH(DebugData, '\d+') THEN c.Network
    ELSE REGEXP_REPLACE(DebugData, '\?', ' ')
  END AS ActualDebugData
FROM (
  SELECT
    HardwareId,
    DebugReason,
    DebugData,
    SUBSTR(DebugData, 0, 3) AS d1,
    REGEXP_REPLACE(
      SUBSTR(DebugData, 3, LENGTH(DebugData) - 1),
      '%[^a-zA-Z0-9, ]%',
      ' '
    ) AS d2
  FROM TABLE_DATE_RANGE(
    [bigdata:RawDebug.T],
    TIMESTAMP('2016-05-15'),
    TIMESTAMP('2016-05-15')
  )
  WHERE DebugReason = 500
) AS d
LEFT JOIN (
  SELECT
    Network,
    Mcc,
    Mnc,
    ROW_NUMBER() OVER (PARTITION BY Mcc, Mnc) AS pos
  FROM [bigdata:RawDebug.CarrierDetails]
) AS c
  ON c.Mcc = INTEGER(d.d1)
  AND c.Mnc = INTEGER(d.d2)
WHERE c.pos = 1
我正在添加以下结构:
RawDebug:
HardwareId DebugReason DebugData
550029358 50013 VER%
550029359 50013 RO%
550029360 50013 34020?
550029361 50013 34021?
当 DebugData 包含字母时,我用 CASE 语句来处理它;当它包含数字时,我必须取前 3 个字符的子串并将其与 CarrierDetails 中的 Mcc 匹配,其余字符则与 CarrierDetails 中的 Mnc 匹配。
最近的查询没有考虑所有的情况。相反,它只取一个特定的数字,并把对应的 ActualDebugData 用于所有行。
// Answer's rewrite: derive the two join keys (d1, d2) inside d, then LEFT JOIN
// CarrierDetails using plain field-to-field equalities in the ON clause.
// NOTE(review): LENGTH(Reason - 1) measures an expression built from Reason,
// not DebugData - confirm this is the intended max length for d2.
SELECT
  HardwareId,
  DebugReason,
  DebugData,
  CASE
    WHEN REGEXP_MATCH(DebugData, '\d+') THEN c.Network
    ELSE REGEXP_REPLACE(DebugData, '\?', ' ')
  END AS ActualDebugData
FROM (
  SELECT
    HardwareId,
    DebugReason,
    DebugData,
    SUBSTR(DebugData, 0, 3) AS d1,
    SUBSTR(DebugData, 4, LENGTH(Reason - 1)) AS d2
  FROM TABLE_DATE_RANGE(
    [bigdata:RawDebug.T],
    TIMESTAMP('2016-05-15'),
    TIMESTAMP('2016-05-15')
  )
  WHERE Reason = 500
) AS d
LEFT JOIN (
  SELECT
    Network,
    Mcc,
    Mnc
//,ROW_NUMBER() OVER(PARTITION BY Mcc, Mnc) AS pos
  FROM [bigdata:RawDebug.CarrierDetails]
) AS c
  ON c.Mcc = d.d1
  AND c.Mnc = d.d2
//WHERE c.pos = 1
如果能保证 d 中的每个条目只对应唯一的 Network,您可以删除被注释掉的行;
否则您应该取消这些行的注释。