BigQuery GA Open Funnel Legacy SQL:排除已查看特定页面的会话
BigQuery GA Open Funnel Legacy SQL: Exclude Sessions that have viewed certain pages
我正在尝试在 BigQuery 中重新创建 GA 漏斗。这个开放漏斗需要排除查看过某些页面的会话。我尝试过使用 AND NOT REGEXP_MATCH 和 NOT IN,但结果仍不符合预期:查询结果中仍然包含查看过我想排除的页面的会话。
如果可能的话,我还想把它改成封闭漏斗;目前这段代码返回的是开放漏斗的结果。
此外,是否有更好的方式在标准 SQL 中编写此查询?
在这些方面需要帮助。谢谢。
-- Funnel summary over six page steps (BigQuery legacy SQL).
-- For each step sN, COUNT(sN.firstHit) counts the joined rows that carry a
-- first-hit number for that step, and SUM(sN.exit) adds up that step's
-- per-session exit flags (each 0 or 1).
SELECT
  COUNT(s0.firstHit) AS _test_your_details,
  SUM(s0.exit) AS _test_your_details_exits,
  COUNT(s1.firstHit) AS _test_additional_new_details,
  SUM(s1.exit) AS _test_additional_new_details_exits,
  COUNT(s2.firstHit) AS _test_new_dress,
  SUM(s2.exit) AS _test_new_dress_exits,
  COUNT(s3.firstHit) AS _test_test_details,
  SUM(s3.exit) AS _test_test_details_exits,
  COUNT(s4.firstHit) AS _test_cover_for_the_test,
  SUM(s4.exit) AS _test_cover_for_the_test_exits,
  COUNT(s5.firstHit) AS _test_your_order,
  SUM(s5.exit) AS _test_your_order_exits
FROM (
  -- Each nesting level below full-outer-joins one more step subquery onto the
  -- result built so far; every join is keyed on s0's fullVisitorId and visitId.
  SELECT
    s0.fullVisitorId,
    s0.visitId,
    s0.firstHit,
    s0.exit,
    s1.firstHit,
    s1.exit,
    s2.firstHit,
    s2.exit,
    s3.firstHit,
    s3.exit,
    s4.firstHit,
    s4.exit,
    s5.firstHit,
    s5.exit
  FROM (
    SELECT
      s0.fullVisitorId,
      s0.visitId,
      s0.firstHit,
      s0.exit,
      s1.firstHit,
      s1.exit,
      s2.firstHit,
      s2.exit,
      s3.firstHit,
      s3.exit,
      s4.firstHit,
      s4.exit
    FROM (
      SELECT
        s0.fullVisitorId,
        s0.visitId,
        s0.firstHit,
        s0.exit,
        s1.firstHit,
        s1.exit,
        s2.firstHit,
        s2.exit,
        s3.firstHit,
        s3.exit
      FROM (
        SELECT
          s0.fullVisitorId,
          s0.visitId,
          s0.firstHit,
          s0.exit,
          s1.firstHit,
          s1.exit,
          s2.firstHit,
          s2.exit
        FROM (
          SELECT
            s0.fullVisitorId,
            s0.visitId,
            s0.firstHit,
            s0.exit,
            s1.firstHit,
            s1.exit
          FROM (
            -- Step 0: per (fullVisitorId, visitId), the lowest hitNumber among
            -- hits whose pagePath matches the step pattern, and 1 if any such
            -- hit has isExit set (otherwise 0).
            SELECT
              fullVisitorId,
              visitId,
              MIN(hits.hitNumber) AS firstHit,
              MAX(IF(hits.isExit, 1, 0)) AS exit
            FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
            WHERE REGEXP_MATCH(hits.page.pagePath, '/test - your details')
              AND totals.visits = 1
              AND channelGrouping NOT LIKE '%organic%'
              -- NOTE(review): the two exclusion tests below read the same
              -- hits.page.pagePath value as the REGEXP_MATCH above, so they
              -- appear to filter individual hits rather than drop sessions
              -- that viewed one of these pages in another hit - confirm
              -- whether session-level exclusion was intended.
              AND hits.page.pagePath NOT IN ('/test - additional test details', '/test - test dress', '/test - cover dress')
              AND NOT REGEXP_MATCH(hits.page.pagePath, r"^/(test - additional test details|test - test dress|test - cover dress)")
            GROUP BY
              fullVisitorId,
              visitId
          ) s0
          FULL OUTER JOIN EACH (
            -- Step 1: same aggregation for '/test - additional new details'.
            SELECT
              fullVisitorId,
              visitId,
              MIN(hits.hitNumber) AS firstHit,
              MAX(IF(hits.isExit, 1, 0)) AS exit
            FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
            WHERE REGEXP_MATCH(hits.page.pagePath, '/test - additional new details')
              AND totals.visits = 1
              AND channelGrouping NOT LIKE '%organic%'
            GROUP BY
              fullVisitorId,
              visitId
          ) s1
            ON s0.fullVisitorId = s1.fullVisitorId
            AND s0.visitId = s1.visitId
        ) s01
        FULL OUTER JOIN EACH (
          -- Step 2: same aggregation for '/test - new dress'.
          SELECT
            fullVisitorId,
            visitId,
            MIN(hits.hitNumber) AS firstHit,
            MAX(IF(hits.isExit, 1, 0)) AS exit
          FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
          WHERE REGEXP_MATCH(hits.page.pagePath, '/test - new dress')
            AND totals.visits = 1
            AND channelGrouping NOT LIKE '%organic%'
          GROUP BY
            fullVisitorId,
            visitId
        ) s2
          ON s0.fullVisitorId = s2.fullVisitorId
          AND s0.visitId = s2.visitId
      ) s012
      FULL OUTER JOIN EACH (
        -- Step 3: same aggregation for '/test - test details'.
        SELECT
          fullVisitorId,
          visitId,
          MIN(hits.hitNumber) AS firstHit,
          MAX(IF(hits.isExit, 1, 0)) AS exit
        FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
        WHERE REGEXP_MATCH(hits.page.pagePath, '/test - test details')
          AND totals.visits = 1
          AND channelGrouping NOT LIKE '%organic%'
        GROUP BY
          fullVisitorId,
          visitId
      ) s3
        ON s0.fullVisitorId = s3.fullVisitorId
        AND s0.visitId = s3.visitId
    ) s0123
    FULL OUTER JOIN EACH (
      -- Step 4: same aggregation for '/test - cover for the test', with an
      -- additional hit-level NOT IN test on pagePath.
      SELECT
        fullVisitorId,
        visitId,
        MIN(hits.hitNumber) AS firstHit,
        MAX(IF(hits.isExit, 1, 0)) AS exit
      FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
      WHERE REGEXP_MATCH(hits.page.pagePath, '/test - cover for the test')
        AND totals.visits = 1
        AND channelGrouping NOT LIKE '%organic%'
        AND hits.page.pagePath NOT IN ('/test - additional test details', '/test - test dress')
      GROUP BY
        fullVisitorId,
        visitId
    ) s4
      ON s0.fullVisitorId = s4.fullVisitorId
      AND s0.visitId = s4.visitId
  ) s01234
  FULL OUTER JOIN EACH (
    -- Step 5: same aggregation for '/test - your order', with additional
    -- hit-level NOT IN and NOT REGEXP_MATCH tests on pagePath.
    SELECT
      fullVisitorId,
      visitId,
      MIN(hits.hitNumber) AS firstHit,
      MAX(IF(hits.isExit, 1, 0)) AS exit
    FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
    WHERE REGEXP_MATCH(hits.page.pagePath, '/test - your order')
      AND totals.visits = 1
      AND channelGrouping NOT LIKE '%organic%'
      AND hits.page.pagePath NOT IN ('/test - additional test details', '/test - test dress')
      AND NOT REGEXP_MATCH(hits.page.pagePath, r"^/(test - additional test details|test - test dress|test - cover dress)")
    GROUP BY
      fullVisitorId,
      visitId
  ) s5
    ON s0.fullVisitorId = s5.fullVisitorId
    AND s0.visitId = s5.visitId
) s012345
在标准 SQL 中,您可以针对 hits 编写一个简单的子查询来进行检查。例如:
-- Standard SQL: return up to 1000 sessions that have no hit whose pagePath is
-- exactly '/asearch.html', ordered by number of hits (most first).
SELECT
  fullvisitorid,
  visitstarttime,
  -- Rebuild the session's hits as an array of (hitNumber, type, page) structs
  -- ordered by hitNumber, so the hits of the remaining sessions can be inspected.
  ARRAY(
    SELECT AS STRUCT hitNumber, type, page
    FROM t.hits
    ORDER BY hitNumber
  ) AS hits
FROM
  `bigquery-public-data.google_analytics_sample.ga_sessions_20161104` AS t
WHERE
  -- Session-level exclusion: the correlated subquery scans only this session's
  -- hits, and NOT EXISTS keeps the session only when none of them has the
  -- excluded pagePath.
  NOT EXISTS (
    SELECT 1
    FROM t.hits
    WHERE page.pagePath = '/asearch.html'
  )
ORDER BY array_length(hits) DESC
LIMIT 1000
我还写了一个子查询,把每个会话的各次命中(hits)以数组形式显示出来。
在旧版 SQL 中,您可以使用 OMIT RECORD IF:
-- Legacy SQL: list page paths per session, skipping every session record in
-- which some hit has pagePath '/asearch.html'.
SELECT
  fullvisitorid,
  visitstarttime,
  hits.page.pagePath
FROM
  [bigquery-public-data:google_analytics_sample.ga_sessions_20161104] t
-- OMIT RECORD IF drops whole records. The tested field sits below record
-- level, so the comparison is aggregated; MAX() surfaces a TRUE from any hit.
OMIT RECORD IF MAX(hits.page.pagePath = '/asearch.html')
LIMIT 1000
希望对您有所帮助!
我正在尝试在 BigQuery 中重新创建 GA 漏斗。这个开放漏斗需要排除查看过某些页面的会话。我尝试过使用 AND NOT REGEXP_MATCH 和 NOT IN,但结果仍不符合预期:查询结果中仍然包含查看过我想排除的页面的会话。
如果可能的话,我还想把它改成封闭漏斗;目前这段代码返回的是开放漏斗的结果。
此外,是否有更好的方式在标准 SQL 中编写此查询?
在这些方面需要帮助。谢谢。
-- Funnel summary over six page steps (BigQuery legacy SQL).
-- For each step sN, COUNT(sN.firstHit) counts the joined rows that carry a
-- first-hit number for that step, and SUM(sN.exit) adds up that step's
-- per-session exit flags (each 0 or 1).
SELECT
  COUNT(s0.firstHit) AS _test_your_details,
  SUM(s0.exit) AS _test_your_details_exits,
  COUNT(s1.firstHit) AS _test_additional_new_details,
  SUM(s1.exit) AS _test_additional_new_details_exits,
  COUNT(s2.firstHit) AS _test_new_dress,
  SUM(s2.exit) AS _test_new_dress_exits,
  COUNT(s3.firstHit) AS _test_test_details,
  SUM(s3.exit) AS _test_test_details_exits,
  COUNT(s4.firstHit) AS _test_cover_for_the_test,
  SUM(s4.exit) AS _test_cover_for_the_test_exits,
  COUNT(s5.firstHit) AS _test_your_order,
  SUM(s5.exit) AS _test_your_order_exits
FROM (
  -- Each nesting level below full-outer-joins one more step subquery onto the
  -- result built so far; every join is keyed on s0's fullVisitorId and visitId.
  SELECT
    s0.fullVisitorId,
    s0.visitId,
    s0.firstHit,
    s0.exit,
    s1.firstHit,
    s1.exit,
    s2.firstHit,
    s2.exit,
    s3.firstHit,
    s3.exit,
    s4.firstHit,
    s4.exit,
    s5.firstHit,
    s5.exit
  FROM (
    SELECT
      s0.fullVisitorId,
      s0.visitId,
      s0.firstHit,
      s0.exit,
      s1.firstHit,
      s1.exit,
      s2.firstHit,
      s2.exit,
      s3.firstHit,
      s3.exit,
      s4.firstHit,
      s4.exit
    FROM (
      SELECT
        s0.fullVisitorId,
        s0.visitId,
        s0.firstHit,
        s0.exit,
        s1.firstHit,
        s1.exit,
        s2.firstHit,
        s2.exit,
        s3.firstHit,
        s3.exit
      FROM (
        SELECT
          s0.fullVisitorId,
          s0.visitId,
          s0.firstHit,
          s0.exit,
          s1.firstHit,
          s1.exit,
          s2.firstHit,
          s2.exit
        FROM (
          SELECT
            s0.fullVisitorId,
            s0.visitId,
            s0.firstHit,
            s0.exit,
            s1.firstHit,
            s1.exit
          FROM (
            -- Step 0: per (fullVisitorId, visitId), the lowest hitNumber among
            -- hits whose pagePath matches the step pattern, and 1 if any such
            -- hit has isExit set (otherwise 0).
            SELECT
              fullVisitorId,
              visitId,
              MIN(hits.hitNumber) AS firstHit,
              MAX(IF(hits.isExit, 1, 0)) AS exit
            FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
            WHERE REGEXP_MATCH(hits.page.pagePath, '/test - your details')
              AND totals.visits = 1
              AND channelGrouping NOT LIKE '%organic%'
              -- NOTE(review): the two exclusion tests below read the same
              -- hits.page.pagePath value as the REGEXP_MATCH above, so they
              -- appear to filter individual hits rather than drop sessions
              -- that viewed one of these pages in another hit - confirm
              -- whether session-level exclusion was intended.
              AND hits.page.pagePath NOT IN ('/test - additional test details', '/test - test dress', '/test - cover dress')
              AND NOT REGEXP_MATCH(hits.page.pagePath, r"^/(test - additional test details|test - test dress|test - cover dress)")
            GROUP BY
              fullVisitorId,
              visitId
          ) s0
          FULL OUTER JOIN EACH (
            -- Step 1: same aggregation for '/test - additional new details'.
            SELECT
              fullVisitorId,
              visitId,
              MIN(hits.hitNumber) AS firstHit,
              MAX(IF(hits.isExit, 1, 0)) AS exit
            FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
            WHERE REGEXP_MATCH(hits.page.pagePath, '/test - additional new details')
              AND totals.visits = 1
              AND channelGrouping NOT LIKE '%organic%'
            GROUP BY
              fullVisitorId,
              visitId
          ) s1
            ON s0.fullVisitorId = s1.fullVisitorId
            AND s0.visitId = s1.visitId
        ) s01
        FULL OUTER JOIN EACH (
          -- Step 2: same aggregation for '/test - new dress'.
          SELECT
            fullVisitorId,
            visitId,
            MIN(hits.hitNumber) AS firstHit,
            MAX(IF(hits.isExit, 1, 0)) AS exit
          FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
          WHERE REGEXP_MATCH(hits.page.pagePath, '/test - new dress')
            AND totals.visits = 1
            AND channelGrouping NOT LIKE '%organic%'
          GROUP BY
            fullVisitorId,
            visitId
        ) s2
          ON s0.fullVisitorId = s2.fullVisitorId
          AND s0.visitId = s2.visitId
      ) s012
      FULL OUTER JOIN EACH (
        -- Step 3: same aggregation for '/test - test details'.
        SELECT
          fullVisitorId,
          visitId,
          MIN(hits.hitNumber) AS firstHit,
          MAX(IF(hits.isExit, 1, 0)) AS exit
        FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
        WHERE REGEXP_MATCH(hits.page.pagePath, '/test - test details')
          AND totals.visits = 1
          AND channelGrouping NOT LIKE '%organic%'
        GROUP BY
          fullVisitorId,
          visitId
      ) s3
        ON s0.fullVisitorId = s3.fullVisitorId
        AND s0.visitId = s3.visitId
    ) s0123
    FULL OUTER JOIN EACH (
      -- Step 4: same aggregation for '/test - cover for the test', with an
      -- additional hit-level NOT IN test on pagePath.
      SELECT
        fullVisitorId,
        visitId,
        MIN(hits.hitNumber) AS firstHit,
        MAX(IF(hits.isExit, 1, 0)) AS exit
      FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
      WHERE REGEXP_MATCH(hits.page.pagePath, '/test - cover for the test')
        AND totals.visits = 1
        AND channelGrouping NOT LIKE '%organic%'
        AND hits.page.pagePath NOT IN ('/test - additional test details', '/test - test dress')
      GROUP BY
        fullVisitorId,
        visitId
    ) s4
      ON s0.fullVisitorId = s4.fullVisitorId
      AND s0.visitId = s4.visitId
  ) s01234
  FULL OUTER JOIN EACH (
    -- Step 5: same aggregation for '/test - your order', with additional
    -- hit-level NOT IN and NOT REGEXP_MATCH tests on pagePath.
    SELECT
      fullVisitorId,
      visitId,
      MIN(hits.hitNumber) AS firstHit,
      MAX(IF(hits.isExit, 1, 0)) AS exit
    FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30'))
    WHERE REGEXP_MATCH(hits.page.pagePath, '/test - your order')
      AND totals.visits = 1
      AND channelGrouping NOT LIKE '%organic%'
      AND hits.page.pagePath NOT IN ('/test - additional test details', '/test - test dress')
      AND NOT REGEXP_MATCH(hits.page.pagePath, r"^/(test - additional test details|test - test dress|test - cover dress)")
    GROUP BY
      fullVisitorId,
      visitId
  ) s5
    ON s0.fullVisitorId = s5.fullVisitorId
    AND s0.visitId = s5.visitId
) s012345
在标准 SQL 中,您可以针对 hits 编写一个简单的子查询来进行检查。例如:
-- Standard SQL: return up to 1000 sessions that have no hit whose pagePath is
-- exactly '/asearch.html', ordered by number of hits (most first).
SELECT
  fullvisitorid,
  visitstarttime,
  -- Rebuild the session's hits as an array of (hitNumber, type, page) structs
  -- ordered by hitNumber, so the hits of the remaining sessions can be inspected.
  ARRAY(
    SELECT AS STRUCT hitNumber, type, page
    FROM t.hits
    ORDER BY hitNumber
  ) AS hits
FROM
  `bigquery-public-data.google_analytics_sample.ga_sessions_20161104` AS t
WHERE
  -- Session-level exclusion: the correlated subquery scans only this session's
  -- hits, and NOT EXISTS keeps the session only when none of them has the
  -- excluded pagePath.
  NOT EXISTS (
    SELECT 1
    FROM t.hits
    WHERE page.pagePath = '/asearch.html'
  )
ORDER BY array_length(hits) DESC
LIMIT 1000
我还写了一个子查询,把每个会话的各次命中(hits)以数组形式显示出来。
在旧版 SQL 中,您可以使用 OMIT RECORD IF:
-- Legacy SQL: list page paths per session, skipping every session record in
-- which some hit has pagePath '/asearch.html'.
SELECT
  fullvisitorid,
  visitstarttime,
  hits.page.pagePath
FROM
  [bigquery-public-data:google_analytics_sample.ga_sessions_20161104] t
-- OMIT RECORD IF drops whole records. The tested field sits below record
-- level, so the comparison is aggregated; MAX() surfaces a TRUE from any hit.
OMIT RECORD IF MAX(hits.page.pagePath = '/asearch.html')
LIMIT 1000
希望对您有所帮助!