BigQuery 转化率计算不正确
BigQuery conversion rate not calculating properly
我正在尝试在 BigQuery 中获取基准数据集。在此数据集中,我想获取会话、跳出率、新用户等数据,但最重要的是转化率。然而,这些似乎没有正确计算。转换率主要给出不应为 null 的值的 null 值,不幸的是,不为 null 的值是错误的。我一直在谷歌搜索一些关于跳出率等计算的答案,在我看来,转换率的计算应该像这样工作。
我也尝试过不同的公式来计算转化率,但格式与下面的代码相同。
编辑:会话计算肯定有问题,因为它提供的会话少于用户
我正在使用以下代码:
SELECT
actiontimestamp,
medium,
source,
users,
newUsers,
sessions,
ROUND(SAFE_DIVIDE(pageviews, sessions), 0) AS pages_per_session,
CASE
WHEN sessions = 0 THEN 0
ELSE ROUND(SAFE_DIVIDE(bounces, sessions), 2)
END AS bounce_rate,
ROUND(avgTimeOnSite, 2)
transactions,
(SAFE_DIVIDE(transactions, sessions)*100) AS conversion_rate
FROM (
SELECT
actiontimestamp,
medium,
source,
COUNT(fullVisitorId) AS users,
COUNT(DISTINCT fullVisitorId) AS newUsers,
COUNT(transaction) AS transactions,
COUNT(pageviews) AS pageviews,
SUM(bounces) AS bounces,
SUM(sessions) AS sessions,
AVG(avgTimeOnSite) AS avgTimeOnSite
FROM (
SELECT
fullVisitorId,
visitStartTime,
pageviews,
actiontimestamp,
avgTimeOnSite,
transaction,
medium,
source,
CASE
WHEN hitNumber = first_interaction THEN bounces
ELSE 0
END AS bounces,
CASE
WHEN hitNumber = first_hit THEN visits
ELSE 0
END AS sessions
FROM (
SELECT
fullVisitorId,
visitStartTime,
IFNULL(totals.pageviews,
0) AS pageviews,
totals.bounces,
totals.visits,
hits.hitNumber,
MIN(IF(hits.isInteraction IS NOT NULL,
hits.hitNumber,
0)) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_interaction,
MIN(hits.hitNumber) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_hit,
FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_SECONDS(SAFE_CAST(visitStartTime AS INT64)), "Europe/London") AS actiontimestamp,
totals.timeOnSite AS avgTimeOnSite,
hits.transaction.transactionId AS transaction,
trafficSource.medium AS medium,
trafficSource.source AS source
FROM
`ga_table_id.ga_sessions_*`,
UNNEST(hits) AS hits
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', '2018-11-01')
AND FORMAT_DATE('%Y%m%d', '2018-11-30')))
GROUP BY
actiontimestamp,
medium,
source)
ORDER BY
actiontimestamp DESC
变量应该反过来吗?
SAFE_DIVIDE(transactions, sessions)
因为会话是在不同级别定义的,所以代码无法正常工作。但是,当我制作 2 个单独的 tables 并加入它们时,它工作得很好。 2 table 有不同的路径来获取使计算正常工作的会话。
SELECT
actiontimestamp,
medium,
source,
sessions,
ROUND(SAFE_DIVIDE(pageviews,
sessions), 0) AS pages_per_session,
CASE
WHEN sessions = 0 THEN 0
ELSE ROUND(SAFE_DIVIDE(bounces,
sessions), 2)
END AS bounce_rate,
ROUND(avgTimeOnSite, 2) AS avgTimeOnSite
FROM (
SELECT
actiontimestamp,
medium,
source,
AVG(pageviews) AS pageviews,
SUM(bounces) AS bounces,
SUM(sessions) AS sessions,
AVG(avgTimeOnSite) AS avgTimeOnSite
FROM (
SELECT
fullVisitorId,
pageviews,
actiontimestamp,
avgTimeOnSite,
medium,
source,
CASE
WHEN hitNumber = first_interaction THEN bounces
ELSE 0
END AS bounces,
CASE
WHEN hitNumber = first_hit THEN visits
ELSE 0
END AS sessions
FROM (
SELECT
fullVisitorId,
visitStartTime,
IFNULL(totals.pageviews,
0) AS pageviews,
totals.bounces,
totals.visits,
totals.newVisits AS newVisits,
hits.hitNumber,
MIN(IF(hits.isInteraction IS NOT NULL,
hits.hitNumber,
0)) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_interaction,
MIN(hits.hitNumber) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_hit,
FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_SECONDS(SAFE_CAST(visitStartTime AS INT64)), "Europe/London") AS actiontimestamp,
totals.timeOnSite AS avgTimeOnSite,
trafficSource.medium AS medium,
trafficSource.source AS source
FROM
`gatable.ga_sessions_*`,
UNNEST(hits) AS hits
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', '2018-11-01')
AND FORMAT_DATE('%Y%m%d', '2018-11-30')))
GROUP BY
actiontimestamp,
medium,
source)
ORDER BY
actiontimestamp DESC
然后第二个 table 将是:
SELECT
actiontimestamp,
medium,
source,
users,
newUsers,
sessions,
transactions,
ROUND((SAFE_DIVIDE(transactions,
sessions)*100), 2) AS conversion_rate
FROM (
SELECT
FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_SECONDS(SAFE_CAST(visitStartTime AS INT64)), "Europe/London") AS actiontimestamp,
SUM(totals.transactions) AS transactions,
COUNT(DISTINCT fullVisitorId) AS users,
SUM(totals.visits) AS sessions,
COUNT(totals.newVisits) AS newUsers,
trafficSource.medium AS medium,
trafficSource.source AS source
FROM
`91775944.ga_sessions_*`
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', '2018-11-01')
AND FORMAT_DATE('%Y%m%d', '2018-11-30')
GROUP BY
actiontimestamp,
medium,
source
)
然后这些 tables 在 actiontimestamp、medium 和 source 上加入,我得到了我需要的结果。
我正在尝试在 BigQuery 中获取基准数据集。在此数据集中,我想获取会话、跳出率、新用户等数据,但最重要的是转化率。然而,这些似乎没有正确计算。转换率主要给出不应为 null 的值的 null 值,不幸的是,不为 null 的值是错误的。我一直在谷歌搜索一些关于跳出率等计算的答案,在我看来,转换率的计算应该像这样工作。
我也尝试过不同的公式来计算转化率,但格式与下面的代码相同。
编辑:会话计算肯定有问题,因为它提供的会话少于用户
我正在使用以下代码:
SELECT
actiontimestamp,
medium,
source,
users,
newUsers,
sessions,
ROUND(SAFE_DIVIDE(pageviews, sessions), 0) AS pages_per_session,
CASE
WHEN sessions = 0 THEN 0
ELSE ROUND(SAFE_DIVIDE(bounces, sessions), 2)
END AS bounce_rate,
ROUND(avgTimeOnSite, 2)
transactions,
(SAFE_DIVIDE(transactions, sessions)*100) AS conversion_rate
FROM (
SELECT
actiontimestamp,
medium,
source,
COUNT(fullVisitorId) AS users,
COUNT(DISTINCT fullVisitorId) AS newUsers,
COUNT(transaction) AS transactions,
COUNT(pageviews) AS pageviews,
SUM(bounces) AS bounces,
SUM(sessions) AS sessions,
AVG(avgTimeOnSite) AS avgTimeOnSite
FROM (
SELECT
fullVisitorId,
visitStartTime,
pageviews,
actiontimestamp,
avgTimeOnSite,
transaction,
medium,
source,
CASE
WHEN hitNumber = first_interaction THEN bounces
ELSE 0
END AS bounces,
CASE
WHEN hitNumber = first_hit THEN visits
ELSE 0
END AS sessions
FROM (
SELECT
fullVisitorId,
visitStartTime,
IFNULL(totals.pageviews,
0) AS pageviews,
totals.bounces,
totals.visits,
hits.hitNumber,
MIN(IF(hits.isInteraction IS NOT NULL,
hits.hitNumber,
0)) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_interaction,
MIN(hits.hitNumber) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_hit,
FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_SECONDS(SAFE_CAST(visitStartTime AS INT64)), "Europe/London") AS actiontimestamp,
totals.timeOnSite AS avgTimeOnSite,
hits.transaction.transactionId AS transaction,
trafficSource.medium AS medium,
trafficSource.source AS source
FROM
`ga_table_id.ga_sessions_*`,
UNNEST(hits) AS hits
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', '2018-11-01')
AND FORMAT_DATE('%Y%m%d', '2018-11-30')))
GROUP BY
actiontimestamp,
medium,
source)
ORDER BY
actiontimestamp DESC
变量应该反过来吗?
SAFE_DIVIDE(transactions, sessions)
因为会话是在不同级别定义的,所以代码无法正常工作。但是,当我制作 2 个单独的 tables 并加入它们时,它工作得很好。 2 table 有不同的路径来获取使计算正常工作的会话。
SELECT
actiontimestamp,
medium,
source,
sessions,
ROUND(SAFE_DIVIDE(pageviews,
sessions), 0) AS pages_per_session,
CASE
WHEN sessions = 0 THEN 0
ELSE ROUND(SAFE_DIVIDE(bounces,
sessions), 2)
END AS bounce_rate,
ROUND(avgTimeOnSite, 2) AS avgTimeOnSite
FROM (
SELECT
actiontimestamp,
medium,
source,
AVG(pageviews) AS pageviews,
SUM(bounces) AS bounces,
SUM(sessions) AS sessions,
AVG(avgTimeOnSite) AS avgTimeOnSite
FROM (
SELECT
fullVisitorId,
pageviews,
actiontimestamp,
avgTimeOnSite,
medium,
source,
CASE
WHEN hitNumber = first_interaction THEN bounces
ELSE 0
END AS bounces,
CASE
WHEN hitNumber = first_hit THEN visits
ELSE 0
END AS sessions
FROM (
SELECT
fullVisitorId,
visitStartTime,
IFNULL(totals.pageviews,
0) AS pageviews,
totals.bounces,
totals.visits,
totals.newVisits AS newVisits,
hits.hitNumber,
MIN(IF(hits.isInteraction IS NOT NULL,
hits.hitNumber,
0)) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_interaction,
MIN(hits.hitNumber) OVER (PARTITION BY fullVisitorId, visitStartTime) AS first_hit,
FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_SECONDS(SAFE_CAST(visitStartTime AS INT64)), "Europe/London") AS actiontimestamp,
totals.timeOnSite AS avgTimeOnSite,
trafficSource.medium AS medium,
trafficSource.source AS source
FROM
`gatable.ga_sessions_*`,
UNNEST(hits) AS hits
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', '2018-11-01')
AND FORMAT_DATE('%Y%m%d', '2018-11-30')))
GROUP BY
actiontimestamp,
medium,
source)
ORDER BY
actiontimestamp DESC
然后第二个 table 将是:
SELECT
actiontimestamp,
medium,
source,
users,
newUsers,
sessions,
transactions,
ROUND((SAFE_DIVIDE(transactions,
sessions)*100), 2) AS conversion_rate
FROM (
SELECT
FORMAT_TIMESTAMP("%Y-%m-%d", TIMESTAMP_SECONDS(SAFE_CAST(visitStartTime AS INT64)), "Europe/London") AS actiontimestamp,
SUM(totals.transactions) AS transactions,
COUNT(DISTINCT fullVisitorId) AS users,
SUM(totals.visits) AS sessions,
COUNT(totals.newVisits) AS newUsers,
trafficSource.medium AS medium,
trafficSource.source AS source
FROM
`91775944.ga_sessions_*`
WHERE
_TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d', '2018-11-01')
AND FORMAT_DATE('%Y%m%d', '2018-11-30')
GROUP BY
actiontimestamp,
medium,
source
)
然后这些 tables 在 actiontimestamp、medium 和 source 上加入,我得到了我需要的结果。