将非规范化 table 转换为嵌套结构
Turning denormalized table into a nested structure
这里是相当初级的数据分析师
我正在尝试将 GA4 中的非规范化事件数据转换为对 BI 更友好的嵌套格式。
原始 GA4 数据架构:
GA4 schema
起点是事件级别的数据,但是当我尝试基于 user_pseudo_id 创建一个深入的用户仪表板时,我想创建三个抽象层:
- 包含设备信息、总体统计信息和作为嵌套重复记录的会话的用户级别
- 包含地理数据、会话时长、访问页面数量以及会话的所有事件作为嵌套重复记录的会话级别
- 包含时间戳、事件类型和事件特定信息的事件级别。
到目前为止我的代码:
...events_joined_with_transactions AS (
SELECT
ue.*,
t.transaction_id,
t.currency,
t.shipping,
t.tax,
t.revenue,
t.unique_items,
t.total_items,
t.items
FROM user_events AS ue
LEFT JOIN transactions AS t
ON ue.event_name = "purchase"
AND ue.user_pseudo_id = t.user_pseudo_id
AND t.timestamp = ue.timestamp
),
sessions AS (
SELECT
user_pseudo_id,
session_id,
source_medium,
campaign_name,
ARRAY_AGG(
STRUCT(
date,
timestamp,
event_name,
event_specific_info
)
) AS events
FROM events_joined_with_transactions
GROUP BY 1, 2, 3, 4
),
users AS (
SELECT
user_pseudo_id,
SUM(IF(event_name != "user_engagement", 1, 0)) AS total_events,
SUM(IF(event_name = "session_start", 1, 0)) AS sessions,
SUM(IF(event_name = "page_view", 1, 0)) AS view_page,
SUM(IF(event_name = "view_item", 1, 0)) AS view_item,
SUM(IF(event_name = "add_to_cart", 1, 0)) AS add_to_cart,
SUM(IF(event_name = "remove_from_cart", 1, 0)) AS remove_from_cart,
SUM(IF(event_name = "add_payment_info", 1, 0)) AS add_payment_info,
SUM(IF(event_name = "add_shipping_info", 1, 0)) AS add_shipping_info,
SUM(IF(event_name = "begin_checkout", 1, 0)) AS begin_checkout,
SUM(IF(event_name = "purchase", 1, 0)) AS transactions,
SUM(shipping) AS total_shipping,
SUM(tax) AS total_tax,
SUM(revenue) AS total_revenue,
SUM(total_items) AS total_items,
FROM events_joined_with_transactions
GROUP BY 1
),
final AS (
SELECT
u.user_pseudo_id,
u.total_events,
u.sessions,
u.view_page,
u.view_item,
u.add_to_cart,
u.remove_from_cart,
u.add_payment_info,
u.add_shipping_info,
u.begin_checkout,
u.transactions,
u.total_shipping,
u.total_tax,
u.total_revenue,
u.total_items,
ARRAY_AGG(
session_id,
source_medium,
campaign_name,
events
) AS sessions
FROM users u
LEFT JOIN sessions s
USING(user_pseudo_id)
)
SELECT *
FROM final
但是,我收到以下错误消息:
The argument to ARRAY_AGG must not be an array type but was ARRAY<STRUCT<date DATE, timestamp TIMESTAMP, event_name STRING, event_specific_info ARRAY<STRUCT<key STRING, value STRUCT<string_val STRING, int_val INT64, float_val DOUBLE>>>>>
为什么这个数组类型无效?
根据您的代码,session table 中的 events 字段已经是数组类型,因此不能在最后 array_agg.
中使用
来自官方doc
Supported Argument Types
All data types except ARRAY.
这里是相当初级的数据分析师
我正在尝试将 GA4 中的非规范化事件数据转换为对 BI 更友好的嵌套格式。 原始 GA4 数据架构: GA4 schema
起点是事件级别的数据,但是当我尝试基于 user_pseudo_id 创建一个深入的用户仪表板时,我想创建三个抽象层:
- 包含设备信息、总体统计信息和作为嵌套重复记录的会话的用户级别
- 包含地理数据、会话时长、访问页面数量以及会话的所有事件作为嵌套重复记录的会话级别
- 包含时间戳、事件类型和事件特定信息的事件级别。
到目前为止我的代码:
...events_joined_with_transactions AS (
SELECT
ue.*,
t.transaction_id,
t.currency,
t.shipping,
t.tax,
t.revenue,
t.unique_items,
t.total_items,
t.items
FROM user_events AS ue
LEFT JOIN transactions AS t
ON ue.event_name = "purchase"
AND ue.user_pseudo_id = t.user_pseudo_id
AND t.timestamp = ue.timestamp
),
sessions AS (
SELECT
user_pseudo_id,
session_id,
source_medium,
campaign_name,
ARRAY_AGG(
STRUCT(
date,
timestamp,
event_name,
event_specific_info
)
) AS events
FROM events_joined_with_transactions
GROUP BY 1, 2, 3, 4
),
users AS (
SELECT
user_pseudo_id,
SUM(IF(event_name != "user_engagement", 1, 0)) AS total_events,
SUM(IF(event_name = "session_start", 1, 0)) AS sessions,
SUM(IF(event_name = "page_view", 1, 0)) AS view_page,
SUM(IF(event_name = "view_item", 1, 0)) AS view_item,
SUM(IF(event_name = "add_to_cart", 1, 0)) AS add_to_cart,
SUM(IF(event_name = "remove_from_cart", 1, 0)) AS remove_from_cart,
SUM(IF(event_name = "add_payment_info", 1, 0)) AS add_payment_info,
SUM(IF(event_name = "add_shipping_info", 1, 0)) AS add_shipping_info,
SUM(IF(event_name = "begin_checkout", 1, 0)) AS begin_checkout,
SUM(IF(event_name = "purchase", 1, 0)) AS transactions,
SUM(shipping) AS total_shipping,
SUM(tax) AS total_tax,
SUM(revenue) AS total_revenue,
SUM(total_items) AS total_items,
FROM events_joined_with_transactions
GROUP BY 1
),
final AS (
SELECT
u.user_pseudo_id,
u.total_events,
u.sessions,
u.view_page,
u.view_item,
u.add_to_cart,
u.remove_from_cart,
u.add_payment_info,
u.add_shipping_info,
u.begin_checkout,
u.transactions,
u.total_shipping,
u.total_tax,
u.total_revenue,
u.total_items,
ARRAY_AGG(
session_id,
source_medium,
campaign_name,
events
) AS sessions
FROM users u
LEFT JOIN sessions s
USING(user_pseudo_id)
)
SELECT *
FROM final
但是,我收到以下错误消息:
The argument to ARRAY_AGG must not be an array type but was ARRAY<STRUCT<date DATE, timestamp TIMESTAMP, event_name STRING, event_specific_info ARRAY<STRUCT<key STRING, value STRUCT<string_val STRING, int_val INT64, float_val DOUBLE>>>>>
为什么这个数组类型无效?
根据您的代码,session table 中的 events 字段已经是数组类型,因此不能在最后 array_agg.
中使用来自官方doc
Supported Argument Types
All data types except ARRAY.