在 BigQuery 中使用 Standard SQL 选取不重复的值(SELECT DISTINCT)
select distinct values in bigquery using standardsql
我想 SELECT 多列,并用 GROUP BY 按电子邮件分组:
#standardSQL
SELECT  -- The question's failing attempt: four columns selected, only one grouped.
customers.orderCustomerEmail AS email,
customers.orderCustomerNumber AS customerNumber,  -- neither grouped nor aggregated: the reported error points at this line
customers.billingFirstname AS billingFirstname,   -- also neither grouped nor aggregated
customers.billingLastname AS billingLastname      -- also neither grouped nor aggregated
FROM dim_customers AS customers
GROUP BY customers.orderCustomerEmail  -- the email is the only grouping key
失败:
Error: SELECT list expression references customers.orderCustomerNumber
which is neither grouped nor aggregated at [4:7]
这与另一个已有的问题类似,但那个问题的答案并没有解决我的问题,因为把所有列都加入 GROUP BY 得到的结果与 SELECT DISTINCT 相同。
dim_customers 表的架构:
orderCustomerEmail:STRING,
billingFirstname:STRING,
billingLastname:STRING,
orderCustomerNumber:STRING,
OrderNumber:STRING
虚拟数据:https://docs.google.com/spreadsheets/d/1T1JZRWni18hhU4tO-9kQqq5Y3hVWgpP-aE7o6ij9bDE/edit?usp=sharing
按某些列分组时,必须对其余各列应用聚合函数,否则就会出现您在问题中展示的错误。
请尝试下面的 BigQuery Standard SQL 示例:
#standardSQL
-- Flatten the `customers` array column and gather every record that shares
-- an email into one array of (customerNumber, billingFirstname,
-- billingLastname) structs, giving a single output row per email.
SELECT
  c.orderCustomerEmail AS email,
  ARRAY_AGG(
    STRUCT(
      c.orderCustomerNumber AS customerNumber,
      c.billingFirstname AS billingFirstname,
      c.billingLastname AS billingLastname
    )
  ) AS info
FROM `dim_customers` AS d
CROSS JOIN UNNEST(d.customers) AS c  -- correlated join: one row per array element
GROUP BY email
或者直接使用 DISTINCT:
#standardSQL
-- Flatten the `customers` array column and keep each distinct combination of
-- email, customer number, first name and last name exactly once.
SELECT DISTINCT
  c.orderCustomerEmail AS email,
  c.orderCustomerNumber AS customerNumber,
  c.billingFirstname AS billingFirstname,
  c.billingLastname AS billingLastname
FROM `dim_customers` AS d
CROSS JOIN UNNEST(d.customers) AS c  -- correlated join: one row per array element
请注意:就您期望的确切输出而言,您的问题不够具体,因此上述内容很可能需要根据您的具体需求进行一些调整
更新
I basically need one row per customer (email is the unique identifier, hence the GROUP BY). The details (number, first name, last name) can be taken from the last entry, e.g.:
#standardSQL
-- Returns one row per email together with the details of a single,
-- reproducibly chosen entry for that email.
--
-- ARRAY_AGG without ORDER BY yields its elements in an unspecified order, so
-- taking [OFFSET(0)] from it picks an arbitrary entry per email and the result
-- can differ between runs. Ordering inside the aggregate and keeping only the
-- top element (LIMIT 1) makes the pick deterministic and avoids building the
-- full array for each group.
WITH `dim_customers` AS (
  -- Dummy data: two rows, each holding the same array of customer structs.
  SELECT [
    STRUCT('a' AS orderCustomerEmail, 1 AS orderCustomerNumber, 'af' AS billingFirstname, 'al' AS billingLastname),
    STRUCT('a' AS orderCustomerEmail, 4 AS orderCustomerNumber, 'af1' AS billingFirstname, 'al2' AS billingLastname),
    STRUCT('b' AS orderCustomerEmail, 2 AS orderCustomerNumber, 'bf' AS billingFirstname, 'bl' AS billingLastname),
    STRUCT('c' AS orderCustomerEmail, 3 AS orderCustomerNumber, 'cf' AS billingFirstname, 'cl' AS billingLastname)
  ] AS customers UNION ALL
  SELECT [
    STRUCT('a' AS orderCustomerEmail, 1 AS orderCustomerNumber, 'af' AS billingFirstname, 'al' AS billingLastname),
    STRUCT('a' AS orderCustomerEmail, 4 AS orderCustomerNumber, 'af1' AS billingFirstname, 'al2' AS billingLastname),
    STRUCT('b' AS orderCustomerEmail, 2 AS orderCustomerNumber, 'bf' AS billingFirstname, 'bl' AS billingLastname),
    STRUCT('c' AS orderCustomerEmail, 3 AS orderCustomerNumber, 'cf' AS billingFirstname, 'cl' AS billingLastname)
  ] AS customers
)
SELECT
  customers.orderCustomerEmail AS email,
  ARRAY_AGG(
    STRUCT(
      customers.orderCustomerNumber AS customerNumber,
      customers.billingFirstname AS billingFirstname,
      customers.billingLastname AS billingLastname
    )
    -- The highest customer number stands in for "the last entry" in this dummy
    -- data; the name columns only break ties so the pick is reproducible.
    -- NOTE(review): order by the real recency column on the actual table.
    ORDER BY
      customers.orderCustomerNumber DESC,
      customers.billingFirstname DESC,
      customers.billingLastname DESC
    LIMIT 1
  )[OFFSET(0)] AS info
FROM `dim_customers`
CROSS JOIN UNNEST(customers) AS customers  -- correlated join: one row per array element
GROUP BY email
更新
The query below is for the updated schema:
dim_customers 表的架构:
orderCustomerEmail:STRING,
billingFirstname:STRING,
billingLastname:STRING,
orderCustomerNumber:STRING,
OrderNumber:STRING
#standardSQL
-- Returns one row per email (flat, non-nested input) with the email, customer
-- number, first name and last name of a single, reproducibly chosen entry.
--
-- ARRAY_AGG without ORDER BY yields its elements in an unspecified order, so
-- taking [OFFSET(0)] from it picks an arbitrary entry per email and the result
-- can differ between runs. Ordering inside the aggregate and keeping only the
-- top element (LIMIT 1) makes the pick deterministic and avoids building the
-- full array for each group.
WITH `dim_customers` AS (
  -- Dummy data; several emails appear more than once with differing details.
  SELECT 10201 AS orderCustomerNumber, 'a@email.com' AS orderCustomerEmail, 'Alex' AS billingFirstname, 'Miller' AS billingLastname UNION ALL
  SELECT 10202, 'b@email.com', 'Ben', 'Williams' UNION ALL
  SELECT 10203, 'c@email.com', 'Chris', 'Collins' UNION ALL
  SELECT 10204, 'd@email.com', 'David', 'Hems' UNION ALL
  SELECT 10201, 'a@email.com', 'A.', 'Miller' UNION ALL
  SELECT 10201, 'a@email.com', 'A.', 'Miller' UNION ALL
  SELECT 10202, 'b@email.com', 'Ben', 'Williams' UNION ALL
  SELECT 10202, 'b@email.com', 'Bens Father', 'Williams' UNION ALL
  SELECT 10205, 'a@email.com', 'A.', 'Miller' UNION ALL
  SELECT 10206, 'e@email.com', 'Ed', 'Winchell'
),
-- One struct per email holding the chosen entry's details.
picked_per_email AS (
  SELECT
    orderCustomerEmail AS email,
    ARRAY_AGG(
      STRUCT(
        orderCustomerEmail AS email,
        orderCustomerNumber AS customerNumber,
        billingFirstname AS billingFirstname,
        billingLastname AS billingLastname
      )
      -- The highest customer number stands in for "the last entry" in this
      -- dummy data; the name columns only break ties so the pick is reproducible.
      -- NOTE(review): order by the real recency column on the actual table.
      ORDER BY
        orderCustomerNumber DESC,
        billingFirstname DESC,
        billingLastname DESC
      LIMIT 1
    )[OFFSET(0)] AS info
  FROM `dim_customers`
  GROUP BY email
)
-- Expand the struct back into plain columns (same columns as info.*).
SELECT
  info.email,
  info.customerNumber,
  info.billingFirstname,
  info.billingLastname
FROM picked_per_email
-- Row order is unspecified; append an ORDER BY on email if a stable display order is needed.
我想 SELECT 多列,并用 GROUP BY 按电子邮件分组:
#standardSQL
SELECT  -- The question's failing attempt: four columns selected, only one grouped.
customers.orderCustomerEmail AS email,
customers.orderCustomerNumber AS customerNumber,  -- neither grouped nor aggregated: the reported error points at this line
customers.billingFirstname AS billingFirstname,   -- also neither grouped nor aggregated
customers.billingLastname AS billingLastname      -- also neither grouped nor aggregated
FROM dim_customers AS customers
GROUP BY customers.orderCustomerEmail  -- the email is the only grouping key
失败:
Error: SELECT list expression references customers.orderCustomerNumber
which is neither grouped nor aggregated at [4:7]
这与另一个已有的问题类似,但那个问题的答案并没有解决我的问题,因为把所有列都加入 GROUP BY 得到的结果与 SELECT DISTINCT 相同。
dim_customers 表的架构:
orderCustomerEmail:STRING,
billingFirstname:STRING,
billingLastname:STRING,
orderCustomerNumber:STRING,
OrderNumber:STRING
虚拟数据:https://docs.google.com/spreadsheets/d/1T1JZRWni18hhU4tO-9kQqq5Y3hVWgpP-aE7o6ij9bDE/edit?usp=sharing
按某些列分组时,必须对其余各列应用聚合函数,否则就会出现您在问题中展示的错误。
请尝试下面的 BigQuery Standard SQL 示例:
#standardSQL
-- Flatten the `customers` array column and gather every record that shares
-- an email into one array of (customerNumber, billingFirstname,
-- billingLastname) structs, giving a single output row per email.
SELECT
  c.orderCustomerEmail AS email,
  ARRAY_AGG(
    STRUCT(
      c.orderCustomerNumber AS customerNumber,
      c.billingFirstname AS billingFirstname,
      c.billingLastname AS billingLastname
    )
  ) AS info
FROM `dim_customers` AS d
CROSS JOIN UNNEST(d.customers) AS c  -- correlated join: one row per array element
GROUP BY email
或者直接使用 DISTINCT:
#standardSQL
-- Flatten the `customers` array column and keep each distinct combination of
-- email, customer number, first name and last name exactly once.
SELECT DISTINCT
  c.orderCustomerEmail AS email,
  c.orderCustomerNumber AS customerNumber,
  c.billingFirstname AS billingFirstname,
  c.billingLastname AS billingLastname
FROM `dim_customers` AS d
CROSS JOIN UNNEST(d.customers) AS c  -- correlated join: one row per array element
请注意:就您期望的确切输出而言,您的问题不够具体,因此上述内容很可能需要根据您的具体需求进行一些调整
更新
I basically need one row per customer (email is the unique identifier, hence the GROUP BY). The details (number, first name, last name) can be taken from the last entry, e.g.:
#standardSQL
-- Returns one row per email together with the details of a single,
-- reproducibly chosen entry for that email.
--
-- ARRAY_AGG without ORDER BY yields its elements in an unspecified order, so
-- taking [OFFSET(0)] from it picks an arbitrary entry per email and the result
-- can differ between runs. Ordering inside the aggregate and keeping only the
-- top element (LIMIT 1) makes the pick deterministic and avoids building the
-- full array for each group.
WITH `dim_customers` AS (
  -- Dummy data: two rows, each holding the same array of customer structs.
  SELECT [
    STRUCT('a' AS orderCustomerEmail, 1 AS orderCustomerNumber, 'af' AS billingFirstname, 'al' AS billingLastname),
    STRUCT('a' AS orderCustomerEmail, 4 AS orderCustomerNumber, 'af1' AS billingFirstname, 'al2' AS billingLastname),
    STRUCT('b' AS orderCustomerEmail, 2 AS orderCustomerNumber, 'bf' AS billingFirstname, 'bl' AS billingLastname),
    STRUCT('c' AS orderCustomerEmail, 3 AS orderCustomerNumber, 'cf' AS billingFirstname, 'cl' AS billingLastname)
  ] AS customers UNION ALL
  SELECT [
    STRUCT('a' AS orderCustomerEmail, 1 AS orderCustomerNumber, 'af' AS billingFirstname, 'al' AS billingLastname),
    STRUCT('a' AS orderCustomerEmail, 4 AS orderCustomerNumber, 'af1' AS billingFirstname, 'al2' AS billingLastname),
    STRUCT('b' AS orderCustomerEmail, 2 AS orderCustomerNumber, 'bf' AS billingFirstname, 'bl' AS billingLastname),
    STRUCT('c' AS orderCustomerEmail, 3 AS orderCustomerNumber, 'cf' AS billingFirstname, 'cl' AS billingLastname)
  ] AS customers
)
SELECT
  customers.orderCustomerEmail AS email,
  ARRAY_AGG(
    STRUCT(
      customers.orderCustomerNumber AS customerNumber,
      customers.billingFirstname AS billingFirstname,
      customers.billingLastname AS billingLastname
    )
    -- The highest customer number stands in for "the last entry" in this dummy
    -- data; the name columns only break ties so the pick is reproducible.
    -- NOTE(review): order by the real recency column on the actual table.
    ORDER BY
      customers.orderCustomerNumber DESC,
      customers.billingFirstname DESC,
      customers.billingLastname DESC
    LIMIT 1
  )[OFFSET(0)] AS info
FROM `dim_customers`
CROSS JOIN UNNEST(customers) AS customers  -- correlated join: one row per array element
GROUP BY email
更新
The query below is for the updated schema:
dim_customers 表的架构:
orderCustomerEmail:STRING,
billingFirstname:STRING,
billingLastname:STRING,
orderCustomerNumber:STRING,
OrderNumber:STRING
#standardSQL
-- Returns one row per email (flat, non-nested input) with the email, customer
-- number, first name and last name of a single, reproducibly chosen entry.
--
-- ARRAY_AGG without ORDER BY yields its elements in an unspecified order, so
-- taking [OFFSET(0)] from it picks an arbitrary entry per email and the result
-- can differ between runs. Ordering inside the aggregate and keeping only the
-- top element (LIMIT 1) makes the pick deterministic and avoids building the
-- full array for each group.
WITH `dim_customers` AS (
  -- Dummy data; several emails appear more than once with differing details.
  SELECT 10201 AS orderCustomerNumber, 'a@email.com' AS orderCustomerEmail, 'Alex' AS billingFirstname, 'Miller' AS billingLastname UNION ALL
  SELECT 10202, 'b@email.com', 'Ben', 'Williams' UNION ALL
  SELECT 10203, 'c@email.com', 'Chris', 'Collins' UNION ALL
  SELECT 10204, 'd@email.com', 'David', 'Hems' UNION ALL
  SELECT 10201, 'a@email.com', 'A.', 'Miller' UNION ALL
  SELECT 10201, 'a@email.com', 'A.', 'Miller' UNION ALL
  SELECT 10202, 'b@email.com', 'Ben', 'Williams' UNION ALL
  SELECT 10202, 'b@email.com', 'Bens Father', 'Williams' UNION ALL
  SELECT 10205, 'a@email.com', 'A.', 'Miller' UNION ALL
  SELECT 10206, 'e@email.com', 'Ed', 'Winchell'
),
-- One struct per email holding the chosen entry's details.
picked_per_email AS (
  SELECT
    orderCustomerEmail AS email,
    ARRAY_AGG(
      STRUCT(
        orderCustomerEmail AS email,
        orderCustomerNumber AS customerNumber,
        billingFirstname AS billingFirstname,
        billingLastname AS billingLastname
      )
      -- The highest customer number stands in for "the last entry" in this
      -- dummy data; the name columns only break ties so the pick is reproducible.
      -- NOTE(review): order by the real recency column on the actual table.
      ORDER BY
        orderCustomerNumber DESC,
        billingFirstname DESC,
        billingLastname DESC
      LIMIT 1
    )[OFFSET(0)] AS info
  FROM `dim_customers`
  GROUP BY email
)
-- Expand the struct back into plain columns (same columns as info.*).
SELECT
  info.email,
  info.customerNumber,
  info.billingFirstname,
  info.billingLastname
FROM picked_per_email
-- Row order is unspecified; append an ORDER BY on email if a stable display order is needed.