如何使用 JOIN 语句合并临时表?
How can I make a union of temporary tables with JOIN statement?
通过WITH语句得到了三个临时表:
"october_fall10",
“november_fall11”和
“december_fall12”
现在我想以“member_casual”列为连接键,对它们执行 INNER JOIN(见下面的查询),但收到了以下错误提示
错误消息:Table name "november_fall11" missing dataset while no default dataset is set in the request.
即使我改成先引用“december_fall12”,同样的错误还是会出现,只是错误消息中提到的表名变成了“december_fall12”。
-- Q1, statement 1 of 3: October 2020 rides.
-- The CTE derives per-ride date/time parts; the outer query counts rides and
-- averages the start time for each (start_week_date, member_casual) pair.
WITH october_fall10 AS (
    SELECT
        rides.start_station_name,
        rides.end_station_name,
        rides.start_station_id,
        rides.end_station_id,
        EXTRACT(DATE FROM rides.started_at) AS start_date,
        EXTRACT(DAYOFWEEK FROM rides.started_at) AS start_week_date,
        EXTRACT(TIME FROM rides.started_at) AS start_time,
        EXTRACT(DATE FROM rides.ended_at) AS end_date,
        EXTRACT(DAYOFWEEK FROM rides.ended_at) AS end_week_date,
        EXTRACT(TIME FROM rides.ended_at) AS end_time,
        DATETIME_DIFF(rides.ended_at, rides.started_at, MINUTE) AS total_lenght,
        rides.member_casual
    FROM `ciclystic.cyclistic_seasonal_analysis.fall_202010` AS rides
    ORDER BY rides.started_at DESC
)
SELECT
    member_casual,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- Rebuild a TIME value from the hour/minute/second parts of the averaged offset.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM october_fall10
GROUP BY
    start_week_date,
    member_casual
ORDER BY start_week_date DESC;
-- Q1, statement 2 of 3: November 2020 rides.
-- Same shape as the October statement: per-ride fields in the CTE, then ride
-- counts and average start time per (start_week_date, member_casual).
WITH november_fall11 AS (
    SELECT
        rides.start_station_name,
        rides.end_station_name,
        rides.start_station_id,
        rides.end_station_id,
        EXTRACT(DATE FROM rides.started_at) AS start_date,
        EXTRACT(DAYOFWEEK FROM rides.started_at) AS start_week_date,
        EXTRACT(TIME FROM rides.started_at) AS start_time,
        EXTRACT(DATE FROM rides.ended_at) AS end_date,
        EXTRACT(DAYOFWEEK FROM rides.ended_at) AS end_week_date,
        EXTRACT(TIME FROM rides.ended_at) AS end_time,
        DATETIME_DIFF(rides.ended_at, rides.started_at, MINUTE) AS total_lenght,
        rides.member_casual
    FROM `ciclystic.cyclistic_seasonal_analysis.fall_202011` AS rides
    ORDER BY rides.started_at DESC
)
SELECT
    member_casual,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- Rebuild a TIME value from the hour/minute/second parts of the averaged offset.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM november_fall11
GROUP BY
    start_week_date,
    member_casual
ORDER BY start_week_date DESC;
-- Q1, statement 3 of 3: December 2020 rides.
-- Same shape as the October and November statements: per-ride fields in the
-- CTE, then ride counts and average start time per
-- (start_week_date, member_casual).
WITH december_fall12 AS (
    SELECT
        rides.start_station_name,
        rides.end_station_name,
        rides.start_station_id,
        rides.end_station_id,
        EXTRACT(DATE FROM rides.started_at) AS start_date,
        EXTRACT(DAYOFWEEK FROM rides.started_at) AS start_week_date,
        EXTRACT(TIME FROM rides.started_at) AS start_time,
        EXTRACT(DATE FROM rides.ended_at) AS end_date,
        EXTRACT(DAYOFWEEK FROM rides.ended_at) AS end_week_date,
        EXTRACT(TIME FROM rides.ended_at) AS end_time,
        DATETIME_DIFF(rides.ended_at, rides.started_at, MINUTE) AS total_lenght,
        rides.member_casual
    FROM `ciclystic.cyclistic_seasonal_analysis.fall_202012` AS rides
    ORDER BY rides.started_at DESC
)
SELECT
    member_casual,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- Rebuild a TIME value from the hour/minute/second parts of the averaged offset.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM december_fall12
GROUP BY
    start_week_date,
    member_casual
ORDER BY start_week_date DESC;
-- Q2: intended to place the three monthly results side by side, joined on
-- member_casual.
-- NOTE: this statement has no WITH clause of its own. The names
-- october_fall10 / november_fall11 / december_fall12 were defined by the WITH
-- clauses of the three earlier statements, each of which ended at its `;`.
-- This is the query reported to fail with:
--   Table name "november_fall11" missing dataset while no default dataset is
--   set in the request.
SELECT
october_fall10.member_casual,
october_fall10.start_week_date,
october_fall10.member_casual_start,
october_fall10.avg_start_time,
november_fall11.member_casual,
november_fall11.start_week_date,
november_fall11.member_casual_start,
november_fall11.avg_start_time,
december_fall12.member_casual,
december_fall12.start_week_date,
december_fall12.member_casual_start,
december_fall12.avg_start_time
FROM
-- Nested inner joins; both USING clauses join on member_casual only.
( october_fall10 JOIN (november_fall11 JOIN december_fall12 USING (member_casual))
USING
(member_casual) )
首先,当您使用 WITH 创建“表”时,它并不是临时表,而是公用表表达式 (Common Table Expression, CTE)。弄清这些术语的区别很有用,因为它们的行为方式不同(把两者混为一谈,就像把汽油和柴油搞混一样,后果不妙)。
它们的不同之处之一是 CTE 不会以任何方式持久存在。最终查询执行完毕后,CTE 就超出了作用域,无法再被引用(在 `;` 之后,CTE 就“消失”了)。
这意味着您想要的模式是...
-- One statement, one WITH clause: every CTE below stays in scope until the
-- final SELECT, so the three monthly aggregates can be joined to each other.
--
-- Each *_aggregate CTE has one row per (member_casual, start_week_date), so
-- the joins use BOTH columns as the key. Joining on member_casual alone would
-- pair every weekday of one month with every weekday of the other months.
WITH
    -- Per-ride fields for October 2020.
    october_fall10 AS (
        SELECT
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length,
            member_casual
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202010`
    ),

    -- Ride count and average start time per weekday / rider type, October.
    -- NOTE(review): `start_time - '0:0:0'` is kept from the original query; it
    -- relies on the engine accepting TIME minus a string literal. Confirm on
    -- the target engine.
    october_fall10_aggregate AS (
        SELECT
            member_casual,
            start_week_date,
            COUNT(member_casual) AS member_casual_start,
            TIME(
                EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
                EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
                EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
            ) AS avg_start_time
        FROM october_fall10
        GROUP BY
            start_week_date,
            member_casual
    ),

    -- Per-ride fields for November 2020.
    november_fall11 AS (
        SELECT
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length,
            member_casual
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202011`
    ),

    -- Ride count and average start time per weekday / rider type, November.
    november_fall11_aggregate AS (
        SELECT
            member_casual,
            start_week_date,
            COUNT(member_casual) AS member_casual_start,
            TIME(
                EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
                EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
                EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
            ) AS avg_start_time
        FROM november_fall11
        GROUP BY
            start_week_date,
            member_casual
    ),

    -- Per-ride fields for December 2020.
    december_fall12 AS (
        SELECT
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length,
            member_casual
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202012`
    ),

    -- Ride count and average start time per weekday / rider type, December.
    -- No trailing comma after this last CTE: the main SELECT follows directly.
    december_fall12_aggregate AS (
        SELECT
            member_casual,
            start_week_date,
            COUNT(member_casual) AS member_casual_start,
            TIME(
                EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
                EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
                EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
            ) AS avg_start_time
        FROM december_fall12
        GROUP BY
            start_week_date,
            member_casual
    )

-- One row per (member_casual, start_week_date) with the three months side by
-- side. The join keys are emitted once; each metric gets a month-specific
-- alias so the result has no duplicate column names.
SELECT
    member_casual,
    start_week_date,
    oct_agg.member_casual_start AS october_member_casual_start,
    oct_agg.avg_start_time AS october_avg_start_time,
    nov_agg.member_casual_start AS november_member_casual_start,
    nov_agg.avg_start_time AS november_avg_start_time,
    dec_agg.member_casual_start AS december_member_casual_start,
    dec_agg.avg_start_time AS december_avg_start_time
FROM october_fall10_aggregate AS oct_agg
INNER JOIN november_fall11_aggregate AS nov_agg
    USING (member_casual, start_week_date)
INNER JOIN december_fall12_aggregate AS dec_agg
    USING (member_casual, start_week_date)
-- Ordering belongs on the final query; CTEs do not preserve row order.
ORDER BY
    start_week_date DESC,
    member_casual
这会创建带有 _aggregate
后缀的新 CTE,然后在您的最终查询中引用它们。
- 请注意,我还从每个 CTE
中删除了 ORDER BY
- 这是因为 CTE(和 table,临时或其他)不保留该顺序
此外,把这些代码重复写三遍实在没有必要。把所有数据放进同一张表、代码只写一次会更好。如果做不到这一点,也可以用 UNION ALL 把几张表合并起来,把它们“当作”一张表来查询……
-- Single-pass version: stack the three monthly tables, derive the per-ride
-- fields once, then aggregate per rider type / source month / weekday.
WITH
    -- Stack the monthly tables and tag each row with its source month.
    -- Columns are listed explicitly (not `*`) because UNION ALL matches
    -- columns by position: with `*`, any difference in column order or count
    -- between the monthly tables would misalign data or fail the query.
    unioned AS (
        SELECT
            202010 AS source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            started_at,
            ended_at
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202010`
        UNION ALL
        SELECT
            202011 AS source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            started_at,
            ended_at
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202011`
        UNION ALL
        SELECT
            202012 AS source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            started_at,
            ended_at
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202012`
    ),

    -- Per-ride date/time parts and ride length in minutes.
    extraction AS (
        SELECT
            source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length
        FROM unioned
    )

-- One row per (member_casual, source_tbl, start_week_date).
SELECT
    member_casual,
    source_tbl,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- NOTE(review): `start_time - '0:0:0'` is kept from the original query; it
    -- relies on the engine accepting TIME minus a string literal. Confirm on
    -- the target engine.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM extraction
GROUP BY
    member_casual,
    source_tbl,
    start_week_date
ORDER BY
    member_casual,
    source_tbl,
    start_week_date
这与您之前的输出格式并不相同,但更符合 SQL 的惯用模式。
通过 WITH 语句得到了三个临时表:“october_fall10”、“november_fall11”和“december_fall12”。现在我想以“member_casual”列为连接键,对它们执行 INNER JOIN(见下面的查询),但收到了以下错误提示。错误消息:Table name "november_fall11" missing dataset while no default dataset is set in the request. 即使我改成先引用“december_fall12”,同样的错误还是会出现,只是错误消息中提到的表名变成了“december_fall12”。
-- Q1, statement 1 of 3: October 2020 rides.
-- The CTE derives per-ride date/time parts; the outer query counts rides and
-- averages the start time for each (start_week_date, member_casual) pair.
WITH october_fall10 AS (
    SELECT
        rides.start_station_name,
        rides.end_station_name,
        rides.start_station_id,
        rides.end_station_id,
        EXTRACT(DATE FROM rides.started_at) AS start_date,
        EXTRACT(DAYOFWEEK FROM rides.started_at) AS start_week_date,
        EXTRACT(TIME FROM rides.started_at) AS start_time,
        EXTRACT(DATE FROM rides.ended_at) AS end_date,
        EXTRACT(DAYOFWEEK FROM rides.ended_at) AS end_week_date,
        EXTRACT(TIME FROM rides.ended_at) AS end_time,
        DATETIME_DIFF(rides.ended_at, rides.started_at, MINUTE) AS total_lenght,
        rides.member_casual
    FROM `ciclystic.cyclistic_seasonal_analysis.fall_202010` AS rides
    ORDER BY rides.started_at DESC
)
SELECT
    member_casual,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- Rebuild a TIME value from the hour/minute/second parts of the averaged offset.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM october_fall10
GROUP BY
    start_week_date,
    member_casual
ORDER BY start_week_date DESC;
-- Q1, statement 2 of 3: November 2020 rides.
-- Same shape as the October statement: per-ride fields in the CTE, then ride
-- counts and average start time per (start_week_date, member_casual).
WITH november_fall11 AS (
    SELECT
        rides.start_station_name,
        rides.end_station_name,
        rides.start_station_id,
        rides.end_station_id,
        EXTRACT(DATE FROM rides.started_at) AS start_date,
        EXTRACT(DAYOFWEEK FROM rides.started_at) AS start_week_date,
        EXTRACT(TIME FROM rides.started_at) AS start_time,
        EXTRACT(DATE FROM rides.ended_at) AS end_date,
        EXTRACT(DAYOFWEEK FROM rides.ended_at) AS end_week_date,
        EXTRACT(TIME FROM rides.ended_at) AS end_time,
        DATETIME_DIFF(rides.ended_at, rides.started_at, MINUTE) AS total_lenght,
        rides.member_casual
    FROM `ciclystic.cyclistic_seasonal_analysis.fall_202011` AS rides
    ORDER BY rides.started_at DESC
)
SELECT
    member_casual,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- Rebuild a TIME value from the hour/minute/second parts of the averaged offset.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM november_fall11
GROUP BY
    start_week_date,
    member_casual
ORDER BY start_week_date DESC;
-- Q1, statement 3 of 3: December 2020 rides.
-- Same shape as the October and November statements: per-ride fields in the
-- CTE, then ride counts and average start time per
-- (start_week_date, member_casual).
WITH december_fall12 AS (
    SELECT
        rides.start_station_name,
        rides.end_station_name,
        rides.start_station_id,
        rides.end_station_id,
        EXTRACT(DATE FROM rides.started_at) AS start_date,
        EXTRACT(DAYOFWEEK FROM rides.started_at) AS start_week_date,
        EXTRACT(TIME FROM rides.started_at) AS start_time,
        EXTRACT(DATE FROM rides.ended_at) AS end_date,
        EXTRACT(DAYOFWEEK FROM rides.ended_at) AS end_week_date,
        EXTRACT(TIME FROM rides.ended_at) AS end_time,
        DATETIME_DIFF(rides.ended_at, rides.started_at, MINUTE) AS total_lenght,
        rides.member_casual
    FROM `ciclystic.cyclistic_seasonal_analysis.fall_202012` AS rides
    ORDER BY rides.started_at DESC
)
SELECT
    member_casual,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- Rebuild a TIME value from the hour/minute/second parts of the averaged offset.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM december_fall12
GROUP BY
    start_week_date,
    member_casual
ORDER BY start_week_date DESC;
-- Q2: intended to place the three monthly results side by side, joined on
-- member_casual.
-- NOTE: this statement has no WITH clause of its own. The names
-- october_fall10 / november_fall11 / december_fall12 were defined by the WITH
-- clauses of the three earlier statements, each of which ended at its `;`.
-- This is the query reported to fail with:
--   Table name "november_fall11" missing dataset while no default dataset is
--   set in the request.
SELECT
october_fall10.member_casual,
october_fall10.start_week_date,
october_fall10.member_casual_start,
october_fall10.avg_start_time,
november_fall11.member_casual,
november_fall11.start_week_date,
november_fall11.member_casual_start,
november_fall11.avg_start_time,
december_fall12.member_casual,
december_fall12.start_week_date,
december_fall12.member_casual_start,
december_fall12.avg_start_time
FROM
-- Nested inner joins; both USING clauses join on member_casual only.
( october_fall10 JOIN (november_fall11 JOIN december_fall12 USING (member_casual))
USING
(member_casual) )
首先,当您使用 WITH 创建“表”时,它并不是临时表,而是公用表表达式 (Common Table Expression, CTE)。弄清这些术语的区别很有用,因为它们的行为方式不同(把两者混为一谈,就像把汽油和柴油搞混一样,后果不妙)。
它们的不同之处之一是 CTE 不会以任何方式持久存在。最终查询执行完毕后,CTE 就超出了作用域,无法再被引用(在 `;` 之后,CTE 就“消失”了)。
这意味着您想要的模式是...
-- One statement, one WITH clause: every CTE below stays in scope until the
-- final SELECT, so the three monthly aggregates can be joined to each other.
--
-- Each *_aggregate CTE has one row per (member_casual, start_week_date), so
-- the joins use BOTH columns as the key. Joining on member_casual alone would
-- pair every weekday of one month with every weekday of the other months.
WITH
    -- Per-ride fields for October 2020.
    october_fall10 AS (
        SELECT
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length,
            member_casual
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202010`
    ),

    -- Ride count and average start time per weekday / rider type, October.
    -- NOTE(review): `start_time - '0:0:0'` is kept from the original query; it
    -- relies on the engine accepting TIME minus a string literal. Confirm on
    -- the target engine.
    october_fall10_aggregate AS (
        SELECT
            member_casual,
            start_week_date,
            COUNT(member_casual) AS member_casual_start,
            TIME(
                EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
                EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
                EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
            ) AS avg_start_time
        FROM october_fall10
        GROUP BY
            start_week_date,
            member_casual
    ),

    -- Per-ride fields for November 2020.
    november_fall11 AS (
        SELECT
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length,
            member_casual
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202011`
    ),

    -- Ride count and average start time per weekday / rider type, November.
    november_fall11_aggregate AS (
        SELECT
            member_casual,
            start_week_date,
            COUNT(member_casual) AS member_casual_start,
            TIME(
                EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
                EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
                EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
            ) AS avg_start_time
        FROM november_fall11
        GROUP BY
            start_week_date,
            member_casual
    ),

    -- Per-ride fields for December 2020.
    december_fall12 AS (
        SELECT
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length,
            member_casual
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202012`
    ),

    -- Ride count and average start time per weekday / rider type, December.
    -- No trailing comma after this last CTE: the main SELECT follows directly.
    december_fall12_aggregate AS (
        SELECT
            member_casual,
            start_week_date,
            COUNT(member_casual) AS member_casual_start,
            TIME(
                EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
                EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
                EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
            ) AS avg_start_time
        FROM december_fall12
        GROUP BY
            start_week_date,
            member_casual
    )

-- One row per (member_casual, start_week_date) with the three months side by
-- side. The join keys are emitted once; each metric gets a month-specific
-- alias so the result has no duplicate column names.
SELECT
    member_casual,
    start_week_date,
    oct_agg.member_casual_start AS october_member_casual_start,
    oct_agg.avg_start_time AS october_avg_start_time,
    nov_agg.member_casual_start AS november_member_casual_start,
    nov_agg.avg_start_time AS november_avg_start_time,
    dec_agg.member_casual_start AS december_member_casual_start,
    dec_agg.avg_start_time AS december_avg_start_time
FROM october_fall10_aggregate AS oct_agg
INNER JOIN november_fall11_aggregate AS nov_agg
    USING (member_casual, start_week_date)
INNER JOIN december_fall12_aggregate AS dec_agg
    USING (member_casual, start_week_date)
-- Ordering belongs on the final query; CTEs do not preserve row order.
ORDER BY
    start_week_date DESC,
    member_casual
这会创建带有 _aggregate
后缀的新 CTE,然后在您的最终查询中引用它们。
- 请注意,我还从每个 CTE 中删除了 ORDER BY
- 这是因为 CTE(以及表,无论是否为临时表)不会保留该顺序
此外,把这些代码重复写三遍实在没有必要。把所有数据放进同一张表、代码只写一次会更好。如果做不到这一点,也可以用 UNION ALL 把几张表合并起来,把它们“当作”一张表来查询……
-- Single-pass version: stack the three monthly tables, derive the per-ride
-- fields once, then aggregate per rider type / source month / weekday.
WITH
    -- Stack the monthly tables and tag each row with its source month.
    -- Columns are listed explicitly (not `*`) because UNION ALL matches
    -- columns by position: with `*`, any difference in column order or count
    -- between the monthly tables would misalign data or fail the query.
    unioned AS (
        SELECT
            202010 AS source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            started_at,
            ended_at
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202010`
        UNION ALL
        SELECT
            202011 AS source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            started_at,
            ended_at
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202011`
        UNION ALL
        SELECT
            202012 AS source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            started_at,
            ended_at
        FROM `ciclystic.cyclistic_seasonal_analysis.fall_202012`
    ),

    -- Per-ride date/time parts and ride length in minutes.
    extraction AS (
        SELECT
            source_tbl,
            member_casual,
            start_station_name,
            end_station_name,
            start_station_id,
            end_station_id,
            EXTRACT(DATE FROM started_at) AS start_date,
            EXTRACT(DAYOFWEEK FROM started_at) AS start_week_date,
            EXTRACT(TIME FROM started_at) AS start_time,
            EXTRACT(DATE FROM ended_at) AS end_date,
            EXTRACT(DAYOFWEEK FROM ended_at) AS end_week_date,
            EXTRACT(TIME FROM ended_at) AS end_time,
            DATETIME_DIFF(ended_at, started_at, MINUTE) AS total_length
        FROM unioned
    )

-- One row per (member_casual, source_tbl, start_week_date).
SELECT
    member_casual,
    source_tbl,
    start_week_date,
    COUNT(member_casual) AS member_casual_start,
    -- NOTE(review): `start_time - '0:0:0'` is kept from the original query; it
    -- relies on the engine accepting TIME minus a string literal. Confirm on
    -- the target engine.
    TIME(
        EXTRACT(HOUR FROM AVG(start_time - '0:0:0')),
        EXTRACT(MINUTE FROM AVG(start_time - '0:0:0')),
        EXTRACT(SECOND FROM AVG(start_time - '0:0:0'))
    ) AS avg_start_time
FROM extraction
GROUP BY
    member_casual,
    source_tbl,
    start_week_date
ORDER BY
    member_casual,
    source_tbl,
    start_week_date
这与您之前的输出格式并不相同,但更符合 SQL 的惯用模式。