Google BigQuery SQL:独立排序两列
Google BigQuery SQL: Order two columns independently
假设我有一些数据,例如:
grp v1 v2
--- -- --
2 5 7
2 4 9
3 10 2
3 11 1
我想创建独立于 table 顺序的新列 - 这样两列就有独立的顺序,即独立于 v2 按 v1 排序,同时按 grp 分区。
结果(独立排序,按 grp 分区)将是:
grp v1 v2 v1_ordered v2_ordered
--- -- -- ---------- ----------
2 5 7 4 7
2 4 9 5 9
3 10 2 10 1
3 11 1 11 2
一种方法是创建两个表,然后对它们做 CROSS JOIN。但是,我处理的数据行太多,这种做法在计算上不可行 - 有没有一种方法可以在没有 JOIN 的情况下在单个查询中完成此操作?
基本上,我想这样写 SQL:
-- Illustrative pseudo-SQL (not runnable): OVER (...) must follow an analytic
-- function, so a bare column such as v1 cannot be used this way. It sketches the
-- desired output: v1 and v2 each sorted on their own within every grp.
SELECT
*,
v1 OVER (PARTITION BY grp ORDER BY v1 ASC) as v1_ordered,
v2 OVER (PARTITION BY grp ORDER BY v2 ASC) as v2_ordered
FROM [example_table]
这破坏了 table 行的含义,但它是许多应用程序所必需的功能 - 例如计算两个字段之间的有序相关性 CORR(v1_ordered, v2_ordered).
这可能吗?
我认为你的方向是对的!您只需要使用合适的窗口函数即可——在这种情况下是 ROW_NUMBER()。它应该有效!
根据@cgn 的要求添加工作示例:
我认为没有办法完全避免使用 JOIN。
同时,下面的示例只使用一个 JOIN,而其他答案中使用了两个 JOIN:
-- Pairs the i-th smallest v1 with the i-th smallest v2 inside every grp.
-- Each derived table ranks one column on its own; joining on equal ranks
-- lines the two independently sorted columns up side by side.
SELECT
  by_v1.grp AS grp,
  by_v1.v1 AS v1,
  by_v1.v2 AS v2,
  by_v1.v1 AS v1_ordered,
  by_v2.v2 AS v2_ordered
FROM (
  -- rank of every row by v1 within its grp
  SELECT
    grp,
    v1,
    v2,
    ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1_rank
  FROM [example_table]
) AS by_v1
INNER JOIN (
  -- rank of every row by v2 within its grp
  SELECT
    grp,
    v1,
    v2,
    ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2_rank
  FROM [example_table]
) AS by_v2
ON by_v1.grp = by_v2.grp AND by_v1.v1_rank = by_v2.v2_rank
结果符合预期:
grp v1 v2 v1_ordered v2_ordered
2 4 9 4 7
2 5 7 5 9
3 10 2 10 1
3 11 1 11 2
现在您可以像下面那样使用 CORR()
-- Per-grp correlation between the independently sorted v1 and v2 values.
-- The inner query pairs the i-th smallest v1 with the i-th smallest v2 of
-- each grp by joining on matching ROW_NUMBER() ranks.
SELECT
  grp,
  CORR(v1_ordered, v2_ordered) AS [corr]
FROM (
  SELECT
    by_v1.grp AS grp,
    by_v1.v1 AS v1,
    by_v1.v2 AS v2,
    by_v1.v1 AS v1_ordered,
    by_v2.v2 AS v2_ordered
  FROM (
    -- rank of every row by v1 within its grp
    SELECT
      grp,
      v1,
      v2,
      ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1_rank
    FROM [example_table]
  ) AS by_v1
  INNER JOIN (
    -- rank of every row by v2 within its grp
    SELECT
      grp,
      v1,
      v2,
      ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2_rank
    FROM [example_table]
  ) AS by_v2
  ON by_v1.grp = by_v2.grp AND by_v1.v1_rank = by_v2.v2_rank
)
GROUP BY grp
我不能 100% 确定这在 BigQuery 中有效,但可以这样写:
-- Pairs the i-th smallest v1 with the i-th smallest v2 inside each grp.
-- e carries every original row plus its rank by v1 (seqnum_v1) and by v2
-- (seqnum_v2); ev1 and ev2 supply the value that holds a given rank.
-- The sorted values are aliased so the result has no duplicate column names.
select e.*,
       ev1.v1 as v1_ordered,
       ev2.v2 as v2_ordered
from (select e.*,
             row_number() over (partition by grp order by v1) as seqnum_v1,
             row_number() over (partition by grp order by v2) as seqnum_v2
      from example e
     ) e join
     (select e.*, row_number() over (partition by grp order by v1) as seqnum_v1
      from example e
     ) ev1
     on ev1.grp = e.grp and ev1.seqnum_v1 = e.seqnum_v1 join
     (select e.*, row_number() over (partition by grp order by v2) as seqnum_v2
      from example e
     ) ev2
     -- Match the v2 rank against the row's v1 rank. Joining on e.seqnum_v2
     -- would only return the row's own v2 and leave the column unsorted.
     on ev2.grp = e.grp and ev2.seqnum_v2 = e.seqnum_v1;
我们的想法是为每一列分配一个独立的顺序。然后再加入原来的table得到实际值。
这对你有用。
注意:数据库返回行的顺序不一定与您示例中给出的顺序一致。就我而言,v1 列得到的是 4,5,10,11,而不是您的 5,4,10,11。不过,v1_ordered 和 v2_ordered 的输出与您想要的相同。
-- Places the independently sorted v1 and v2 values of each grp next to the
-- original rows. Rows are matched through their ROW_NUMBER() rank: the row
-- ranked i by v1 receives the i-th smallest v1 and the i-th smallest v2.
SELECT
    base.grp,
    base.v1,
    base.v2,
    by_v1.v1 AS v1_ordered,
    by_v2.v2 AS v2_ordered
FROM (
    -- every row with its rank by v1 and by v2 inside its grp
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1o,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2o
    FROM table1 src
) base
INNER JOIN (
    -- rows ranked by v1
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1o
    FROM table1 src
) by_v1
    ON base.grp = by_v1.grp
    AND base.v1o = by_v1.v1o
INNER JOIN (
    -- rows ranked by v2
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2o
    FROM table1 src
) by_v2
    ON base.grp = by_v2.grp
    -- the v2 rank is matched against the v1 rank on purpose
    AND base.v1o = by_v2.v2o
输出:
+------+-----+-----+-------------+------------+
| grp | v1 | v2 | v1_ordered | v2_ordered |
+------+-----+-----+-------------+------------+
| 2 | 4 | 9 | 4 | 7 |
| 2 | 5 | 7 | 5 | 9 |
| 3 | 10 | 2 | 10 | 1 |
| 3 | 11 | 1 | 11 | 2 |
+------+-----+-----+-------------+------------+
假设我有一些数据,例如:
grp v1 v2
--- -- --
2 5 7
2 4 9
3 10 2
3 11 1
我想创建独立于 table 顺序的新列 - 这样两列就有独立的顺序,即独立于 v2 按 v1 排序,同时按 grp 分区。
结果(独立排序,按 grp 分区)将是:
grp v1 v2 v1_ordered v2_ordered
--- -- -- ---------- ----------
2 5 7 4 7
2 4 9 5 9
3 10 2 10 1
3 11 1 11 2
一种方法是创建两个表,然后对它们做 CROSS JOIN。但是,我处理的数据行太多,这种做法在计算上不可行 - 有没有一种方法可以在没有 JOIN 的情况下在单个查询中完成此操作?
基本上,我想这样写 SQL:
-- Illustrative pseudo-SQL (not runnable): OVER (...) must follow an analytic
-- function, so a bare column such as v1 cannot be used this way. It sketches the
-- desired output: v1 and v2 each sorted on their own within every grp.
SELECT
*,
v1 OVER (PARTITION BY grp ORDER BY v1 ASC) as v1_ordered,
v2 OVER (PARTITION BY grp ORDER BY v2 ASC) as v2_ordered
FROM [example_table]
这破坏了 table 行的含义,但它是许多应用程序所必需的功能 - 例如计算两个字段之间的有序相关性 CORR(v1_ordered, v2_ordered).
这可能吗?
我认为你的方向是对的!您只需要使用合适的窗口函数即可——在这种情况下是 ROW_NUMBER()。它应该有效!
根据@cgn 的要求添加工作示例:
我认为没有办法完全避免使用 JOIN。
同时,下面的示例只使用一个 JOIN,而其他答案中使用了两个 JOIN:
-- Pairs the i-th smallest v1 with the i-th smallest v2 inside every grp.
-- Each derived table ranks one column on its own; joining on equal ranks
-- lines the two independently sorted columns up side by side.
SELECT
  by_v1.grp AS grp,
  by_v1.v1 AS v1,
  by_v1.v2 AS v2,
  by_v1.v1 AS v1_ordered,
  by_v2.v2 AS v2_ordered
FROM (
  -- rank of every row by v1 within its grp
  SELECT
    grp,
    v1,
    v2,
    ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1_rank
  FROM [example_table]
) AS by_v1
INNER JOIN (
  -- rank of every row by v2 within its grp
  SELECT
    grp,
    v1,
    v2,
    ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2_rank
  FROM [example_table]
) AS by_v2
ON by_v1.grp = by_v2.grp AND by_v1.v1_rank = by_v2.v2_rank
结果符合预期:
grp v1 v2 v1_ordered v2_ordered
2 4 9 4 7
2 5 7 5 9
3 10 2 10 1
3 11 1 11 2
现在您可以像下面那样使用 CORR()
-- Per-grp correlation between the independently sorted v1 and v2 values.
-- The inner query pairs the i-th smallest v1 with the i-th smallest v2 of
-- each grp by joining on matching ROW_NUMBER() ranks.
SELECT
  grp,
  CORR(v1_ordered, v2_ordered) AS [corr]
FROM (
  SELECT
    by_v1.grp AS grp,
    by_v1.v1 AS v1,
    by_v1.v2 AS v2,
    by_v1.v1 AS v1_ordered,
    by_v2.v2 AS v2_ordered
  FROM (
    -- rank of every row by v1 within its grp
    SELECT
      grp,
      v1,
      v2,
      ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1_rank
    FROM [example_table]
  ) AS by_v1
  INNER JOIN (
    -- rank of every row by v2 within its grp
    SELECT
      grp,
      v1,
      v2,
      ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2_rank
    FROM [example_table]
  ) AS by_v2
  ON by_v1.grp = by_v2.grp AND by_v1.v1_rank = by_v2.v2_rank
)
GROUP BY grp
我不能 100% 确定这在 BigQuery 中有效,但可以这样写:
-- Pairs the i-th smallest v1 with the i-th smallest v2 inside each grp.
-- e carries every original row plus its rank by v1 (seqnum_v1) and by v2
-- (seqnum_v2); ev1 and ev2 supply the value that holds a given rank.
-- The sorted values are aliased so the result has no duplicate column names.
select e.*,
       ev1.v1 as v1_ordered,
       ev2.v2 as v2_ordered
from (select e.*,
             row_number() over (partition by grp order by v1) as seqnum_v1,
             row_number() over (partition by grp order by v2) as seqnum_v2
      from example e
     ) e join
     (select e.*, row_number() over (partition by grp order by v1) as seqnum_v1
      from example e
     ) ev1
     on ev1.grp = e.grp and ev1.seqnum_v1 = e.seqnum_v1 join
     (select e.*, row_number() over (partition by grp order by v2) as seqnum_v2
      from example e
     ) ev2
     -- Match the v2 rank against the row's v1 rank. Joining on e.seqnum_v2
     -- would only return the row's own v2 and leave the column unsorted.
     on ev2.grp = e.grp and ev2.seqnum_v2 = e.seqnum_v1;
我们的想法是为每一列分配一个独立的顺序。然后再加入原来的table得到实际值。
这对你有用。
注意:数据库返回行的顺序不一定与您示例中给出的顺序一致。就我而言,v1 列得到的是 4,5,10,11,而不是您的 5,4,10,11。不过,v1_ordered 和 v2_ordered 的输出与您想要的相同。
-- Places the independently sorted v1 and v2 values of each grp next to the
-- original rows. Rows are matched through their ROW_NUMBER() rank: the row
-- ranked i by v1 receives the i-th smallest v1 and the i-th smallest v2.
SELECT
    base.grp,
    base.v1,
    base.v2,
    by_v1.v1 AS v1_ordered,
    by_v2.v2 AS v2_ordered
FROM (
    -- every row with its rank by v1 and by v2 inside its grp
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1o,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2o
    FROM table1 src
) base
INNER JOIN (
    -- rows ranked by v1
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v1) AS v1o
    FROM table1 src
) by_v1
    ON base.grp = by_v1.grp
    AND base.v1o = by_v1.v1o
INNER JOIN (
    -- rows ranked by v2
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY v2) AS v2o
    FROM table1 src
) by_v2
    ON base.grp = by_v2.grp
    -- the v2 rank is matched against the v1 rank on purpose
    AND base.v1o = by_v2.v2o
输出:
+------+-----+-----+-------------+------------+
| grp | v1 | v2 | v1_ordered | v2_ordered |
+------+-----+-----+-------------+------------+
| 2 | 4 | 9 | 4 | 7 |
| 2 | 5 | 7 | 5 | 9 |
| 3 | 10 | 2 | 10 | 1 |
| 3 | 11 | 1 | 11 | 2 |
+------+-----+-----+-------------+------------+