通过从子集中的列中查找唯一字符串来派生列
Derive Column By Finding Unique String From Column in a Subset
这个问题背后的想法与另一个问题类似,只是略有不同。请考虑以下数据:
实际输出
+-------------+--------+------+
| Id | Weight | Type |
+-------------+--------+------+
| 00011223344 | 35 | A |
| 00011223344 | 10 | A |
| 12311223344 | 100 | B |
| 00034343434 | 25 | A |
| 00034343434 | 25 | A |
| 99934343434 | 200 | C |
| 88855667788 | 100 | D |
+-------------+--------+------+
派生输出
+-------------+-------------+--------+---------------+------+
| Id | Actual ID | Weight | Actual Weight | Type |
+-------------+-------------+--------+---------------+------+
| 00011223344 | 12311223344 | 35 | 35 | A |
| 00011223344 | 12311223344 | 10 | 10 | A |
| 12311223344 | 12311223344 | 100 | 55 | B |
| 00034343434 | 99934343434 | 25 | 25 | A |
| 00034343434 | 99934343434 | 25 | 25 | A |
| 99934343434 | 99934343434 | 200 | 150 | C |
| 88855667788 | 88855667788 | 100 | 100 | D |
+-------------+-------------+--------+---------------+------+
我需要在上面的表中再添加一列 Actual_Type,它对应于 Type 列。上面的表可以通过下面的代码推导得出:
-- keyed: tag every row with actual_id, the largest id among the rows that
-- share the same id once its first three characters are removed.
WITH keyed AS (
    SELECT
        src.*,
        MAX(src.id) OVER (PARTITION BY STUFF(src.id, 1, 3, '')) AS actual_id
    FROM t AS src
)
-- Rows whose id starts with '000' keep their weight; every other row has the
-- summed weight of the '000' rows in its actual_id group subtracted from it.
SELECT
    k.*,
    CASE
        WHEN k.id LIKE '000%' THEN k.weight
        ELSE k.weight
             - SUM(CASE WHEN k.id LIKE '000%' THEN k.weight ELSE 0 END)
                   OVER (PARTITION BY k.actual_id)
    END AS actual_weight
FROM keyed AS k;
我想不出如何在 varchar 类型的列上使用 PARTITION BY 并取最大值。
期望输出
+-------------+-------------+--------+---------------+------+-------------+
| Id | Actual ID | Weight | Actual Weight | Type | Actual Type |
+-------------+-------------+--------+---------------+------+-------------+
| 00011223344 | 12311223344 | 35 | 35 | A | B |
| 00011223344 | 12311223344 | 10 | 10 | A | B |
| 12311223344 | 12311223344 | 100 | 55 | B | B |
| 00034343434 | 99934343434 | 25 | 25 | A | C |
| 00034343434 | 99934343434 | 25 | 25 | A | C |
| 99934343434 | 99934343434 | 200 | 150 | C | C |
| 88855667788 | 88855667788 | 100 | 100 | D | D |
+-------------+-------------+--------+---------------+------+-------------+
编辑:
我无法将 min 或 max 应用于 Type 列(该列为 nvarchar 类型)。我想按 Actual ID 列进行分区,并在 Type 列中找到唯一的值,即前 3 行对应 B,接下来的 3 行对应 C。
我在您的原始查询中添加了另一列,其分区逻辑与您用来创建 actual_id 列的逻辑相同。
架构:
-- Sample table and rows used by the example queries.
create table t (
    ID     varchar(20),
    Weight int,
    Type   varchar(20)
);

-- Columns are listed explicitly so the insert does not depend on the
-- physical column order of t.
insert into t (ID, Weight, Type)
values ('00011223344',  35, 'A'),
       ('00011223344',  10, 'A'),
       ('12311223344', 100, 'B'),
       ('00034343434',  25, 'A'),
       ('00034343434',  25, 'A'),
       ('99934343434', 200, 'C'),
       ('88855667788', 100, 'D');
查询:
-- Extends the actual_weight query with one more column: the Type of the row
-- that owns the group's actual_id.
-- A plain max(type) only gives that value while the owning row's type sorts
-- after the type of the '000' rows ('B' > 'A' in the sample data); selecting
-- the type where id = actual_id removes the dependence on sort order.
-- The window is partitioned by the same expression that builds actual_id.
-- The extra column is left unaliased, so its header is "(No column name)".
select t.*,
       (case when id like '000%' then weight
             else weight - sum(case when id like '000%' then weight else 0 end) over (partition by actual_id)
        end) as actual_weight,
       max(case when id = actual_id then type end) over (partition by stuff(id, 1, 3, ''))
from (select t.*,
             -- largest id among rows sharing the same id without its first 3 characters
             max(id) over (partition by stuff(id, 1, 3, '')) as actual_id
      from t
     ) t;
输出:
| ID | Weight | Type | actual_id | actual_weight | (No column name) |
|---|---|---|---|---|---|
| 00011223344 | 35 | A | 12311223344 | 35 | B |
| 00011223344 | 10 | A | 12311223344 | 10 | B |
| 12311223344 | 100 | B | 12311223344 | 55 | B |
| 00034343434 | 25 | A | 99934343434 | 25 | C |
| 00034343434 | 25 | A | 99934343434 | 25 | C |
| 99934343434 | 200 | C | 99934343434 | 150 | C |
| 88855667788 | 100 | D | 88855667788 | 100 | D |
db<>fiddle 在线演示
使用 FIRST_VALUE() 窗口函数:
-- keyed: tag every row with actual_id, the largest id among the rows that
-- share the same id once its first three characters are removed.
WITH keyed AS (
    SELECT
        src.*,
        MAX(src.id) OVER (PARTITION BY STUFF(src.id, 1, 3, '')) AS actual_id
    FROM t AS src
),
-- weighed: add actual_weight and counter, the number of rows that share the
-- same (actual_id, Type) pair.
weighed AS (
    SELECT
        k.*,
        CASE
            WHEN k.id LIKE '000%' THEN k.weight
            ELSE k.weight
                 - SUM(CASE WHEN k.id LIKE '000%' THEN k.WEIGHT ELSE 0 END)
                       OVER (PARTITION BY k.actual_id)
        END AS actual_weight,
        COUNT(*) OVER (PARTITION BY k.actual_id, k.Type) AS counter
    FROM keyed AS k
)
-- actual_type: within each actual_id group, the Type of the row that comes
-- first when ordered by counter ascending.
-- NOTE(review): the ORDER BY has no tie-breaker, so if two types in a group
-- have the same counter the choice between them is not pinned down.
SELECT
    w.Id,
    w.Weight,
    w.Type,
    w.actual_id,
    w.actual_weight,
    FIRST_VALUE(w.Type) OVER (PARTITION BY w.actual_id ORDER BY w.counter) AS actual_type
FROM weighed AS w;
参见demo。
这个问题背后的想法与另一个问题类似,只是略有不同。请考虑以下数据:
实际输出
+-------------+--------+------+
| Id | Weight | Type |
+-------------+--------+------+
| 00011223344 | 35 | A |
| 00011223344 | 10 | A |
| 12311223344 | 100 | B |
| 00034343434 | 25 | A |
| 00034343434 | 25 | A |
| 99934343434 | 200 | C |
| 88855667788 | 100 | D |
+-------------+--------+------+
派生输出
+-------------+-------------+--------+---------------+------+
| Id | Actual ID | Weight | Actual Weight | Type |
+-------------+-------------+--------+---------------+------+
| 00011223344 | 12311223344 | 35 | 35 | A |
| 00011223344 | 12311223344 | 10 | 10 | A |
| 12311223344 | 12311223344 | 100 | 55 | B |
| 00034343434 | 99934343434 | 25 | 25 | A |
| 00034343434 | 99934343434 | 25 | 25 | A |
| 99934343434 | 99934343434 | 200 | 150 | C |
| 88855667788 | 88855667788 | 100 | 100 | D |
+-------------+-------------+--------+---------------+------+
我需要在上面的表中再添加一列 Actual_Type,它对应于 Type 列。上面的表可以通过下面的代码推导得出:
-- keyed: tag every row with actual_id, the largest id among the rows that
-- share the same id once its first three characters are removed.
WITH keyed AS (
    SELECT
        src.*,
        MAX(src.id) OVER (PARTITION BY STUFF(src.id, 1, 3, '')) AS actual_id
    FROM t AS src
)
-- Rows whose id starts with '000' keep their weight; every other row has the
-- summed weight of the '000' rows in its actual_id group subtracted from it.
SELECT
    k.*,
    CASE
        WHEN k.id LIKE '000%' THEN k.weight
        ELSE k.weight
             - SUM(CASE WHEN k.id LIKE '000%' THEN k.weight ELSE 0 END)
                   OVER (PARTITION BY k.actual_id)
    END AS actual_weight
FROM keyed AS k;
我想不出如何在 varchar 类型的列上使用 PARTITION BY 并取最大值。
期望输出
+-------------+-------------+--------+---------------+------+-------------+
| Id | Actual ID | Weight | Actual Weight | Type | Actual Type |
+-------------+-------------+--------+---------------+------+-------------+
| 00011223344 | 12311223344 | 35 | 35 | A | B |
| 00011223344 | 12311223344 | 10 | 10 | A | B |
| 12311223344 | 12311223344 | 100 | 55 | B | B |
| 00034343434 | 99934343434 | 25 | 25 | A | C |
| 00034343434 | 99934343434 | 25 | 25 | A | C |
| 99934343434 | 99934343434 | 200 | 150 | C | C |
| 88855667788 | 88855667788 | 100 | 100 | D | D |
+-------------+-------------+--------+---------------+------+-------------+
编辑:
我无法将 min 或 max 应用于 Type 列(该列为 nvarchar 类型)。我想按 Actual ID 列进行分区,并在 Type 列中找到唯一的值,即前 3 行对应 B,接下来的 3 行对应 C。
我在您的原始查询中添加了另一列,其分区逻辑与您用来创建 actual_id 列的逻辑相同。
架构:
-- Sample table and rows used by the example queries.
create table t (
    ID     varchar(20),
    Weight int,
    Type   varchar(20)
);

-- Columns are listed explicitly so the insert does not depend on the
-- physical column order of t.
insert into t (ID, Weight, Type)
values ('00011223344',  35, 'A'),
       ('00011223344',  10, 'A'),
       ('12311223344', 100, 'B'),
       ('00034343434',  25, 'A'),
       ('00034343434',  25, 'A'),
       ('99934343434', 200, 'C'),
       ('88855667788', 100, 'D');
查询:
-- Extends the actual_weight query with one more column: the Type of the row
-- that owns the group's actual_id.
-- A plain max(type) only gives that value while the owning row's type sorts
-- after the type of the '000' rows ('B' > 'A' in the sample data); selecting
-- the type where id = actual_id removes the dependence on sort order.
-- The window is partitioned by the same expression that builds actual_id.
-- The extra column is left unaliased, so its header is "(No column name)".
select t.*,
       (case when id like '000%' then weight
             else weight - sum(case when id like '000%' then weight else 0 end) over (partition by actual_id)
        end) as actual_weight,
       max(case when id = actual_id then type end) over (partition by stuff(id, 1, 3, ''))
from (select t.*,
             -- largest id among rows sharing the same id without its first 3 characters
             max(id) over (partition by stuff(id, 1, 3, '')) as actual_id
      from t
     ) t;
输出:
ID | Weight | Type | actual_id | actual_weight | (No column name) |
---|---|---|---|---|---|
00011223344 | 35 | A | 12311223344 | 35 | B |
00011223344 | 10 | A | 12311223344 | 10 | B |
12311223344 | 100 | B | 12311223344 | 55 | B |
00034343434 | 25 | A | 99934343434 | 25 | C |
00034343434 | 25 | A | 99934343434 | 25 | C |
99934343434 | 200 | C | 99934343434 | 150 | C |
88855667788 | 100 | D | 88855667788 | 100 | D |
db<>fiddle 在线演示
使用 FIRST_VALUE() 窗口函数:
-- keyed: tag every row with actual_id, the largest id among the rows that
-- share the same id once its first three characters are removed.
WITH keyed AS (
    SELECT
        src.*,
        MAX(src.id) OVER (PARTITION BY STUFF(src.id, 1, 3, '')) AS actual_id
    FROM t AS src
),
-- weighed: add actual_weight and counter, the number of rows that share the
-- same (actual_id, Type) pair.
weighed AS (
    SELECT
        k.*,
        CASE
            WHEN k.id LIKE '000%' THEN k.weight
            ELSE k.weight
                 - SUM(CASE WHEN k.id LIKE '000%' THEN k.WEIGHT ELSE 0 END)
                       OVER (PARTITION BY k.actual_id)
        END AS actual_weight,
        COUNT(*) OVER (PARTITION BY k.actual_id, k.Type) AS counter
    FROM keyed AS k
)
-- actual_type: within each actual_id group, the Type of the row that comes
-- first when ordered by counter ascending.
-- NOTE(review): the ORDER BY has no tie-breaker, so if two types in a group
-- have the same counter the choice between them is not pinned down.
SELECT
    w.Id,
    w.Weight,
    w.Type,
    w.actual_id,
    w.actual_weight,
    FIRST_VALUE(w.Type) OVER (PARTITION BY w.actual_id ORDER BY w.counter) AS actual_type
FROM weighed AS w;
参见demo。