按序列号和按标志分组的 SUM 值
SUM Values by Sequence Number and Group By Flag
我们有一个带有序列号的列表。顺序会中断,然后重新开始。
如下所示,SalesOrderLine
缺少数字 4。
SalesOrder SalesOrderLine MStockCode MPrice MBomFlag
000000000182181 1 901337 0.00000 P
000000000182181 2 901335 2476.90000 C
000000000182181 3 340151 0.00000 C
000000000182181 5 900894 0.00000 P
000000000182181 6 400379 0.00000 C
000000000182181 7 900570 600.90000 C
我想做的是按连续的行号序列对 MPrice
字段求和,然后用 MBomFlag
字段选出“父”行。
以下是上述数据的预期结果。分组时以 MBomFlag
字段值为 P
的行为准。
SalesOrder SalesOrderLine MStockCode MPrice MBomFlag
000000000182181 1 901337 2476.90000 P
000000000182181 5 900894 600.90000 P
解决此问题的最佳方法是什么?我尝试过使用 RANK()
、ROW_NUMBER()
、LEAD
和 LAG
来实现,但没有成功。
这是源数据:
-- Sample data for the question: one sales order whose line numbers skip 4,
-- so lines 1-3 and 5-7 form two runs of consecutive numbers.
CREATE TABLE #SalesOrder (
    SalesOrder     NVARCHAR(20),
    SalesOrderLine INT,
    MStockCode     INT,
    MPrice         DECIMAL(18,2),
    MBomFlag       VARCHAR(1)
);

-- A single VALUES list replaces the UNION chain (UNION de-duplicates rows
-- that are already distinct). MStockCode uses integer literals because the
-- column is INT; string literals would force an implicit conversion.
INSERT INTO #SalesOrder (SalesOrder, SalesOrderLine, MStockCode, MPrice, MBomFlag)
VALUES
    (N'000000000182181', 1, 901337,    0.00, 'P'),
    (N'000000000182181', 2, 901335, 2476.90, 'C'),
    (N'000000000182181', 3, 340151,    0.00, 'C'),
    (N'000000000182181', 5, 900894,    0.00, 'P'),
    (N'000000000182181', 6, 400379,    0.00, 'C'),
    -- 600.90 (not 2600.90): agrees with the data listing and the expected
    -- result shown in the question.
    (N'000000000182181', 7, 900570,  600.90, 'C');

-- Explicit column list and ORDER BY make the check output deterministic.
SELECT
    SalesOrder,
    SalesOrderLine,
    MStockCode,
    MPrice,
    MBomFlag
FROM #SalesOrder
ORDER BY SalesOrder, SalesOrderLine;

DROP TABLE #SalesOrder;
您可以使用 lag()
来检测 SalesOrderLine 序列中何时出现跳转,并对跳转次数做累计求和(running total)。累计跳转次数相同的行属于同一组。
-- Gaps-and-islands via LAG: mark every row whose line number does not
-- directly follow the previous one, then keep a running count of the marks.
-- Rows sharing the same running count belong to the same consecutive run.
-- All windows are partitioned by SalesOrder so that runs from different
-- orders are never merged.
with u as (
    -- previousSOL is NULL on the first line of each order.
    select SalesOrder,
           SalesOrderLine,
           MStockCode,
           MPrice,
           MBomFlag,
           lag(SalesOrderLine) over (partition by SalesOrder
                                     order by SalesOrderLine) as previousSOL
    from #SalesOrder
),
v as (
    -- A NULL previousSOL makes the comparison unknown, so the first row
    -- takes the ELSE branch and opens run number 1.
    select SalesOrder,
           SalesOrderLine,
           MStockCode,
           MPrice,
           MBomFlag,
           sum(case when SalesOrderLine = previousSOL + 1 then 0 else 1 end)
               over (partition by SalesOrder
                     order by SalesOrderLine
                     rows unbounded preceding) as jumps
    from u
)
-- One output row per run: the parent ('P') row's identifiers plus the sum of
-- MPrice over every row of the run.
select SalesOrder,
       min(case when MBomFlag = 'P' then SalesOrderLine end) as SalesOrderLine,
       min(case when MBomFlag = 'P' then MStockCode end) as MStockCode,
       sum(MPrice) as Mprice,
       'P' as MBomFlag
from v
group by SalesOrder, jumps;
请尝试以下解决方案。
SQL
-- DDL and sample data population, start
-- Table variable holding the sample order lines; ID is an IDENTITY(1,1)
-- surrogate key.
DECLARE @tbl TABLE (ID INT IDENTITY(1,1) PRIMARY KEY,
    SalesOrder NVARCHAR(20),
    SalesOrderLine INT,
    MStockCode INT,
    MPrice DECIMAL(18,2),
    MBomFlag VARCHAR(1));

-- MStockCode is an INT column, so integer literals are used instead of
-- strings to avoid an implicit varchar-to-int conversion.
INSERT INTO @tbl (SalesOrder, SalesOrderLine, MStockCode, MPrice, MBomFlag) VALUES
(N'000000000182181', 1, 901337, 0.00, 'P'),
(N'000000000182181', 2, 901335, 2476.90, 'C'),
(N'000000000182181', 3, 340151, 0.00, 'C'),
(N'000000000182181', 5, 900894, 0.00, 'P'),
(N'000000000182181', 6, 400379, 0.00, 'C'),
(N'000000000182181', 7, 900570, 600.90, 'C');
-- DDL and sample data population, end
-- Gaps-and-islands: inside a run of consecutive line numbers, the line
-- number minus its per-order row number is constant, so that difference (ns)
-- identifies the run. ROW_NUMBER is used instead of the IDENTITY column so
-- the key does not depend on insertion order or on there being one order.
WITH rs AS
(
    SELECT series.SalesOrder
         , series.SalesOrderLine
         , series.MStockCode
         , series.MPrice
         , series.MBomFlag
         , ns = series.SalesOrderLine
                - ROW_NUMBER() OVER (PARTITION BY series.SalesOrder
                                     ORDER BY series.SalesOrderLine)
    FROM @tbl AS series
), cte AS
(
    -- Carry the first row's stock code and flag across its whole run.
    SELECT rs.SalesOrder
         , rs.SalesOrderLine
         , rs.MPrice
         , rs.ns
         , FIRST_VALUE(rs.MStockCode) OVER (PARTITION BY rs.SalesOrder, rs.ns
                                            ORDER BY rs.SalesOrderLine) AS _MStockCode
         , FIRST_VALUE(rs.MBomFlag) OVER (PARTITION BY rs.SalesOrder, rs.ns
                                          ORDER BY rs.SalesOrderLine) AS _MBomFlag
    FROM rs
)
-- One row per run: first line's identifiers plus the summed price.
SELECT cte.SalesOrder
     , MIN(cte.SalesOrderLine) AS SalesOrderLine
     , MIN(cte._MStockCode) AS MStockCode
     , SUM(cte.MPrice) AS MPrice
     , MIN(cte._MBomFlag) AS MBomFlag
FROM cte
GROUP BY cte.SalesOrder, cte.ns
ORDER BY cte.SalesOrder, MIN(cte.SalesOrderLine);
输出
+-----------------+----------------+------------+---------+----------+
| SalesOrder | SalesOrderLine | MStockCode | MPrice | MBomFlag |
+-----------------+----------------+------------+---------+----------+
| 000000000182181 | 1 | 901337 | 2476.90 | P |
| 000000000182181 | 5 | 900894 | 600.90 | P |
+-----------------+----------------+------------+---------+----------+
我假设这是以每个 SalesOrder
为单位进行的。这是标准的“间隙与岛屿”(gaps and islands)问题。不需要递归查询或临时表:
-- Gaps-and-islands: within a run of consecutive line numbers,
-- SalesOrderLine minus its per-order row number is constant, so grp
-- identifies the run.
with data as (
    select SalesOrder,
           SalesOrderLine,
           MStockCode,
           MPrice,
           MBomFlag,
           SalesOrderLine - row_number()
               over (partition by SalesOrder order by SalesOrderLine) as grp
    from #SalesOrder
)
select
    -- SalesOrder is a grouping column; the first line of the run, the
    -- parent's stock code and the run's total price follow.
    SalesOrder,
    min(SalesOrderLine) as SalesOrderLine,
    min(case when MBomFlag = 'P' then MStockCode end) as MStockCode,
    -- The requested result is the total price of the run, not the parent
    -- row's own price.
    sum(MPrice) as MPrice,
    /* doesn't assume there will be a parent -- also retains column datatype */
    min(case when MBomFlag = 'P' then MBomFlag end) as MBomFlag
from data
group by SalesOrder, grp;
这是一个经典的“间隙与岛屿”(gaps and islands)问题。
但是,在这种情况下,每个岛的起点都由 P
(或者说不是 C
的一行)清楚地标出。所以我们不需要 LAG
。
我们只需要为每个岛分配一个分组 ID,这可以用带条件的窗口 COUNT
来完成。然后只需按该 ID 分组即可。
-- Each island starts at a non-'C' row. A running count of non-'C' rows per
-- order therefore gives every row the number of the island it belongs to.
SELECT
    pv.SalesOrder,
    SalesOrderLine = MIN(CASE WHEN pv.MBomFlag <> 'C' THEN pv.SalesOrderLine END),
    MStockCode     = MIN(CASE WHEN pv.MBomFlag <> 'C' THEN pv.MStockCode END),
    -- Total price of the whole island; taking only the parent row's price
    -- would return 0.00 for the sample data.
    MPrice         = SUM(pv.MPrice),
    MBomFlag       = MIN(CASE WHEN pv.MBomFlag <> 'C' THEN pv.MBomFlag END)
FROM (
    SELECT
        t.SalesOrder,
        t.SalesOrderLine,
        t.MStockCode,
        t.MPrice,
        t.MBomFlag,
        -- NULLIF yields NULL for 'C' rows, and COUNT skips NULLs.
        GroupingId = COUNT(NULLIF(t.MBomFlag, 'C')) OVER (PARTITION BY t.SalesOrder ORDER BY t.SalesOrderLine ROWS UNBOUNDED PRECEDING)
    FROM @tbl t
) pv
GROUP BY
    pv.SalesOrder,
    pv.GroupingId;
请注意,当标志为 C
时,NULLIF(t.MBomFlag, 'C')
返回 NULL,因此 COUNT
只会统计其他行。您也可以用 COUNT(CASE WHEN t.MBomFlag = 'P' THEN 1 END)
显式地写出来。
数据按 SalesOrderLine 排序。每当相邻两行的 SalesOrderLine 值之差不为 1 时,就开始一个新组;在每个组中,找到 MBomFlag 为 P 的记录,并将其 MPrice 值改为该组内 MPrice 值之和。要在 SQL 中实现这一点,需要先用窗口函数创建一个标记列,按该标记列对行进行分组,再用 CASE 语句得到最终结果。一种常见的替代做法是把原始数据从数据库中取出,再用 Python 或 SPL 进行处理。SPL 是一个开源的 Java 包,很容易集成到 Java 程序中,并且代码更简洁。它只需三行代码即可完成该计算:
img
我们有一个带有序列号的列表。顺序会中断,然后重新开始。
如下所示,SalesOrderLine
缺少数字 4。
SalesOrder SalesOrderLine MStockCode MPrice MBomFlag
000000000182181 1 901337 0.00000 P
000000000182181 2 901335 2476.90000 C
000000000182181 3 340151 0.00000 C
000000000182181 5 900894 0.00000 P
000000000182181 6 400379 0.00000 C
000000000182181 7 900570 600.90000 C
我想做的是按连续的行号序列对 MPrice
字段求和,然后用 MBomFlag
字段选出“父”行。
以下是上述数据的预期结果。分组时以 MBomFlag
字段值为 P
的行为准。
SalesOrder SalesOrderLine MStockCode MPrice MBomFlag
000000000182181 1 901337 2476.90000 P
000000000182181 5 900894 600.90000 P
解决此问题的最佳方法是什么?我尝试过使用 RANK()
、ROW_NUMBER()
、LEAD
和 LAG
来实现,但没有成功。
这是源数据:
-- Sample data for the question: one sales order whose line numbers skip 4,
-- so lines 1-3 and 5-7 form two runs of consecutive numbers.
CREATE TABLE #SalesOrder (
    SalesOrder     NVARCHAR(20),
    SalesOrderLine INT,
    MStockCode     INT,
    MPrice         DECIMAL(18,2),
    MBomFlag       VARCHAR(1)
);

-- A single VALUES list replaces the UNION chain (UNION de-duplicates rows
-- that are already distinct). MStockCode uses integer literals because the
-- column is INT; string literals would force an implicit conversion.
INSERT INTO #SalesOrder (SalesOrder, SalesOrderLine, MStockCode, MPrice, MBomFlag)
VALUES
    (N'000000000182181', 1, 901337,    0.00, 'P'),
    (N'000000000182181', 2, 901335, 2476.90, 'C'),
    (N'000000000182181', 3, 340151,    0.00, 'C'),
    (N'000000000182181', 5, 900894,    0.00, 'P'),
    (N'000000000182181', 6, 400379,    0.00, 'C'),
    -- 600.90 (not 2600.90): agrees with the data listing and the expected
    -- result shown in the question.
    (N'000000000182181', 7, 900570,  600.90, 'C');

-- Explicit column list and ORDER BY make the check output deterministic.
SELECT
    SalesOrder,
    SalesOrderLine,
    MStockCode,
    MPrice,
    MBomFlag
FROM #SalesOrder
ORDER BY SalesOrder, SalesOrderLine;

DROP TABLE #SalesOrder;
您可以使用 lag()
来检测 SalesOrderLine 序列中何时出现跳转,并对跳转次数做累计求和(running total)。累计跳转次数相同的行属于同一组。
-- Gaps-and-islands via LAG: mark every row whose line number does not
-- directly follow the previous one, then keep a running count of the marks.
-- Rows sharing the same running count belong to the same consecutive run.
-- All windows are partitioned by SalesOrder so that runs from different
-- orders are never merged.
with u as (
    -- previousSOL is NULL on the first line of each order.
    select SalesOrder,
           SalesOrderLine,
           MStockCode,
           MPrice,
           MBomFlag,
           lag(SalesOrderLine) over (partition by SalesOrder
                                     order by SalesOrderLine) as previousSOL
    from #SalesOrder
),
v as (
    -- A NULL previousSOL makes the comparison unknown, so the first row
    -- takes the ELSE branch and opens run number 1.
    select SalesOrder,
           SalesOrderLine,
           MStockCode,
           MPrice,
           MBomFlag,
           sum(case when SalesOrderLine = previousSOL + 1 then 0 else 1 end)
               over (partition by SalesOrder
                     order by SalesOrderLine
                     rows unbounded preceding) as jumps
    from u
)
-- One output row per run: the parent ('P') row's identifiers plus the sum of
-- MPrice over every row of the run.
select SalesOrder,
       min(case when MBomFlag = 'P' then SalesOrderLine end) as SalesOrderLine,
       min(case when MBomFlag = 'P' then MStockCode end) as MStockCode,
       sum(MPrice) as Mprice,
       'P' as MBomFlag
from v
group by SalesOrder, jumps;
请尝试以下解决方案。
SQL
-- DDL and sample data population, start
-- Table variable holding the sample order lines; ID is an IDENTITY(1,1)
-- surrogate key.
DECLARE @tbl TABLE (ID INT IDENTITY(1,1) PRIMARY KEY,
    SalesOrder NVARCHAR(20),
    SalesOrderLine INT,
    MStockCode INT,
    MPrice DECIMAL(18,2),
    MBomFlag VARCHAR(1));

-- MStockCode is an INT column, so integer literals are used instead of
-- strings to avoid an implicit varchar-to-int conversion.
INSERT INTO @tbl (SalesOrder, SalesOrderLine, MStockCode, MPrice, MBomFlag) VALUES
(N'000000000182181', 1, 901337, 0.00, 'P'),
(N'000000000182181', 2, 901335, 2476.90, 'C'),
(N'000000000182181', 3, 340151, 0.00, 'C'),
(N'000000000182181', 5, 900894, 0.00, 'P'),
(N'000000000182181', 6, 400379, 0.00, 'C'),
(N'000000000182181', 7, 900570, 600.90, 'C');
-- DDL and sample data population, end
-- Gaps-and-islands: inside a run of consecutive line numbers, the line
-- number minus its per-order row number is constant, so that difference (ns)
-- identifies the run. ROW_NUMBER is used instead of the IDENTITY column so
-- the key does not depend on insertion order or on there being one order.
WITH rs AS
(
    SELECT series.SalesOrder
         , series.SalesOrderLine
         , series.MStockCode
         , series.MPrice
         , series.MBomFlag
         , ns = series.SalesOrderLine
                - ROW_NUMBER() OVER (PARTITION BY series.SalesOrder
                                     ORDER BY series.SalesOrderLine)
    FROM @tbl AS series
), cte AS
(
    -- Carry the first row's stock code and flag across its whole run.
    SELECT rs.SalesOrder
         , rs.SalesOrderLine
         , rs.MPrice
         , rs.ns
         , FIRST_VALUE(rs.MStockCode) OVER (PARTITION BY rs.SalesOrder, rs.ns
                                            ORDER BY rs.SalesOrderLine) AS _MStockCode
         , FIRST_VALUE(rs.MBomFlag) OVER (PARTITION BY rs.SalesOrder, rs.ns
                                          ORDER BY rs.SalesOrderLine) AS _MBomFlag
    FROM rs
)
-- One row per run: first line's identifiers plus the summed price.
SELECT cte.SalesOrder
     , MIN(cte.SalesOrderLine) AS SalesOrderLine
     , MIN(cte._MStockCode) AS MStockCode
     , SUM(cte.MPrice) AS MPrice
     , MIN(cte._MBomFlag) AS MBomFlag
FROM cte
GROUP BY cte.SalesOrder, cte.ns
ORDER BY cte.SalesOrder, MIN(cte.SalesOrderLine);
输出
+-----------------+----------------+------------+---------+----------+
| SalesOrder | SalesOrderLine | MStockCode | MPrice | MBomFlag |
+-----------------+----------------+------------+---------+----------+
| 000000000182181 | 1 | 901337 | 2476.90 | P |
| 000000000182181 | 5 | 900894 | 600.90 | P |
+-----------------+----------------+------------+---------+----------+
我假设这是以每个 SalesOrder
为单位进行的。这是标准的“间隙与岛屿”(gaps and islands)问题。不需要递归查询或临时表:
-- Gaps-and-islands: within a run of consecutive line numbers,
-- SalesOrderLine minus its per-order row number is constant, so grp
-- identifies the run.
with data as (
    select SalesOrder,
           SalesOrderLine,
           MStockCode,
           MPrice,
           MBomFlag,
           SalesOrderLine - row_number()
               over (partition by SalesOrder order by SalesOrderLine) as grp
    from #SalesOrder
)
select
    -- SalesOrder is a grouping column; the first line of the run, the
    -- parent's stock code and the run's total price follow.
    SalesOrder,
    min(SalesOrderLine) as SalesOrderLine,
    min(case when MBomFlag = 'P' then MStockCode end) as MStockCode,
    -- The requested result is the total price of the run, not the parent
    -- row's own price.
    sum(MPrice) as MPrice,
    /* doesn't assume there will be a parent -- also retains column datatype */
    min(case when MBomFlag = 'P' then MBomFlag end) as MBomFlag
from data
group by SalesOrder, grp;
这是一个经典的“间隙与岛屿”(gaps and islands)问题。
但是,在这种情况下,每个岛的起点都由 P
(或者说不是 C
的一行)清楚地标出。所以我们不需要 LAG
。
我们只需要为每个岛分配一个分组 ID,这可以用带条件的窗口 COUNT
来完成。然后只需按该 ID 分组即可。
-- Each island starts at a non-'C' row. A running count of non-'C' rows per
-- order therefore gives every row the number of the island it belongs to.
SELECT
    pv.SalesOrder,
    SalesOrderLine = MIN(CASE WHEN pv.MBomFlag <> 'C' THEN pv.SalesOrderLine END),
    MStockCode     = MIN(CASE WHEN pv.MBomFlag <> 'C' THEN pv.MStockCode END),
    -- Total price of the whole island; taking only the parent row's price
    -- would return 0.00 for the sample data.
    MPrice         = SUM(pv.MPrice),
    MBomFlag       = MIN(CASE WHEN pv.MBomFlag <> 'C' THEN pv.MBomFlag END)
FROM (
    SELECT
        t.SalesOrder,
        t.SalesOrderLine,
        t.MStockCode,
        t.MPrice,
        t.MBomFlag,
        -- NULLIF yields NULL for 'C' rows, and COUNT skips NULLs.
        GroupingId = COUNT(NULLIF(t.MBomFlag, 'C')) OVER (PARTITION BY t.SalesOrder ORDER BY t.SalesOrderLine ROWS UNBOUNDED PRECEDING)
    FROM @tbl t
) pv
GROUP BY
    pv.SalesOrder,
    pv.GroupingId;
请注意,当标志为 C
时,NULLIF(t.MBomFlag, 'C')
返回 NULL,因此 COUNT
只会统计其他行。您也可以用 COUNT(CASE WHEN t.MBomFlag = 'P' THEN 1 END) 显式地写出来。
数据按 SalesOrderLine 排序。每当相邻两行的 SalesOrderLine 值之差不为 1 时,就开始一个新组;在每个组中,找到 MBomFlag 为 P 的记录,并将其 MPrice 值改为该组内 MPrice 值之和。要在 SQL 中实现这一点,需要先用窗口函数创建一个标记列,按该标记列对行进行分组,再用 CASE 语句得到最终结果。一种常见的替代做法是把原始数据从数据库中取出,再用 Python 或 SPL 进行处理。SPL 是一个开源的 Java 包,很容易集成到 Java 程序中,并且代码更简洁。它只需三行代码即可完成该计算: img