如何为每次出现的特定值创建新行
How to create new rows for every occurrence of a particular value
我需要一些指导来解决这个问题。我有一个类似于下面的数据集:
Record_type Record_Text
H01 ABCDEFGHI123456789
D45 BCDEFGH098765
D20 BABRTHYUHU56789
D30 QWERTY09876558471255
D12 ASDFGHJ9814752
H02 UGHRYCGDF12304025
G80 YHNBGTRFV0147852
H01 MLOPKIJUHNB624817
D20 PLKIJUNHMY7653235
H15 MVNBDGETDGSTEX9874
D30 GNHGDTBFJVNV834687
H02 JDGHKDGHSDFIG7845387
D60 GHCNDBDGCTEF45367
H01 的每次出现都会启动一个新事务。基于以上示例,有两个事务(H01 到 G80 和 H01 到 D60)。
我需要根据某些条件,从每笔交易的 RECORD_TEXT 字段中选取某些字符。
我尝试用以下代码单独处理第一笔交易:
-- Asker's attempt: there is no GROUP BY, so every input row is folded into a
-- single comma-separated TOTAL_FIELD value. It therefore only yields the wanted
-- string when it is fed the rows of exactly one transaction.
-- NOTE(review): TABLE is presumably a placeholder for the real table name.
SELECT
    MAX(CASE RECORD_TYPE WHEN 'H01' THEN SUBSTR(RECORD_TEXT, 1, 10) END)
    || MAX(CASE RECORD_TYPE WHEN 'D20' THEN ',' || SUBSTR(RECORD_TEXT, 2, 3) END)
    || MAX(CASE RECORD_TYPE WHEN 'D30' THEN ',' || SUBSTR(RECORD_TEXT, 9, 8) END)
    || MAX(CASE RECORD_TYPE WHEN 'H02' THEN ',' || SUBSTR(RECORD_TEXT, 13, 4) END)
    AS TOTAL_FIELD
FROM TABLE
我得到了预期的输出。
ABCDEFGHI1,ABR,87655847,0402
但我无法进行后续交易。
我对上述示例的预期输出(两个事务 = 两行)将是
ABCDEFGHI1,ABR,87655847,0402
MLOPKIJUHN,LKI,JVNV8346,G784
总共有大约200笔交易。我正在使用 Teradata 版本 14。请帮忙。
这是分析函数(analytic function)擅长处理的问题。我不熟悉 Teradata,但它的用法应该与其他数据库提供的类似。
请查阅 PARTITION BY,参见此链接:
http://www.tutorialspoint.com/teradata/teradata_partitioned_primary_index.htm
你基本上可以按照你想要的方式切片你的数据
所以你会做这样的事情
PARTITION BY Record_type
除非您按照 Tyron78 的建议添加一列,否则您还必须借助其他分析函数设计一种算法,来确定中间的记录属于哪一笔交易。
希望对您有所帮助
-- Demo (T-SQL): load the sample rows into a table variable, then return one
-- row per H01-led transaction with the extracted substrings pivoted into the
-- columns H01, D20, D30 and H02.
-- CREATE_TMSP is the ordering column added for this demo; the original sample
-- data has no column that defines row order.
DECLARE @t TABLE (
    CREATE_TMSP INT,
    Record_type NVARCHAR(20),
    Record_Text NVARCHAR(50)
);

INSERT INTO @t (CREATE_TMSP, Record_type, Record_Text)
VALUES
    (1,  'H01', 'ABCDEFGHI123456789'),
    (2,  'D45', 'BCDEFGH098765'),
    (3,  'D20', 'BABRTHYUHU56789'),
    (4,  'D30', 'QWERTY09876558471255'),
    (5,  'D12', 'ASDFGHJ9814752'),
    (6,  'H02', 'UGHRYCGDF12304025'),
    (7,  'G80', 'YHNBGTRFV0147852'),
    (8,  'H01', 'MLOPKIJUHNB624817'),
    (9,  'D20', 'PLKIJUNHMY7653235'),
    (10, 'H15', 'MVNBDGETDGSTEX9874'),
    (11, 'D30', 'GNHGDTBFJVNV834687'),
    (12, 'H02', 'JDGHKDGHSDFIG7845387'),
    (13, 'D60', 'GHCNDBDGCTEF45367');

WITH
-- Every row tagged with its rank in CREATE_TMSP order.
ranked AS (
    SELECT
        RECORD_TYPE,
        RECORD_TEXT,
        DENSE_RANK() OVER (ORDER BY CREATE_TMSP) AS row_seq
    FROM @t
),
-- One row per H01 header, carrying the half-open rank range [LEFT, RIGHT) of
-- its transaction. RIGHT is the rank of the next H01 row; the last header has
-- no successor and gets the sentinel 999999 instead.
headers AS (
    SELECT
        Record_Type,
        Record_Text,
        row_seq AS DERIVED_COLUMN_LEFT,
        COALESCE(LEAD(row_seq) OVER (ORDER BY row_seq), 999999) AS DERIVED_COLUMN_RIGHT
    FROM ranked
    WHERE Record_type = 'H01'
),
-- Rows of interest attached to their header, each reduced to the substring
-- that is extracted for its record type.
pieces AS (
    SELECT
        h.DERIVED_COLUMN_LEFT  AS ID,
        h.Record_Type          AS RecordTypeHead,
        h.Record_Text          AS RecordTextHead,
        h.DERIVED_COLUMN_LEFT,
        h.DERIVED_COLUMN_RIGHT,
        r.Record_Type          AS Record_Type,
        CASE r.Record_type
            WHEN 'H01' THEN SUBSTRING(r.RECORD_TEXT, 1, 10)
            WHEN 'D20' THEN SUBSTRING(r.RECORD_TEXT, 2, 3)
            WHEN 'D30' THEN SUBSTRING(r.RECORD_TEXT, 9, 8)
            WHEN 'H02' THEN SUBSTRING(r.RECORD_TEXT, 13, 4)
        END                    AS RecordTextSplit
    FROM headers AS h
    INNER JOIN ranked AS r
        ON  r.row_seq >= h.DERIVED_COLUMN_LEFT
        AND r.row_seq <  h.DERIVED_COLUMN_RIGHT
    WHERE r.Record_type IN ('H01', 'D20', 'D30', 'H02')
)
-- PIVOT groups by every column of "pieces" it does not consume, i.e. the five
-- header columns, and spreads RecordTextSplit across one column per type.
SELECT
    pvt.ID,
    pvt.RecordTypeHead,
    pvt.RecordTextHead,
    pvt.DERIVED_COLUMN_LEFT,
    pvt.DERIVED_COLUMN_RIGHT,
    pvt.[H01],
    pvt.[D20],
    pvt.[D30],
    pvt.[H02]
FROM pieces
PIVOT
(
    MAX(RecordTextSplit)
    FOR Record_Type IN ([H01], [D20], [D30], [H02])
) AS pvt
添加时间戳列后,可以轻松地为每笔交易分配唯一编号。然后您可以应用您现有的计算:
-- Teradata: one comma-separated TOTAL_FIELD row per transaction.
-- A transaction starts at each 'H01' row. The inner query numbers transactions
-- with a running count of 'H01' rows in CREATE_TMSP order; the outer query
-- applies the per-type SUBSTR extraction to each transaction.
-- Fix: the concatenation is now wrapped in one balanced pair of parentheses
-- (the original had a closing parenthesis with no matching opening one).
-- NOTE(review): if a transaction lacks one of the four record types, that
-- MAX() is NULL; assuming NULL-propagating concatenation, TOTAL_FIELD is then
-- NULL as well. Wrap each part in COALESCE(..., '') if partial rows are wanted.
SELECT
    trans#,
    (   MAX(CASE WHEN RECORD_TYPE = 'H01' THEN SUBSTR(RECORD_TEXT,  1, 10) END)
     || MAX(CASE WHEN RECORD_TYPE = 'D20' THEN ',' || SUBSTR(RECORD_TEXT,  2, 3) END)
     || MAX(CASE WHEN RECORD_TYPE = 'D30' THEN ',' || SUBSTR(RECORD_TEXT,  9, 8) END)
     || MAX(CASE WHEN RECORD_TYPE = 'H02' THEN ',' || SUBSTR(RECORD_TEXT, 13, 4) END)
    ) AS TOTAL_FIELD
FROM
(
    SELECT
        CREATE_TMSP,
        RECORD_TYPE,
        RECORD_TEXT,
        -- assign a unique number to each transaction: running count of 'H01' rows
        SUM(CASE WHEN RECORD_TYPE = 'H01' THEN 1 ELSE 0 END)
            OVER (ORDER BY CREATE_TMSP
                  ROWS UNBOUNDED PRECEDING) AS trans#
    -- NOTE(review): "table" is presumably a placeholder for the real table name
    FROM table
    -- more efficient to filter unneeded data before the OLAP function
    WHERE RECORD_TYPE IN ('H01', 'D20', 'D30', 'H02')
    -- uncomment if the data doesn't start with an 'H01' row and you don't want partial transactions
    -- QUALIFY trans# > 0
) AS dt
GROUP BY trans#
我需要一些指导来解决这个问题。我有一个类似于下面的数据集:
Record_type Record_Text
H01 ABCDEFGHI123456789
D45 BCDEFGH098765
D20 BABRTHYUHU56789
D30 QWERTY09876558471255
D12 ASDFGHJ9814752
H02 UGHRYCGDF12304025
G80 YHNBGTRFV0147852
H01 MLOPKIJUHNB624817
D20 PLKIJUNHMY7653235
H15 MVNBDGETDGSTEX9874
D30 GNHGDTBFJVNV834687
H02 JDGHKDGHSDFIG7845387
D60 GHCNDBDGCTEF45367
H01 的每次出现都会启动一个新事务。基于以上示例,有两个事务(H01 到 G80 和 H01 到 D60)。
我需要根据某些条件,从每笔交易的 RECORD_TEXT 字段中选取某些字符。我尝试用以下代码单独处理第一笔交易:
-- Asker's attempt: there is no GROUP BY, so every input row is folded into a
-- single comma-separated TOTAL_FIELD value. It therefore only yields the wanted
-- string when it is fed the rows of exactly one transaction.
-- NOTE(review): TABLE is presumably a placeholder for the real table name.
SELECT
    MAX(CASE RECORD_TYPE WHEN 'H01' THEN SUBSTR(RECORD_TEXT, 1, 10) END)
    || MAX(CASE RECORD_TYPE WHEN 'D20' THEN ',' || SUBSTR(RECORD_TEXT, 2, 3) END)
    || MAX(CASE RECORD_TYPE WHEN 'D30' THEN ',' || SUBSTR(RECORD_TEXT, 9, 8) END)
    || MAX(CASE RECORD_TYPE WHEN 'H02' THEN ',' || SUBSTR(RECORD_TEXT, 13, 4) END)
    AS TOTAL_FIELD
FROM TABLE
我得到了预期的输出。
ABCDEFGHI1,ABR,87655847,0402
但我无法进行后续交易。
我对上述示例的预期输出(两个事务 = 两行)将是
ABCDEFGHI1,ABR,87655847,0402
MLOPKIJUHN,LKI,JVNV8346,G784
总共有大约200笔交易。我正在使用 Teradata 版本 14。请帮忙。
这是分析函数(analytic function)擅长处理的问题。我不熟悉 Teradata,但它的用法应该与其他数据库提供的类似。
请查阅 PARTITION BY,参见此链接:
http://www.tutorialspoint.com/teradata/teradata_partitioned_primary_index.htm
你基本上可以按照你想要的方式切片你的数据 所以你会做这样的事情
PARTITION BY Record_type
除非您按照 Tyron78 的建议添加一列,否则您还必须借助其他分析函数设计一种算法,来确定中间的记录属于哪一笔交易。
希望对您有所帮助
-- Demo (T-SQL): load the sample rows into a table variable, then return one
-- row per H01-led transaction with the extracted substrings pivoted into the
-- columns H01, D20, D30 and H02.
-- CREATE_TMSP is the ordering column added for this demo; the original sample
-- data has no column that defines row order.
DECLARE @t TABLE (
    CREATE_TMSP INT,
    Record_type NVARCHAR(20),
    Record_Text NVARCHAR(50)
);

INSERT INTO @t (CREATE_TMSP, Record_type, Record_Text)
VALUES
    (1,  'H01', 'ABCDEFGHI123456789'),
    (2,  'D45', 'BCDEFGH098765'),
    (3,  'D20', 'BABRTHYUHU56789'),
    (4,  'D30', 'QWERTY09876558471255'),
    (5,  'D12', 'ASDFGHJ9814752'),
    (6,  'H02', 'UGHRYCGDF12304025'),
    (7,  'G80', 'YHNBGTRFV0147852'),
    (8,  'H01', 'MLOPKIJUHNB624817'),
    (9,  'D20', 'PLKIJUNHMY7653235'),
    (10, 'H15', 'MVNBDGETDGSTEX9874'),
    (11, 'D30', 'GNHGDTBFJVNV834687'),
    (12, 'H02', 'JDGHKDGHSDFIG7845387'),
    (13, 'D60', 'GHCNDBDGCTEF45367');

WITH
-- Every row tagged with its rank in CREATE_TMSP order.
ranked AS (
    SELECT
        RECORD_TYPE,
        RECORD_TEXT,
        DENSE_RANK() OVER (ORDER BY CREATE_TMSP) AS row_seq
    FROM @t
),
-- One row per H01 header, carrying the half-open rank range [LEFT, RIGHT) of
-- its transaction. RIGHT is the rank of the next H01 row; the last header has
-- no successor and gets the sentinel 999999 instead.
headers AS (
    SELECT
        Record_Type,
        Record_Text,
        row_seq AS DERIVED_COLUMN_LEFT,
        COALESCE(LEAD(row_seq) OVER (ORDER BY row_seq), 999999) AS DERIVED_COLUMN_RIGHT
    FROM ranked
    WHERE Record_type = 'H01'
),
-- Rows of interest attached to their header, each reduced to the substring
-- that is extracted for its record type.
pieces AS (
    SELECT
        h.DERIVED_COLUMN_LEFT  AS ID,
        h.Record_Type          AS RecordTypeHead,
        h.Record_Text          AS RecordTextHead,
        h.DERIVED_COLUMN_LEFT,
        h.DERIVED_COLUMN_RIGHT,
        r.Record_Type          AS Record_Type,
        CASE r.Record_type
            WHEN 'H01' THEN SUBSTRING(r.RECORD_TEXT, 1, 10)
            WHEN 'D20' THEN SUBSTRING(r.RECORD_TEXT, 2, 3)
            WHEN 'D30' THEN SUBSTRING(r.RECORD_TEXT, 9, 8)
            WHEN 'H02' THEN SUBSTRING(r.RECORD_TEXT, 13, 4)
        END                    AS RecordTextSplit
    FROM headers AS h
    INNER JOIN ranked AS r
        ON  r.row_seq >= h.DERIVED_COLUMN_LEFT
        AND r.row_seq <  h.DERIVED_COLUMN_RIGHT
    WHERE r.Record_type IN ('H01', 'D20', 'D30', 'H02')
)
-- PIVOT groups by every column of "pieces" it does not consume, i.e. the five
-- header columns, and spreads RecordTextSplit across one column per type.
SELECT
    pvt.ID,
    pvt.RecordTypeHead,
    pvt.RecordTextHead,
    pvt.DERIVED_COLUMN_LEFT,
    pvt.DERIVED_COLUMN_RIGHT,
    pvt.[H01],
    pvt.[D20],
    pvt.[D30],
    pvt.[H02]
FROM pieces
PIVOT
(
    MAX(RecordTextSplit)
    FOR Record_Type IN ([H01], [D20], [D30], [H02])
) AS pvt
添加时间戳列后,可以轻松地为每笔交易分配唯一编号。然后您可以应用您现有的计算:
-- Teradata: one comma-separated TOTAL_FIELD row per transaction.
-- A transaction starts at each 'H01' row. The inner query numbers transactions
-- with a running count of 'H01' rows in CREATE_TMSP order; the outer query
-- applies the per-type SUBSTR extraction to each transaction.
-- Fix: the concatenation is now wrapped in one balanced pair of parentheses
-- (the original had a closing parenthesis with no matching opening one).
-- NOTE(review): if a transaction lacks one of the four record types, that
-- MAX() is NULL; assuming NULL-propagating concatenation, TOTAL_FIELD is then
-- NULL as well. Wrap each part in COALESCE(..., '') if partial rows are wanted.
SELECT
    trans#,
    (   MAX(CASE WHEN RECORD_TYPE = 'H01' THEN SUBSTR(RECORD_TEXT,  1, 10) END)
     || MAX(CASE WHEN RECORD_TYPE = 'D20' THEN ',' || SUBSTR(RECORD_TEXT,  2, 3) END)
     || MAX(CASE WHEN RECORD_TYPE = 'D30' THEN ',' || SUBSTR(RECORD_TEXT,  9, 8) END)
     || MAX(CASE WHEN RECORD_TYPE = 'H02' THEN ',' || SUBSTR(RECORD_TEXT, 13, 4) END)
    ) AS TOTAL_FIELD
FROM
(
    SELECT
        CREATE_TMSP,
        RECORD_TYPE,
        RECORD_TEXT,
        -- assign a unique number to each transaction: running count of 'H01' rows
        SUM(CASE WHEN RECORD_TYPE = 'H01' THEN 1 ELSE 0 END)
            OVER (ORDER BY CREATE_TMSP
                  ROWS UNBOUNDED PRECEDING) AS trans#
    -- NOTE(review): "table" is presumably a placeholder for the real table name
    FROM table
    -- more efficient to filter unneeded data before the OLAP function
    WHERE RECORD_TYPE IN ('H01', 'D20', 'D30', 'H02')
    -- uncomment if the data doesn't start with an 'H01' row and you don't want partial transactions
    -- QUALIFY trans# > 0
) AS dt
GROUP BY trans#