Oracle SQL - 带有 Rtrim 和 Regexp 的 Listagg - 重复项仍然存在
Oracle SQL - Listagg with Rtrim and Regexp - Duplicates still present
我将 LISTAGG 与 RTRIM 和 REGEXP_REPLACE 结合使用,想生成一个以逗号分隔、并且去掉了重复项的考试成绩列表。
问题是仍然有重复项。
下面给出的数据直接来自 SORTEST 表(SELECT * FROM SORTEST WHERE SORTEST_PIDM = '260670'),我只是去掉了用不到的列。
Q1:为什么会有重复?
Q2:如何消除这些重复项?
我觉得可能是因为有两套 A01-A05 的成绩:这个问题只出现在拥有不止一组 A 类成绩的人身上。但这对我来说说不通,因为我筛选的只是代码形如 'A%B' 的成绩。
代码:
-- Question's query: tries to return one row per student with a comma-separated
-- list of "test code-score" entries for a fixed set of test codes.
-- NOTE(review): DISTINCT is applied to the finished rows only; it does not
-- remove repeated entries inside the LISTAGG string.
SELECT DISTINCT
SP.SPRIDEN_ID AS "STUDENT_ID",
t2.sortest_pidm,
SP.SPRIDEN_LAST_NAME AS "LAST",
SP.SPRIDEN_FIRST_NAME AS "FIRST",
-- RTRIM(..., '-') strips any trailing hyphens from the final string.
RTRIM(
-- The pattern matches a run of non-hyphen characters, then one or more hyphens,
-- then either end of string or another hyphen. It never compares one list entry
-- with another, so repeated entries are left in place.
REGEXP_REPLACE(
(
-- Analytic LISTAGG: concatenates one "code-score" entry for every joined row
-- in the student's partition, so any row produced more than once by the joins
-- shows up as a repeated entry.
listagg ((T2.SORTEST_TESC_CODE || '-' || T2.SORTEST_TEST_SCORE), ', ')
-- The ordering key is the same as the partition key, so every row in the
-- partition ties and the order of the entries is not fixed by this query.
WITHIN GROUP (ORDER BY SP.SPRIDEN_ID)
OVER (PARTITION BY SP.SPRIDEN_ID)),
'([^-]*)(-)+($|-)',
''),
'-') TEST
FROM
SPRIDEN SP
-- Written as an outer join, but the WHERE clause below requires
-- B.SPBPERS_CONFID_IND <> 'Y', which is not true for the NULLs of an unmatched
-- row, so students without an SPBPERS row are filtered out anyway.
left outer join SPBPERS B on SP.spriden_pidm = b.spbpers_pidm
-- One joined row per SORTEST row of the student.
-- NOTE(review): which join multiplies rows (several SPRIDEN, SPBPERS or SORTEST
-- rows per PIDM) cannot be determined from the query alone - verify against the data.
JOIN SORTEST T2 ON T2.SORTEST_PIDM = SP.SPRIDEN_PIDM
WHERE
SP.SPRIDEN_CHANGE_IND IS NULL
AND B.SPBPERS_DEAD_IND IS NULL
-- Also excludes rows whose SPBPERS_CONFID_IND is NULL (comparison yields UNKNOWN).
AND B.SPBPERS_CONFID_IND <> 'Y'
AND T2.SORTEST_TADM_CODE IS NULL
AND
-- Exact-match list of test codes; codes without the trailing 'B' (A01, A02, ...)
-- are not in the list.
T2.SORTEST_TESC_CODE IN ('CM1B', 'CM2B', 'CR1B', 'CW1B', 'A01B', 'A02B', 'A03B', 'A04B',
'A05B', 'S01B', 'S02B', 'S95B', 'DSPW', 'DSPR', 'DSPM')
-- Repeats the SPRIDEN_CHANGE_IND predicate already stated at the top of the WHERE clause.
AND sP.spriden_change_ind is null
AND SP.SPRIDEN_ID IN ( 'A00154876')
数据
请注意 SORTEST_PIDM = SPRIDEN_PIDM。我没有包含 SPRIDEN ID 或名称,因为我想简化数据部分。
+--------------+-------------------+-------------------+--------------------+
| SORTEST_PIDM | SORTEST_TESC_CODE | SORTEST_TEST_DATE | SORTEST_TEST_SCORE |
+--------------+-------------------+-------------------+--------------------+
| | | | |
| 260670 | A01 | 1-Mar-12 | 20 |
| 260670 | A01 | 1-Oct-12 | 22 |
| 260670 | A01B | 9-Jan-13 | 22 |
| 260670 | A02 | 1-Mar-12 | 19 |
| 260670 | A02 | 1-Oct-12 | 19 |
| 260670 | A02B | 5-Jun-12 | 19 |
| 260670 | A03 | 1-Mar-12 | 21 |
| 260670 | A03 | 1-Oct-12 | 19 |
| 260670 | A03B | 5-Jun-12 | 21 |
| 260670 | A04 | 1-Mar-12 | 23 |
| 260670 | A04 | 1-Oct-12 | 22 |
| 260670 | A04B | 5-Jun-12 | 23 |
| 260670 | A05 | 1-Mar-12 | 21 |
| 260670 | A05 | 1-Oct-12 | 21 |
| 260670 | A05B | 5-Jun-12 | 21 |
| 260670 | DSPM | 5-Jun-12 | 4 |
| 260670 | DSPR | 5-Jun-12 | 4 |
| 260670 | DSPW | 5-Jun-12 | 4 |
+--------------+-------------------+-------------------+--------------------+
结果:
+------------+--------------+--------+--------+------------------------------------------------+
| STUDENT_ID | SORTEST_PIDM | LAST | FIRST | TEST |
+------------+--------------+--------+--------+------------------------------------------------+
| A00154876 | 260670 | Fowler | Martin | A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, |
| | | | | DSPM-4, DSPR-4, DSPW-4, |
| | | | | A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, |
| | | | | DSPM-4, DSPR-4, DSPW-4 |
+------------+--------------+--------+--------+------------------------------------------------+
这些是我想要的结果:
+------------+--------------+--------+--------+--------------------------------------------+
| STUDENT_ID | SORTEST_PIDM | LAST | FIRST | TEST |
+------------+--------------+--------+--------+--------------------------------------------+
| A00154876 | 260670 | Fowler | Martin | A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, |
| | | | | DSPM-4, DSPR-4, DSPW-4 |
+------------+--------------+--------+--------+--------------------------------------------+
Oracle 设置:
-- Sample SPRIDEN table for reproducing the problem: a single student row
-- created with CREATE TABLE ... AS SELECT.
-- Column data types are not declared; they are derived from the selected literals.
CREATE TABLE SPRIDEN( SPRIDEN_ID, SPRIDEN_PIDM, SPRIDEN_LAST_NAME, SPRIDEN_FIRST_NAME ) AS
SELECT 'A00154876', 260670, 'Fowler', 'Martin' FROM DUAL;
-- Sample SORTEST table for reproducing the problem: 18 score rows for PIDM 260670,
-- created with CREATE TABLE ... AS SELECT over DUAL.
-- Column data types are not declared; they are derived from the selected literals.
-- NOTE(review): the day part of the DATE literals is written with one digit
-- (e.g. '2012-03-1'); the documented literal form is 'YYYY-MM-DD' - confirm the
-- target Oracle version accepts this.
CREATE TABLE SORTEST ( SORTEST_PIDM, SORTEST_TESC_CODE, SORTEST_TEST_DATE, SORTEST_TEST_SCORE ) AS
SELECT 260670, 'A01', DATE '2012-03-1', 20 FROM DUAL UNION ALL
SELECT 260670, 'A01', DATE '2012-10-1', 22 FROM DUAL UNION ALL
SELECT 260670, 'A01B', DATE '2013-01-9', 22 FROM DUAL UNION ALL
SELECT 260670, 'A02', DATE '2012-03-1', 19 FROM DUAL UNION ALL
SELECT 260670, 'A02', DATE '2012-10-1', 19 FROM DUAL UNION ALL
SELECT 260670, 'A02B', DATE '2012-06-5', 19 FROM DUAL UNION ALL
SELECT 260670, 'A03', DATE '2012-03-1', 21 FROM DUAL UNION ALL
SELECT 260670, 'A03', DATE '2012-10-1', 19 FROM DUAL UNION ALL
SELECT 260670, 'A03B', DATE '2012-06-5', 21 FROM DUAL UNION ALL
SELECT 260670, 'A04', DATE '2012-03-1', 23 FROM DUAL UNION ALL
SELECT 260670, 'A04', DATE '2012-10-1', 22 FROM DUAL UNION ALL
SELECT 260670, 'A04B', DATE '2012-06-5', 23 FROM DUAL UNION ALL
SELECT 260670, 'A05', DATE '2012-03-1', 21 FROM DUAL UNION ALL
SELECT 260670, 'A05', DATE '2012-10-1', 21 FROM DUAL UNION ALL
SELECT 260670, 'A05B', DATE '2012-06-5', 21 FROM DUAL UNION ALL
SELECT 260670, 'DSPM', DATE '2012-06-5', 4 FROM DUAL UNION ALL
SELECT 260670, 'DSPR', DATE '2012-06-5', 4 FROM DUAL UNION ALL
SELECT 260670, 'DSPW', DATE '2012-06-5', 4 FROM DUAL;
查询:
-- Build one comma-separated "code-score" list per student.
-- The score rows are first reduced to distinct (pidm, code, score) combinations,
-- so a combination that occurs several times in SORTEST yields a single entry.
WITH DISTINCT_SCORES AS (
    SELECT DISTINCT
        SORTEST_PIDM,
        SORTEST_TESC_CODE,
        SORTEST_TEST_SCORE
    FROM SORTEST
    WHERE SORTEST_TESC_CODE IN (
        'CM1B', 'CM2B', 'CR1B', 'CW1B',
        'A01B', 'A02B', 'A03B', 'A04B', 'A05B',
        'S01B', 'S02B', 'S95B',
        'DSPW', 'DSPR', 'DSPM'
    )
)
SELECT DISTINCT
    SP.SPRIDEN_ID         AS "STUDENT_ID",
    DS.SORTEST_PIDM,
    SP.SPRIDEN_LAST_NAME  AS "LAST",
    SP.SPRIDEN_FIRST_NAME AS "FIRST",
    -- Entries are ordered by test code, then score, so the list order is stable.
    LISTAGG(DS.SORTEST_TESC_CODE || '-' || DS.SORTEST_TEST_SCORE, ', ')
        WITHIN GROUP (ORDER BY DS.SORTEST_TESC_CODE, DS.SORTEST_TEST_SCORE)
        OVER (PARTITION BY SP.SPRIDEN_ID) AS TEST
FROM SPRIDEN SP
INNER JOIN DISTINCT_SCORES DS
    ON DS.SORTEST_PIDM = SP.SPRIDEN_PIDM;
结果:
STUDENT_ID SORTEST_PIDM LAST FIRST TEST
---------- ------------ ------ ------ --------------------------------------------------------------------
A00154876 260670 Fowler Martin A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, DSPM-4, DSPR-4, DSPW-4
我将 LISTAGG 与 RTRIM 和 REGEXP_REPLACE 结合使用,想生成一个以逗号分隔、并且去掉了重复项的考试成绩列表。
问题是仍然有重复项。
下面给出的数据直接来自 SORTEST 表(SELECT * FROM SORTEST WHERE SORTEST_PIDM = '260670'),我只是去掉了用不到的列。
Q1:为什么会有重复?
Q2:如何消除这些重复项?
我觉得可能是因为有两套 A01-A05 的成绩:这个问题只出现在拥有不止一组 A 类成绩的人身上。但这对我来说说不通,因为我筛选的只是代码形如 'A%B' 的成绩。
代码:
-- Question's query: tries to return one row per student with a comma-separated
-- list of "test code-score" entries for a fixed set of test codes.
-- NOTE(review): DISTINCT is applied to the finished rows only; it does not
-- remove repeated entries inside the LISTAGG string.
SELECT DISTINCT
SP.SPRIDEN_ID AS "STUDENT_ID",
t2.sortest_pidm,
SP.SPRIDEN_LAST_NAME AS "LAST",
SP.SPRIDEN_FIRST_NAME AS "FIRST",
-- RTRIM(..., '-') strips any trailing hyphens from the final string.
RTRIM(
-- The pattern matches a run of non-hyphen characters, then one or more hyphens,
-- then either end of string or another hyphen. It never compares one list entry
-- with another, so repeated entries are left in place.
REGEXP_REPLACE(
(
-- Analytic LISTAGG: concatenates one "code-score" entry for every joined row
-- in the student's partition, so any row produced more than once by the joins
-- shows up as a repeated entry.
listagg ((T2.SORTEST_TESC_CODE || '-' || T2.SORTEST_TEST_SCORE), ', ')
-- The ordering key is the same as the partition key, so every row in the
-- partition ties and the order of the entries is not fixed by this query.
WITHIN GROUP (ORDER BY SP.SPRIDEN_ID)
OVER (PARTITION BY SP.SPRIDEN_ID)),
'([^-]*)(-)+($|-)',
''),
'-') TEST
FROM
SPRIDEN SP
-- Written as an outer join, but the WHERE clause below requires
-- B.SPBPERS_CONFID_IND <> 'Y', which is not true for the NULLs of an unmatched
-- row, so students without an SPBPERS row are filtered out anyway.
left outer join SPBPERS B on SP.spriden_pidm = b.spbpers_pidm
-- One joined row per SORTEST row of the student.
-- NOTE(review): which join multiplies rows (several SPRIDEN, SPBPERS or SORTEST
-- rows per PIDM) cannot be determined from the query alone - verify against the data.
JOIN SORTEST T2 ON T2.SORTEST_PIDM = SP.SPRIDEN_PIDM
WHERE
SP.SPRIDEN_CHANGE_IND IS NULL
AND B.SPBPERS_DEAD_IND IS NULL
-- Also excludes rows whose SPBPERS_CONFID_IND is NULL (comparison yields UNKNOWN).
AND B.SPBPERS_CONFID_IND <> 'Y'
AND T2.SORTEST_TADM_CODE IS NULL
AND
-- Exact-match list of test codes; codes without the trailing 'B' (A01, A02, ...)
-- are not in the list.
T2.SORTEST_TESC_CODE IN ('CM1B', 'CM2B', 'CR1B', 'CW1B', 'A01B', 'A02B', 'A03B', 'A04B',
'A05B', 'S01B', 'S02B', 'S95B', 'DSPW', 'DSPR', 'DSPM')
-- Repeats the SPRIDEN_CHANGE_IND predicate already stated at the top of the WHERE clause.
AND sP.spriden_change_ind is null
AND SP.SPRIDEN_ID IN ( 'A00154876')
数据
请注意 SORTEST_PIDM = SPRIDEN_PIDM。我没有包含 SPRIDEN ID 或名称,因为我想简化数据部分。
+--------------+-------------------+-------------------+--------------------+
| SORTEST_PIDM | SORTEST_TESC_CODE | SORTEST_TEST_DATE | SORTEST_TEST_SCORE |
+--------------+-------------------+-------------------+--------------------+
| | | | |
| 260670 | A01 | 1-Mar-12 | 20 |
| 260670 | A01 | 1-Oct-12 | 22 |
| 260670 | A01B | 9-Jan-13 | 22 |
| 260670 | A02 | 1-Mar-12 | 19 |
| 260670 | A02 | 1-Oct-12 | 19 |
| 260670 | A02B | 5-Jun-12 | 19 |
| 260670 | A03 | 1-Mar-12 | 21 |
| 260670 | A03 | 1-Oct-12 | 19 |
| 260670 | A03B | 5-Jun-12 | 21 |
| 260670 | A04 | 1-Mar-12 | 23 |
| 260670 | A04 | 1-Oct-12 | 22 |
| 260670 | A04B | 5-Jun-12 | 23 |
| 260670 | A05 | 1-Mar-12 | 21 |
| 260670 | A05 | 1-Oct-12 | 21 |
| 260670 | A05B | 5-Jun-12 | 21 |
| 260670 | DSPM | 5-Jun-12 | 4 |
| 260670 | DSPR | 5-Jun-12 | 4 |
| 260670 | DSPW | 5-Jun-12 | 4 |
+--------------+-------------------+-------------------+--------------------+
结果:
+------------+--------------+--------+--------+------------------------------------------------+
| STUDENT_ID | SORTEST_PIDM | LAST | FIRST | TEST |
+------------+--------------+--------+--------+------------------------------------------------+
| A00154876 | 260670 | Fowler | Martin | A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, |
| | | | | DSPM-4, DSPR-4, DSPW-4, |
| | | | | A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, |
| | | | | DSPM-4, DSPR-4, DSPW-4 |
+------------+--------------+--------+--------+------------------------------------------------+
这些是我想要的结果:
+------------+--------------+--------+--------+--------------------------------------------+
| STUDENT_ID | SORTEST_PIDM | LAST | FIRST | TEST |
+------------+--------------+--------+--------+--------------------------------------------+
| A00154876 | 260670 | Fowler | Martin | A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, |
| | | | | DSPM-4, DSPR-4, DSPW-4 |
+------------+--------------+--------+--------+--------------------------------------------+
Oracle 设置:
-- Sample SPRIDEN table for reproducing the problem: a single student row
-- created with CREATE TABLE ... AS SELECT.
-- Column data types are not declared; they are derived from the selected literals.
CREATE TABLE SPRIDEN( SPRIDEN_ID, SPRIDEN_PIDM, SPRIDEN_LAST_NAME, SPRIDEN_FIRST_NAME ) AS
SELECT 'A00154876', 260670, 'Fowler', 'Martin' FROM DUAL;
-- Sample SORTEST table for reproducing the problem: 18 score rows for PIDM 260670,
-- created with CREATE TABLE ... AS SELECT over DUAL.
-- Column data types are not declared; they are derived from the selected literals.
-- NOTE(review): the day part of the DATE literals is written with one digit
-- (e.g. '2012-03-1'); the documented literal form is 'YYYY-MM-DD' - confirm the
-- target Oracle version accepts this.
CREATE TABLE SORTEST ( SORTEST_PIDM, SORTEST_TESC_CODE, SORTEST_TEST_DATE, SORTEST_TEST_SCORE ) AS
SELECT 260670, 'A01', DATE '2012-03-1', 20 FROM DUAL UNION ALL
SELECT 260670, 'A01', DATE '2012-10-1', 22 FROM DUAL UNION ALL
SELECT 260670, 'A01B', DATE '2013-01-9', 22 FROM DUAL UNION ALL
SELECT 260670, 'A02', DATE '2012-03-1', 19 FROM DUAL UNION ALL
SELECT 260670, 'A02', DATE '2012-10-1', 19 FROM DUAL UNION ALL
SELECT 260670, 'A02B', DATE '2012-06-5', 19 FROM DUAL UNION ALL
SELECT 260670, 'A03', DATE '2012-03-1', 21 FROM DUAL UNION ALL
SELECT 260670, 'A03', DATE '2012-10-1', 19 FROM DUAL UNION ALL
SELECT 260670, 'A03B', DATE '2012-06-5', 21 FROM DUAL UNION ALL
SELECT 260670, 'A04', DATE '2012-03-1', 23 FROM DUAL UNION ALL
SELECT 260670, 'A04', DATE '2012-10-1', 22 FROM DUAL UNION ALL
SELECT 260670, 'A04B', DATE '2012-06-5', 23 FROM DUAL UNION ALL
SELECT 260670, 'A05', DATE '2012-03-1', 21 FROM DUAL UNION ALL
SELECT 260670, 'A05', DATE '2012-10-1', 21 FROM DUAL UNION ALL
SELECT 260670, 'A05B', DATE '2012-06-5', 21 FROM DUAL UNION ALL
SELECT 260670, 'DSPM', DATE '2012-06-5', 4 FROM DUAL UNION ALL
SELECT 260670, 'DSPR', DATE '2012-06-5', 4 FROM DUAL UNION ALL
SELECT 260670, 'DSPW', DATE '2012-06-5', 4 FROM DUAL;
查询:
-- Build one comma-separated "code-score" list per student.
-- The score rows are first reduced to distinct (pidm, code, score) combinations,
-- so a combination that occurs several times in SORTEST yields a single entry.
WITH DISTINCT_SCORES AS (
    SELECT DISTINCT
        SORTEST_PIDM,
        SORTEST_TESC_CODE,
        SORTEST_TEST_SCORE
    FROM SORTEST
    WHERE SORTEST_TESC_CODE IN (
        'CM1B', 'CM2B', 'CR1B', 'CW1B',
        'A01B', 'A02B', 'A03B', 'A04B', 'A05B',
        'S01B', 'S02B', 'S95B',
        'DSPW', 'DSPR', 'DSPM'
    )
)
SELECT DISTINCT
    SP.SPRIDEN_ID         AS "STUDENT_ID",
    DS.SORTEST_PIDM,
    SP.SPRIDEN_LAST_NAME  AS "LAST",
    SP.SPRIDEN_FIRST_NAME AS "FIRST",
    -- Entries are ordered by test code, then score, so the list order is stable.
    LISTAGG(DS.SORTEST_TESC_CODE || '-' || DS.SORTEST_TEST_SCORE, ', ')
        WITHIN GROUP (ORDER BY DS.SORTEST_TESC_CODE, DS.SORTEST_TEST_SCORE)
        OVER (PARTITION BY SP.SPRIDEN_ID) AS TEST
FROM SPRIDEN SP
INNER JOIN DISTINCT_SCORES DS
    ON DS.SORTEST_PIDM = SP.SPRIDEN_PIDM;
结果:
STUDENT_ID SORTEST_PIDM LAST FIRST TEST
---------- ------------ ------ ------ --------------------------------------------------------------------
A00154876 260670 Fowler Martin A01B-22, A02B-19, A03B-21, A04B-23, A05B-21, DSPM-4, DSPR-4, DSPW-4