如何创建一个视图来动态填充 table 中缺失的数据行

How to create a view that dynamically fills in missing rows of data from a table

我想创建一个视图,动态填充源表 test_table 中缺失的数据行。源表 test_table 在发生更改时更新,最新的记录通过指示列 current_record 标记为 'Y'。下面是一个例子:

TEST_ID 1234 在一月份更新了 4 次:

-- Change-history table: a test gets a new row whenever it is updated, and the
-- most recent row is flagged with current_record = 'Y'.
CREATE TABLE test_table (
    test_id        NUMBER(10),   -- identifier of the test the row belongs to
    test_name      VARCHAR2(50),
    test_date      DATE,         -- date on which this version was recorded
    age            NUMBER(10),   -- in the sample data: days since the first row
    current_record VARCHAR2(1),  -- 'Y' on the latest row, NULL on older rows
    test_data      VARCHAR2(50)
);

-- Sample history for test 1234: four updates during January 2020; only the
-- last row carries current_record = 'Y'.
-- ANSI DATE literals replace TO_DATE with a 'MON' mask so the script no longer
-- depends on the session's NLS_DATE_LANGUAGE; the stored values (midnight of
-- each day) are unchanged.
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'ABC', DATE '2020-01-08', 0, NULL, 'DATA1');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'ABC', DATE '2020-01-17', 9, NULL, 'DATA2');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'XYZ', DATE '2020-01-22', 14, NULL, 'DATA3');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'XYZ', DATE '2020-01-31', 23, 'Y', 'DATA4');

目标是用数据集中前一条记录来填充缺失的日期范围:TEST_DATE 和 AGE 需要逐日递增,而 TEST_NAME 和 TEST_DATA 沿用上一条记录的值。只有最新的一条记录的 CURRENT_RECORD 才能为 'Y'。

这可以在 PL/SQL 中完成,但我们被要求研究可能的创建视图解决方案。以下是预期的输出:-

粗体行是 test_table 中实际存在的行,其他行是动态生成的。

您可以使用递归 CTE 来生成日期序列,然后将它与原来的 table 做 left join。最后,使用 lag() 及其方便的 ignore nulls 选项来填补空白(与大多数其他数据库不同,Oracle 支持该选项):

-- Gap-fill query for a single test, using lag() ... ignore nulls.
-- date_series: one row per calendar day from the earliest to the latest
-- test_date; age starts at the minimum age and grows by 1 per generated day.
with date_series (test_date, max_date, age) as (
    select min(test_date), max(test_date), min(age)
    from test_table
    union all
    select test_date + 1, max_date, age + 1
    from date_series
    where test_date < max_date
)
select
    -- Days that exist in test_table keep their own value; generated days take
    -- the latest non-null value found on an earlier date.
    coalesce(src.test_id,        lag(src.test_id)        ignore nulls over (order by d.test_date)) as test_id,
    coalesce(src.test_name,      lag(src.test_name)      ignore nulls over (order by d.test_date)) as test_name,
    d.test_date,
    d.age,
    coalesce(src.current_record, lag(src.current_record) ignore nulls over (order by d.test_date)) as current_record,
    coalesce(src.test_data,      lag(src.test_data)      ignore nulls over (order by d.test_date)) as test_data
from date_series d
left join test_table src
    on src.test_date = d.test_date
order by d.test_date

Demo on DB Fiddle:

TEST_ID | TEST_NAME | TEST_DATE | AGE | CURRENT_RECORD | TEST_DATA
------: | :-------- | :-------- | --: | :------------- | :--------
   1234 | ABC       | 08-JAN-20 |   0 | null           | DATA1    
   1234 | ABC       | 09-JAN-20 |   1 | null           | DATA1    
   1234 | ABC       | 10-JAN-20 |   2 | null           | DATA1    
   1234 | ABC       | 11-JAN-20 |   3 | null           | DATA1    
   1234 | ABC       | 12-JAN-20 |   4 | null           | DATA1    
   1234 | ABC       | 13-JAN-20 |   5 | null           | DATA1    
   1234 | ABC       | 14-JAN-20 |   6 | null           | DATA1    
   1234 | ABC       | 15-JAN-20 |   7 | null           | DATA1    
   1234 | ABC       | 16-JAN-20 |   8 | null           | DATA1    
   1234 | ABC       | 17-JAN-20 |   9 | null           | DATA2    
   1234 | ABC       | 18-JAN-20 |  10 | null           | DATA2    
   1234 | ABC       | 19-JAN-20 |  11 | null           | DATA2    
   1234 | ABC       | 20-JAN-20 |  12 | null           | DATA2    
   1234 | ABC       | 21-JAN-20 |  13 | null           | DATA2    
   1234 | XYZ       | 22-JAN-20 |  14 | null           | DATA3    
   1234 | XYZ       | 23-JAN-20 |  15 | null           | DATA3    
   1234 | XYZ       | 24-JAN-20 |  16 | null           | DATA3    
   1234 | XYZ       | 25-JAN-20 |  17 | null           | DATA3    
   1234 | XYZ       | 26-JAN-20 |  18 | null           | DATA3    
   1234 | XYZ       | 27-JAN-20 |  19 | null           | DATA3    
   1234 | XYZ       | 28-JAN-20 |  20 | null           | DATA3    
   1234 | XYZ       | 29-JAN-20 |  21 | null           | DATA3    
   1234 | XYZ       | 30-JAN-20 |  22 | null           | DATA3    
   1234 | XYZ       | 31-JAN-20 |  23 | Y              | DATA4    

实际上,window 函数的 ignore nulls 选项是在 Oracle 12 中才引入的。如果您确实在使用 11g(如问题标签所示),那么可以用窗口计数和 first_value() 来模拟 ignore nulls:
-- Gap-fill query for a single test without "ignore nulls":
-- a running count of real rows builds groups, and first_value() copies each
-- group's real row onto the generated days that follow it.
-- cte: one row per calendar day between the earliest and latest test_date.
with cte (test_date, max_date, age) as (
    select min(test_date), max(test_date), min(age) from test_table
    union all
    select test_date + 1, max_date, age + 1 from cte where test_date < max_date
)
select
    -- Within a group the first row by date is the real row from test_table.
    first_value(test_id)        over(partition by grp order by test_date) as test_id,
    first_value(test_name)      over(partition by grp order by test_date) as test_name,
    test_date,
    age,
    first_value(current_record) over(partition by grp order by test_date) as current_record,
    first_value(test_data)      over(partition by grp order by test_date) as test_data
from (
    select
        t.test_id,  -- taken from test_table: cte exposes no test_id column
        t.test_name,
        c.test_date,
        c.age,
        t.current_record,
        t.test_data,
        -- count() skips nulls, so grp only increases on days that have a real
        -- row; generated days inherit the grp of the preceding real row.
        count(t.test_id) over(order by c.test_date) grp
    from cte c
    left join test_table t on t.test_date = c.test_date
) t
order by test_date

最后:如果您需要处理多个 test_id,可以使用下面的查询:

-- Gap-fill query for any number of tests; every step is scoped per test_id.
-- date_series: for each test_id, one row per calendar day between its earliest
-- and latest test_date, with age counting up by 1 per day from its minimum.
with date_series (test_id, test_date, max_date, age) as (
    select test_id, min(test_date), max(test_date), min(age)
    from test_table
    group by test_id
    union all
    select test_id, test_date + 1, max_date, age + 1
    from date_series
    where test_date < max_date
),
-- grouped: attach the real rows to the day series and number the groups.
-- count() skips nulls, so grp only increases on days that have a real row.
grouped as (
    select
        d.test_id,
        src.test_name,
        d.test_date,
        d.age,
        src.current_record,
        src.test_data,
        count(src.test_id) over (partition by d.test_id order by d.test_date) as grp
    from date_series d
    left join test_table src
        on  src.test_id   = d.test_id
        and src.test_date = d.test_date
)
select
    test_id,
    -- The first row of each (test_id, grp) group is the real row; its values
    -- are copied onto the generated days that follow it.
    first_value(test_name)      over (partition by test_id, grp order by test_date) as test_name,
    test_date,
    age,
    first_value(current_record) over (partition by test_id, grp order by test_date) as current_record,
    first_value(test_data)      over (partition by test_id, grp order by test_date) as test_data
from grouped
order by test_id, test_date

感谢大家的帮助,这是最终的解决方案

    -- Final gap-fill query, handling several test_id values at once.
    -- day_rows: per test_id, one row for each calendar day from its earliest to
    -- its latest test_date; age grows by 1 per day from the test's minimum age.
    with day_rows (test_id, test_date, max_date, age) as (
        select test_id, min(test_date), max(test_date), min(age)
        from test_table
        group by test_id
        union all
        select test_id, test_date + 1, max_date, age + 1
        from day_rows
        where test_date < max_date
    )

    select
        test_id,
        -- Each (test_id, grp) group starts with a real row from test_table;
        -- first_value() carries that row's values over the generated days.
        first_value(test_name)      over (partition by test_id, grp order by test_date) as test_name,
        test_date,
        age,
        first_value(current_record) over (partition by test_id, grp order by test_date) as current_record,
        first_value(test_data)      over (partition by test_id, grp order by test_date) as test_data
    from (
        select
            dr.test_id,
            src.test_name,
            dr.test_date,
            dr.age,
            src.current_record,
            src.test_data,
            -- count() ignores nulls, so the counter only moves on days that
            -- have a real row for this test_id.
            count(src.test_id) over (partition by dr.test_id order by dr.test_date) as grp
        from day_rows dr
        left join test_table src
            on  src.test_id   = dr.test_id
            and src.test_date = dr.test_date
    ) filled
    order by test_id, test_date