如何创建一个视图来动态填充 table 中缺失的数据行
How to create a view that dynamically fills in missing rows of data from a table
我想创建一个视图,用来动态填充源表 test_table 中缺失的数据行。源表 test_table 在数据发生更改时更新,最新的记录通过 current_record 列标记为 'Y'。下面是一个例子:
TEST_ID 1234 在一月份更新了 4 次:
-- Change history for tests: one row per recorded update.
CREATE TABLE test_table (
    test_id        NUMBER(10),
    test_name      VARCHAR2(50),
    test_date      DATE,          -- date of the update
    age            NUMBER(10),    -- in the sample rows: days elapsed since the earliest test_date
    current_record VARCHAR2(1),   -- 'Y' on the latest row of the sample data, NULL otherwise
    test_data      VARCHAR2(50)
);
-- Sample data: four updates to test 1234 during January 2020.
-- ANSI DATE literals replace TO_DATE(..., 'DD-MON-RRRR HH24.MI.SS') so the
-- script no longer depends on the session's NLS_DATE_LANGUAGE (the 'JAN'
-- month abbreviation is only parsed in English-language sessions).
-- A DATE literal carries a midnight time component, same as the original values.
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'ABC', DATE '2020-01-08', 0, NULL, 'DATA1');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'ABC', DATE '2020-01-17', 9, NULL, 'DATA2');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'XYZ', DATE '2020-01-22', 14, NULL, 'DATA3');
-- Latest update: the only row flagged as the current record.
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'XYZ', DATE '2020-01-31', 23, 'Y', 'DATA4');
目标是用数据集中的前一条记录填充缺失的日期范围:TEST_DATE 和 AGE 需要逐日递增,而 TEST_NAME 和 TEST_DATA 沿用前一条记录的值。只有最新的记录的 CURRENT_RECORD 才能为 'Y'。
这可以在 PL/SQL 中完成,但我们被要求研究能否用创建视图的方式解决。以下是预期的输出:
粗体行是 test_table 中已存在的行,其他行是动态生成的。
您可以使用递归 CTE 生成日期序列,然后将其与原表做 left join,最后使用 lag() 及其便捷的 ignore nulls 选项来填补空白(Oracle 支持该选项,大多数其他数据库则不支持):
-- Gap fill for a single test_id: generate one row per calendar day between the
-- first and last test_date, then carry the latest real row's values forward
-- into the generated days.
WITH days (test_date, last_date, age) AS (
    -- Anchor row: earliest date, latest date and smallest age in the table.
    SELECT MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    UNION ALL
    -- Recursive step: advance date and age by one until the latest date.
    SELECT test_date + 1, last_date, age + 1
    FROM days
    WHERE test_date < last_date
)
SELECT
    -- Real rows keep their own value; generated rows (NULL after the outer
    -- join) take the nearest preceding non-NULL value.
    COALESCE(src.test_id,
             LAG(src.test_id) IGNORE NULLS OVER (ORDER BY d.test_date)) AS test_id,
    COALESCE(src.test_name,
             LAG(src.test_name) IGNORE NULLS OVER (ORDER BY d.test_date)) AS test_name,
    d.test_date,
    d.age,
    COALESCE(src.current_record,
             LAG(src.current_record) IGNORE NULLS OVER (ORDER BY d.test_date)) AS current_record,
    COALESCE(src.test_data,
             LAG(src.test_data) IGNORE NULLS OVER (ORDER BY d.test_date)) AS test_data
FROM days d
LEFT JOIN test_table src
    ON src.test_date = d.test_date
ORDER BY d.test_date
TEST_ID | TEST_NAME | TEST_DATE | AGE | CURRENT_RECORD | TEST_DATA
------: | :-------- | :-------- | --: | :------------- | :--------
1234 | ABC | 08-JAN-20 | 0 | null | DATA1
1234 | ABC | 09-JAN-20 | 1 | null | DATA1
1234 | ABC | 10-JAN-20 | 2 | null | DATA1
1234 | ABC | 11-JAN-20 | 3 | null | DATA1
1234 | ABC | 12-JAN-20 | 4 | null | DATA1
1234 | ABC | 13-JAN-20 | 5 | null | DATA1
1234 | ABC | 14-JAN-20 | 6 | null | DATA1
1234 | ABC | 15-JAN-20 | 7 | null | DATA1
1234 | ABC | 16-JAN-20 | 8 | null | DATA1
1234 | ABC | 17-JAN-20 | 9 | null | DATA2
1234 | ABC | 18-JAN-20 | 10 | null | DATA2
1234 | ABC | 19-JAN-20 | 11 | null | DATA2
1234 | ABC | 20-JAN-20 | 12 | null | DATA2
1234 | ABC | 21-JAN-20 | 13 | null | DATA2
1234 | XYZ | 22-JAN-20 | 14 | null | DATA3
1234 | XYZ | 23-JAN-20 | 15 | null | DATA3
1234 | XYZ | 24-JAN-20 | 16 | null | DATA3
1234 | XYZ | 25-JAN-20 | 17 | null | DATA3
1234 | XYZ | 26-JAN-20 | 18 | null | DATA3
1234 | XYZ | 27-JAN-20 | 19 | null | DATA3
1234 | XYZ | 28-JAN-20 | 20 | null | DATA3
1234 | XYZ | 29-JAN-20 | 21 | null | DATA3
1234 | XYZ | 30-JAN-20 | 22 | null | DATA3
1234 | XYZ | 31-JAN-20 | 23 | Y | DATA4
实际上,窗口函数的 ignore nulls 选项是在 Oracle 12 中才引入的。如果您确实在使用 11g(如问题标签所示),那么可以用窗口计数和 first_value() 来模拟 ignore nulls:
-- Gap fill for a single test_id without IGNORE NULLS: a running count of real
-- rows assigns every generated day to the group opened by the preceding real
-- row, and FIRST_VALUE() then reads that real row's values.
WITH cte (test_date, max_date, age) AS (
    -- Anchor row: earliest date, latest date and smallest age in the table.
    SELECT MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    UNION ALL
    -- Recursive step: advance date and age by one until the latest date.
    SELECT test_date + 1, max_date, age + 1
    FROM cte
    WHERE test_date < max_date
)
SELECT
    -- Fixed: this column was aliased test_name, duplicating the next alias.
    FIRST_VALUE(test_id) OVER (PARTITION BY grp ORDER BY test_date) AS test_id,
    FIRST_VALUE(test_name) OVER (PARTITION BY grp ORDER BY test_date) AS test_name,
    test_date,
    age,
    FIRST_VALUE(current_record) OVER (PARTITION BY grp ORDER BY test_date) AS current_record,
    FIRST_VALUE(test_data) OVER (PARTITION BY grp ORDER BY test_date) AS test_data
FROM (
    SELECT
        -- Fixed: test_id must come from test_table; the cte exposes only
        -- test_date, max_date and age, so c.test_id is an invalid identifier.
        t.test_id,
        t.test_name,
        c.test_date,
        c.age,
        t.current_record,
        t.test_data,
        -- COUNT skips the NULLs produced by the outer join, so the counter
        -- only increases on days that have a real row.
        COUNT(t.test_id) OVER (ORDER BY c.test_date) AS grp
    FROM cte c
    LEFT JOIN test_table t
        ON t.test_date = c.test_date
) t
ORDER BY test_date
最后,如果您需要处理多个 test_id:
-- Gap fill for several test_ids: each test_id gets its own day series, and
-- every generated day reads its values from the preceding real row of the
-- same test_id.
WITH days (test_id, test_date, last_date, age) AS (
    -- Anchor rows: per test_id, earliest date, latest date and smallest age.
    SELECT test_id, MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    GROUP BY test_id
    UNION ALL
    -- Recursive step: advance date and age by one until that id's latest date.
    SELECT test_id, test_date + 1, last_date, age + 1
    FROM days
    WHERE test_date < last_date
)
SELECT
    test_id,
    FIRST_VALUE(test_name) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_name,
    test_date,
    age,
    FIRST_VALUE(current_record) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS current_record,
    FIRST_VALUE(test_data) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_data
FROM (
    SELECT
        d.test_id,
        src.test_name,
        d.test_date,
        d.age,
        src.current_record,
        src.test_data,
        -- Running count of real rows per test_id; generated days share the
        -- counter value of the real row that precedes them.
        COUNT(src.test_id) OVER (PARTITION BY d.test_id ORDER BY d.test_date) AS grp
    FROM days d
    LEFT JOIN test_table src
        ON src.test_id = d.test_id
       AND src.test_date = d.test_date
) grouped
ORDER BY test_id, test_date
感谢大家的帮助,这是最终的解决方案
-- Final gap-fill query for several test_ids, written as two chained CTEs:
-- 1) calendar: one row per test_id and day between its first and last date
-- 2) tagged:   calendar joined to the real rows, with a per-id running count
WITH calendar (test_id, test_date, max_date, age) AS (
    -- Anchor rows: per test_id, earliest date, latest date and smallest age.
    SELECT test_id, MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    GROUP BY test_id
    UNION ALL
    -- Recursive step: advance date and age by one until that id's latest date.
    SELECT test_id, test_date + 1, max_date, age + 1
    FROM calendar
    WHERE test_date < max_date
),
tagged AS (
    SELECT
        cal.test_id,
        tt.test_name,
        cal.test_date,
        cal.age,
        tt.current_record,
        tt.test_data,
        -- Counts only days with a real row (NULLs from the outer join are
        -- skipped), so each real row opens a new group.
        COUNT(tt.test_id) OVER (PARTITION BY cal.test_id ORDER BY cal.test_date) AS grp
    FROM calendar cal
    LEFT JOIN test_table tt
        ON tt.test_id = cal.test_id
       AND tt.test_date = cal.test_date
)
SELECT
    test_id,
    -- The first row of each (test_id, grp) partition is the real row.
    FIRST_VALUE(test_name) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_name,
    test_date,
    age,
    FIRST_VALUE(current_record) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS current_record,
    FIRST_VALUE(test_data) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_data
FROM tagged
ORDER BY test_id, test_date
我想创建一个视图,用来动态填充源表 test_table 中缺失的数据行。源表 test_table 在数据发生更改时更新,最新的记录通过 current_record 列标记为 'Y'。下面是一个例子:
TEST_ID 1234 在一月份更新了 4 次:
-- Change history for tests: one row per recorded update.
CREATE TABLE test_table (
    test_id        NUMBER(10),
    test_name      VARCHAR2(50),
    test_date      DATE,          -- date of the update
    age            NUMBER(10),    -- in the sample rows: days elapsed since the earliest test_date
    current_record VARCHAR2(1),   -- 'Y' on the latest row of the sample data, NULL otherwise
    test_data      VARCHAR2(50)
);
-- Sample data: four updates to test 1234 during January 2020.
-- ANSI DATE literals replace TO_DATE(..., 'DD-MON-RRRR HH24.MI.SS') so the
-- script no longer depends on the session's NLS_DATE_LANGUAGE (the 'JAN'
-- month abbreviation is only parsed in English-language sessions).
-- A DATE literal carries a midnight time component, same as the original values.
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'ABC', DATE '2020-01-08', 0, NULL, 'DATA1');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'ABC', DATE '2020-01-17', 9, NULL, 'DATA2');
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'XYZ', DATE '2020-01-22', 14, NULL, 'DATA3');
-- Latest update: the only row flagged as the current record.
INSERT INTO test_table (test_id, test_name, test_date, age, current_record, test_data)
VALUES (1234, 'XYZ', DATE '2020-01-31', 23, 'Y', 'DATA4');
目标是用数据集中的前一条记录填充缺失的日期范围:TEST_DATE 和 AGE 需要逐日递增,而 TEST_NAME 和 TEST_DATA 沿用前一条记录的值。只有最新的记录的 CURRENT_RECORD 才能为 'Y'。
这可以在 PL/SQL 中完成,但我们被要求研究能否用创建视图的方式解决。以下是预期的输出:
粗体行是 test_table 中已存在的行,其他行是动态生成的。
您可以使用递归 CTE 生成日期序列,然后将其与原表做 left join,最后使用 lag() 及其便捷的 ignore nulls 选项来填补空白(Oracle 支持该选项,大多数其他数据库则不支持):
-- Gap fill for a single test_id: generate one row per calendar day between the
-- first and last test_date, then carry the latest real row's values forward
-- into the generated days.
WITH days (test_date, last_date, age) AS (
    -- Anchor row: earliest date, latest date and smallest age in the table.
    SELECT MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    UNION ALL
    -- Recursive step: advance date and age by one until the latest date.
    SELECT test_date + 1, last_date, age + 1
    FROM days
    WHERE test_date < last_date
)
SELECT
    -- Real rows keep their own value; generated rows (NULL after the outer
    -- join) take the nearest preceding non-NULL value.
    COALESCE(src.test_id,
             LAG(src.test_id) IGNORE NULLS OVER (ORDER BY d.test_date)) AS test_id,
    COALESCE(src.test_name,
             LAG(src.test_name) IGNORE NULLS OVER (ORDER BY d.test_date)) AS test_name,
    d.test_date,
    d.age,
    COALESCE(src.current_record,
             LAG(src.current_record) IGNORE NULLS OVER (ORDER BY d.test_date)) AS current_record,
    COALESCE(src.test_data,
             LAG(src.test_data) IGNORE NULLS OVER (ORDER BY d.test_date)) AS test_data
FROM days d
LEFT JOIN test_table src
    ON src.test_date = d.test_date
ORDER BY d.test_date
TEST_ID | TEST_NAME | TEST_DATE | AGE | CURRENT_RECORD | TEST_DATA
------: | :-------- | :-------- | --: | :------------- | :--------
1234 | ABC | 08-JAN-20 | 0 | null | DATA1
1234 | ABC | 09-JAN-20 | 1 | null | DATA1
1234 | ABC | 10-JAN-20 | 2 | null | DATA1
1234 | ABC | 11-JAN-20 | 3 | null | DATA1
1234 | ABC | 12-JAN-20 | 4 | null | DATA1
1234 | ABC | 13-JAN-20 | 5 | null | DATA1
1234 | ABC | 14-JAN-20 | 6 | null | DATA1
1234 | ABC | 15-JAN-20 | 7 | null | DATA1
1234 | ABC | 16-JAN-20 | 8 | null | DATA1
1234 | ABC | 17-JAN-20 | 9 | null | DATA2
1234 | ABC | 18-JAN-20 | 10 | null | DATA2
1234 | ABC | 19-JAN-20 | 11 | null | DATA2
1234 | ABC | 20-JAN-20 | 12 | null | DATA2
1234 | ABC | 21-JAN-20 | 13 | null | DATA2
1234 | XYZ | 22-JAN-20 | 14 | null | DATA3
1234 | XYZ | 23-JAN-20 | 15 | null | DATA3
1234 | XYZ | 24-JAN-20 | 16 | null | DATA3
1234 | XYZ | 25-JAN-20 | 17 | null | DATA3
1234 | XYZ | 26-JAN-20 | 18 | null | DATA3
1234 | XYZ | 27-JAN-20 | 19 | null | DATA3
1234 | XYZ | 28-JAN-20 | 20 | null | DATA3
1234 | XYZ | 29-JAN-20 | 21 | null | DATA3
1234 | XYZ | 30-JAN-20 | 22 | null | DATA3
1234 | XYZ | 31-JAN-20 | 23 | Y | DATA4
实际上,窗口函数的 ignore nulls 选项是在 Oracle 12 中才引入的。如果您确实在使用 11g(如问题标签所示),那么可以用窗口计数和 first_value() 来模拟 ignore nulls:
-- Gap fill for a single test_id without IGNORE NULLS: a running count of real
-- rows assigns every generated day to the group opened by the preceding real
-- row, and FIRST_VALUE() then reads that real row's values.
WITH cte (test_date, max_date, age) AS (
    -- Anchor row: earliest date, latest date and smallest age in the table.
    SELECT MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    UNION ALL
    -- Recursive step: advance date and age by one until the latest date.
    SELECT test_date + 1, max_date, age + 1
    FROM cte
    WHERE test_date < max_date
)
SELECT
    -- Fixed: this column was aliased test_name, duplicating the next alias.
    FIRST_VALUE(test_id) OVER (PARTITION BY grp ORDER BY test_date) AS test_id,
    FIRST_VALUE(test_name) OVER (PARTITION BY grp ORDER BY test_date) AS test_name,
    test_date,
    age,
    FIRST_VALUE(current_record) OVER (PARTITION BY grp ORDER BY test_date) AS current_record,
    FIRST_VALUE(test_data) OVER (PARTITION BY grp ORDER BY test_date) AS test_data
FROM (
    SELECT
        -- Fixed: test_id must come from test_table; the cte exposes only
        -- test_date, max_date and age, so c.test_id is an invalid identifier.
        t.test_id,
        t.test_name,
        c.test_date,
        c.age,
        t.current_record,
        t.test_data,
        -- COUNT skips the NULLs produced by the outer join, so the counter
        -- only increases on days that have a real row.
        COUNT(t.test_id) OVER (ORDER BY c.test_date) AS grp
    FROM cte c
    LEFT JOIN test_table t
        ON t.test_date = c.test_date
) t
ORDER BY test_date
最后,如果您需要处理多个 test_id:
-- Gap fill for several test_ids: each test_id gets its own day series, and
-- every generated day reads its values from the preceding real row of the
-- same test_id.
WITH days (test_id, test_date, last_date, age) AS (
    -- Anchor rows: per test_id, earliest date, latest date and smallest age.
    SELECT test_id, MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    GROUP BY test_id
    UNION ALL
    -- Recursive step: advance date and age by one until that id's latest date.
    SELECT test_id, test_date + 1, last_date, age + 1
    FROM days
    WHERE test_date < last_date
)
SELECT
    test_id,
    FIRST_VALUE(test_name) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_name,
    test_date,
    age,
    FIRST_VALUE(current_record) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS current_record,
    FIRST_VALUE(test_data) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_data
FROM (
    SELECT
        d.test_id,
        src.test_name,
        d.test_date,
        d.age,
        src.current_record,
        src.test_data,
        -- Running count of real rows per test_id; generated days share the
        -- counter value of the real row that precedes them.
        COUNT(src.test_id) OVER (PARTITION BY d.test_id ORDER BY d.test_date) AS grp
    FROM days d
    LEFT JOIN test_table src
        ON src.test_id = d.test_id
       AND src.test_date = d.test_date
) grouped
ORDER BY test_id, test_date
感谢大家的帮助,这是最终的解决方案
-- Final gap-fill query for several test_ids, written as two chained CTEs:
-- 1) calendar: one row per test_id and day between its first and last date
-- 2) tagged:   calendar joined to the real rows, with a per-id running count
WITH calendar (test_id, test_date, max_date, age) AS (
    -- Anchor rows: per test_id, earliest date, latest date and smallest age.
    SELECT test_id, MIN(test_date), MAX(test_date), MIN(age)
    FROM test_table
    GROUP BY test_id
    UNION ALL
    -- Recursive step: advance date and age by one until that id's latest date.
    SELECT test_id, test_date + 1, max_date, age + 1
    FROM calendar
    WHERE test_date < max_date
),
tagged AS (
    SELECT
        cal.test_id,
        tt.test_name,
        cal.test_date,
        cal.age,
        tt.current_record,
        tt.test_data,
        -- Counts only days with a real row (NULLs from the outer join are
        -- skipped), so each real row opens a new group.
        COUNT(tt.test_id) OVER (PARTITION BY cal.test_id ORDER BY cal.test_date) AS grp
    FROM calendar cal
    LEFT JOIN test_table tt
        ON tt.test_id = cal.test_id
       AND tt.test_date = cal.test_date
)
SELECT
    test_id,
    -- The first row of each (test_id, grp) partition is the real row.
    FIRST_VALUE(test_name) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_name,
    test_date,
    age,
    FIRST_VALUE(current_record) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS current_record,
    FIRST_VALUE(test_data) OVER (PARTITION BY test_id, grp ORDER BY test_date) AS test_data
FROM tagged
ORDER BY test_id, test_date