根据另一个 table 的计数,来自 table 的红移样本
Redshift sample from table based on count of another table
我的 TableA 有 3000 行(可以是小于 10000 的任何数字)。我需要创建包含 10000 行的 TableX。因此,我需要 select 随机 10000 -(TableA 中的行数)来自 TableB(并添加到 TableA 中)来创建 TableX。有什么想法吗?
像这样的东西(这显然行不通):
-- Sketch of the goal: create TableX from the rows of TableA plus rows of
-- TableB, with the TableB part limited to 10000 minus the TableA row count.
-- NOTE(review): not valid SQL as written -- the limit argument below is not a
-- well-formed expression ("count(*) from TableA" has no select of its own).
Create table TableX as
select * from TableA
union
select * from TableB limit (10000 - count(*) from TableA);
您可以使用 union all
和 window 函数。您没有列出 table 列,所以我假设 col1
和 col2
:
-- Fill tableX with every row of table1 plus a random sample of table2 rows,
-- sized to top the total up to 10000 rows (when table2 has enough rows).
insert into tableX (col1, col2)
select col1, col2 from table1
union all
select t2.col1, t2.col2
-- rn numbers the table2 rows in random order; the alias is required because
-- the join condition below refers to it as t2.rn.
from (select t2.*, row_number() over(order by random()) as rn from table2 t2) t2
-- Keep only as many table2 rows as are still missing after table1's rows.
inner join (select count(*) cnt from table1) t1 on t2.rn <= 10000 - t1.cnt;
第一个查询 union all
select 中的所有行来自 table1
。第二个查询将随机行号分配给 table2
中的行,然后根据需要添加 selects 行以达到总共 10000
.
实际上,select 来自两个 table 的所有行,然后在外部查询中 order by
和 limit
可能更简单:
-- Alternative: stack both tables, sort table1's rows ahead of table2's
-- (the 't1' tag sorts before 't2'), shuffle within each tag, and keep the
-- first 10000 rows.
insert into tableX (col1, col2)
select combined.col1, combined.col2
from (
    select col1, col2, 't1' as src
    from table1
    union all
    select col1, col2, 't2' as src
    from table2
) as combined
order by combined.src, random()
limit 10000;
-- Return every tablea row, topped up with randomly chosen tableb rows until
-- target_rows rows are reached (when tableb has enough rows).
-- NOTE(review): the union assumes tablea and tableb have matching column
-- lists -- confirm against the real schemas.
with inparms as (
    -- Target size of the combined result.
    select 10000 as target_rows
), acount as (
    -- Number of tablea rows next to the target. The count is taken in a scalar
    -- subquery so that no aggregate is mixed with an ungrouped column, and the
    -- CTE still yields one row when tablea is empty.
    select (select count(*) from tablea) as acount,
           inparms.target_rows
    from inparms
), btag as (
    -- Tag every tableb row with its source and a random position.
    select b.*,
           'tableb' as tabsource,
           row_number() over (order by random()) as rnum
    from tableb b
)
-- tabsource and rnum are added here so that both union branches expose the
-- same trailing columns; the rnum values of this branch are not used.
select a.*, 'tablea' as tabsource, row_number() over (order by 1) as rnum
from tablea a
union all
select b.*
from btag b
-- Keep only as many tableb rows as are missing to reach target_rows.
join acount a on b.rnum <= a.target_rows - a.acount
;
我的 TableA 有 3000 行(可以是小于 10000 的任何数字)。我需要创建包含 10000 行的 TableX。因此,我需要 select 随机 10000 -(TableA 中的行数)来自 TableB(并添加到 TableA 中)来创建 TableX。有什么想法吗? 像这样的东西(这显然行不通):
-- Sketch of the goal: create TableX from the rows of TableA plus rows of
-- TableB, with the TableB part limited to 10000 minus the TableA row count.
-- NOTE(review): not valid SQL as written -- the limit argument below is not a
-- well-formed expression ("count(*) from TableA" has no select of its own).
Create table TableX as
select * from TableA
union
select * from TableB limit (10000 - count(*) from TableA);
您可以使用 union all
和 window 函数。您没有列出 table 列,所以我假设 col1
和 col2
:
-- Fill tableX with every row of table1 plus a random sample of table2 rows,
-- sized to top the total up to 10000 rows (when table2 has enough rows).
insert into tableX (col1, col2)
select col1, col2 from table1
union all
select t2.col1, t2.col2
-- rn numbers the table2 rows in random order; the alias is required because
-- the join condition below refers to it as t2.rn.
from (select t2.*, row_number() over(order by random()) as rn from table2 t2) t2
-- Keep only as many table2 rows as are still missing after table1's rows.
inner join (select count(*) cnt from table1) t1 on t2.rn <= 10000 - t1.cnt;
第一个查询 union all
select 中的所有行来自 table1
。第二个查询将随机行号分配给 table2
中的行,然后根据需要添加 selects 行以达到总共 10000
.
实际上,select 来自两个 table 的所有行,然后在外部查询中 order by
和 limit
可能更简单:
-- Alternative: stack both tables, sort table1's rows ahead of table2's
-- (the 't1' tag sorts before 't2'), shuffle within each tag, and keep the
-- first 10000 rows.
insert into tableX (col1, col2)
select combined.col1, combined.col2
from (
    select col1, col2, 't1' as src
    from table1
    union all
    select col1, col2, 't2' as src
    from table2
) as combined
order by combined.src, random()
limit 10000;
-- Return every tablea row, topped up with randomly chosen tableb rows until
-- target_rows rows are reached (when tableb has enough rows).
-- NOTE(review): the union assumes tablea and tableb have matching column
-- lists -- confirm against the real schemas.
with inparms as (
    -- Target size of the combined result.
    select 10000 as target_rows
), acount as (
    -- Number of tablea rows next to the target. The count is taken in a scalar
    -- subquery so that no aggregate is mixed with an ungrouped column, and the
    -- CTE still yields one row when tablea is empty.
    select (select count(*) from tablea) as acount,
           inparms.target_rows
    from inparms
), btag as (
    -- Tag every tableb row with its source and a random position.
    select b.*,
           'tableb' as tabsource,
           row_number() over (order by random()) as rnum
    from tableb b
)
-- tabsource and rnum are added here so that both union branches expose the
-- same trailing columns; the rnum values of this branch are not used.
select a.*, 'tablea' as tabsource, row_number() over (order by 1) as rnum
from tablea a
union all
select b.*
from btag b
-- Keep only as many tableb rows as are missing to reach target_rows.
join acount a on b.rnum <= a.target_rows - a.acount
;