Redshift:根据另一个 table 的行数,从某个 table 中抽取样本

Redshift sample from table based on count of another table

我的 TableA 有 3000 行(可以是小于 10000 的任何数字)。我需要创建一个包含 10000 行的 TableX。因此,我需要从 TableB 中随机 select(10000 - TableA 的行数)行,并与 TableA 的所有行合并,来创建 TableX。有什么想法吗?像下面这样的写法(显然行不通):

-- Asker's illustrative attempt; the question itself states that it does not work.
-- NOTE(review): the LIMIT operand is a bare "count(*) from TableA" with no
-- SELECT, so it is not a valid expression and the statement cannot be parsed.
Create table TableX as
select * from TableA
union
select * from TableB limit (10000 - count(*) from TableA);

您可以使用 union all 和 window 函数。您没有列出 table 的列,所以我假设它们是 col1 和 col2:

-- Fill tableX with every row of table1 plus a random sample of table2 rows
-- that tops the total up to 10000.
insert into tableX (col1, col2)
select col1, col2 from table1
union all
select t2.col1, t2.col2
-- Number table2's rows in random order. The alias rn is required: the join
-- condition below refers to t2.rn.
from (select t2.*, row_number() over(order by random()) as rn from table2 t2) t2
-- t1 is a single row holding table1's row count, so only the first
-- (10000 - cnt) randomly numbered rows pass the join. When table1 already
-- holds 10000 rows or more, no table2 row qualifies.
inner join (select count(*) as cnt from table1) t1 on t2.rn <= 10000 - t1.cnt;

union all 的第一个查询 select 出 table1 中的所有行。第二个查询为 table2 中的行分配随机行号,然后只 select 所需数量的行,使总数达到 10000。

实际上,先 select 两个 table 的所有行,然后在外部查询中使用 order by 和 limit 可能更简单:

-- Alternative: stack both tables, tag every row with its source table, and
-- keep the first 10000 rows with all table1 rows sorted ahead of table2 rows.
insert into tableX (col1, col2)
select col1, col2
from (
    select col1, col2, 't1' as source_tag from table1
    union all
    select col1, col2, 't2' as source_tag from table2
) t
-- 't1' sorts before 't2', so table1 rows come first; random() shuffles the
-- rows within each source, which makes the table2 rows that survive the
-- limit a random sample.
order by source_tag, random()
limit 10000;
-- Return every tablea row plus enough randomly chosen tableb rows to reach
-- target_rows in total.
-- NOTE(review): the union assumes tablea and tableb have matching column
-- lists -- confirm against the real schemas.
with inparms as (
  -- Single-row parameter set: the desired total number of rows.
  select 10000 as target_rows
), acount as (
  -- One row pairing tablea's row count with the target. The count is a scalar
  -- subquery so that no aggregate is mixed with an ungrouped column, and so
  -- the row still exists (with acount = 0) when tablea is empty.
  select (select count(*) from tablea) as acount,
         inparms.target_rows
    from inparms
), btag as (
  -- Tag tableb rows with their source and number them in random order.
  select b.*, 'tableb' as tabsource,
         row_number() over (order by random()) as rnum
    from tableb b
)
-- tabsource and rnum are added here only so that both union branches expose
-- the same column list as btag.
select a.*, 'tablea' as tabsource, row_number() over (order by 1) as rnum
  from tablea a
union all
-- Keep only the first (target_rows - acount) randomly numbered tableb rows.
select b.*
  from btag b
  join acount a on b.rnum <= a.target_rows - a.acount
;