创建 10 个不同的随机数
Create 10 different random numbers
我想生成一些随机数据。 ~50k 行
每一行都有自数 ID 和从 1 到 90 的 10 个数值。
+----+------+------+------+------+------+------+------+------+------+------+
| id | nr1 | nr2 | nr3 | nr4 | nr5 | nr6 | nr7 | nr8 | nr9 | nr10 |
+----+------+------+------+------+------+------+------+------+------+------+
| 1 | 1 | 39 | 19 | 23 | 28 | 80 | 3 | 42 | 60 | 32 |
+----+------+------+------+------+------+------+------+------+------+------+
| 2 | 43 | 18 | 3 | 24 | 29 | 33 | 15 | 1 | 61 | 80 |
+----+------+------+------+------+------+------+------+------+------+------+
| 3 | 11 | 25 | 33 | 2 | 30 | 3 | 1 | 44 | 62 | 78 |
+----+------+------+------+------+------+------+------+------+------+------+
所以每一行需要有 10 个不同的值。
我可以生成 10 个随机数,但自己比较所有 10 个随机数以确保没有重复值会花费很多时间。
INSERT INTO test
SELECT trunc(random() * 89 + 1) as nr1,
trunc(random() * 89 + 1) as nr2,
trunc(random() * 89 + 1) as nr3,
trunc(random() * 89 + 1) as nr4,
trunc(random() * 89 + 1) as nr5,
trunc(random() * 89 + 1) as nr6,
trunc(random() * 89 + 1) as nr7,
trunc(random() * 89 + 1) as nr8,
trunc(random() * 89 + 1) as nr9,
trunc(random() * 89 + 1) as nr10
FROM generate_series(1,50);
那么执行该任务的更有效方法是什么。
为什么不随机化你想要的范围内的数字顺序(比如 1,100),然后使用前 10 个。
一个循环和一个枢轴就可以做到:
declare @result table (id int, nr1 int, nr2 int, nr3 int, nr4 int, nr5 int, nr6 int, nr7 int, nr8 int, nr9 int, nr10 int)
declare @numberSet table (n int)
declare @i table (rw int,rnk int, n int)
insert into @numberset
SELECT DISTINCT n = number
FROM master..[spt_values]
WHERE number BETWEEN 1 AND 100
declare @l int
set @l = 0
WHILE (@l < 50000)
BEGIN
insert into @i
select top 10 @l+1,rank() over (order by id), n from (select newid() id , n from @numberset ) b
SELECT @l = @l + 1
END
INSERT INTO @result
SELECT rw,[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]
FROM (SELECT rw, n,rnk from @i) as sourcetable
PIVOT
(max(n) for rnk in ([1],[2],[3],[4],[5],[6],[7],[8],[9],[10])
) as pivottable;
SELECT * FROM @result
尝试:
-- INSERT INTO table( ...... )
CREATE TABLE test111 AS
SELECT
max( case when rn = 1 then nr end ) nr1,
max( case when rn = 2 then nr end ) nr2,
max( case when rn = 3 then nr end ) nr3,
max( case when rn = 4 then nr end ) nr4,
max( case when rn = 5 then nr end ) nr5,
max( case when rn = 6 then nr end ) nr6,
max( case when rn = 7 then nr end ) nr7,
max( case when rn = 8 then nr end ) nr8,
max( case when rn = 9 then nr end ) nr9,
max( case when rn = 10 then nr end ) nr10
FROM generate_series(1,50000) id,
LATERAL (
SELECT nr,
row_number() over ( ORDER BY id * random() ) rn
FROM generate_series(1,90) nr
) x
GROUP BY id
这个 CREATE TABLE
查询在我的电脑上大约需要 8-9 秒:
Query OK, 50000 rows affected (execution time: 8,016 sec; total time: 8,016 sec)
编辑 - PostgreSQL 版本 <= 9.2
CREATE TABLE test222 AS
SELECT
max( case when rn = 1 then nr end ) nr1,
max( case when rn = 2 then nr end ) nr2,
max( case when rn = 3 then nr end ) nr3,
max( case when rn = 4 then nr end ) nr4,
max( case when rn = 5 then nr end ) nr5,
max( case when rn = 6 then nr end ) nr6,
max( case when rn = 7 then nr end ) nr7,
max( case when rn = 8 then nr end ) nr8,
max( case when rn = 9 then nr end ) nr9,
max( case when rn = 10 then nr end ) nr10
FROM (
SELECT id, nr, row_number() over (partition by id order by random()) rn
FROM generate_series(1,50000) id
CROSS JOIN (
SELECT * FROM generate_series(1,90) nr
) x
) y
WHERE rn <= 10
GROUP BY id
这个版本在我的电脑上大约需要 7-8 秒:
Query OK, 50000 rows affected (execution time: 7,516 sec; total time: 7,516 sec)
我想生成一些随机数据。 ~50k 行
每一行都有自数 ID 和从 1 到 90 的 10 个数值。
+----+------+------+------+------+------+------+------+------+------+------+
| id | nr1 | nr2 | nr3 | nr4 | nr5 | nr6 | nr7 | nr8 | nr9 | nr10 |
+----+------+------+------+------+------+------+------+------+------+------+
| 1 | 1 | 39 | 19 | 23 | 28 | 80 | 3 | 42 | 60 | 32 |
+----+------+------+------+------+------+------+------+------+------+------+
| 2 | 43 | 18 | 3 | 24 | 29 | 33 | 15 | 1 | 61 | 80 |
+----+------+------+------+------+------+------+------+------+------+------+
| 3 | 11 | 25 | 33 | 2 | 30 | 3 | 1 | 44 | 62 | 78 |
+----+------+------+------+------+------+------+------+------+------+------+
所以每一行需要有 10 个不同的值。
我可以生成 10 个随机数,但自己比较所有 10 个随机数以确保没有重复值会花费很多时间。
INSERT INTO test
SELECT trunc(random() * 89 + 1) as nr1,
trunc(random() * 89 + 1) as nr2,
trunc(random() * 89 + 1) as nr3,
trunc(random() * 89 + 1) as nr4,
trunc(random() * 89 + 1) as nr5,
trunc(random() * 89 + 1) as nr6,
trunc(random() * 89 + 1) as nr7,
trunc(random() * 89 + 1) as nr8,
trunc(random() * 89 + 1) as nr9,
trunc(random() * 89 + 1) as nr10
FROM generate_series(1,50);
那么执行该任务的更有效方法是什么。
为什么不随机化你想要的范围内的数字顺序(比如 1,100),然后使用前 10 个。
一个循环和一个枢轴就可以做到:
declare @result table (id int, nr1 int, nr2 int, nr3 int, nr4 int, nr5 int, nr6 int, nr7 int, nr8 int, nr9 int, nr10 int)
declare @numberSet table (n int)
declare @i table (rw int,rnk int, n int)
insert into @numberset
SELECT DISTINCT n = number
FROM master..[spt_values]
WHERE number BETWEEN 1 AND 100
declare @l int
set @l = 0
WHILE (@l < 50000)
BEGIN
insert into @i
select top 10 @l+1,rank() over (order by id), n from (select newid() id , n from @numberset ) b
SELECT @l = @l + 1
END
INSERT INTO @result
SELECT rw,[1],[2],[3],[4],[5],[6],[7],[8],[9],[10]
FROM (SELECT rw, n,rnk from @i) as sourcetable
PIVOT
(max(n) for rnk in ([1],[2],[3],[4],[5],[6],[7],[8],[9],[10])
) as pivottable;
SELECT * FROM @result
尝试:
-- INSERT INTO table( ...... )
CREATE TABLE test111 AS
SELECT
max( case when rn = 1 then nr end ) nr1,
max( case when rn = 2 then nr end ) nr2,
max( case when rn = 3 then nr end ) nr3,
max( case when rn = 4 then nr end ) nr4,
max( case when rn = 5 then nr end ) nr5,
max( case when rn = 6 then nr end ) nr6,
max( case when rn = 7 then nr end ) nr7,
max( case when rn = 8 then nr end ) nr8,
max( case when rn = 9 then nr end ) nr9,
max( case when rn = 10 then nr end ) nr10
FROM generate_series(1,50000) id,
LATERAL (
SELECT nr,
row_number() over ( ORDER BY id * random() ) rn
FROM generate_series(1,90) nr
) x
GROUP BY id
这个 CREATE TABLE
查询在我的电脑上大约需要 8-9 秒:
Query OK, 50000 rows affected (execution time: 8,016 sec; total time: 8,016 sec)
编辑 - PostgreSQL 版本 <= 9.2
CREATE TABLE test222 AS
SELECT
max( case when rn = 1 then nr end ) nr1,
max( case when rn = 2 then nr end ) nr2,
max( case when rn = 3 then nr end ) nr3,
max( case when rn = 4 then nr end ) nr4,
max( case when rn = 5 then nr end ) nr5,
max( case when rn = 6 then nr end ) nr6,
max( case when rn = 7 then nr end ) nr7,
max( case when rn = 8 then nr end ) nr8,
max( case when rn = 9 then nr end ) nr9,
max( case when rn = 10 then nr end ) nr10
FROM (
SELECT id, nr, row_number() over (partition by id order by random()) rn
FROM generate_series(1,50000) id
CROSS JOIN (
SELECT * FROM generate_series(1,90) nr
) x
) y
WHERE rn <= 10
GROUP BY id
这个版本在我的电脑上大约需要 7-8 秒:
Query OK, 50000 rows affected (execution time: 7,516 sec; total time: 7,516 sec)