如何正确生成满足主键和外键约束的大量随机 SQL 数据?
How to properly generate big random SQL data respecting primary and foreign keys?
最近我有一个任务要生成基于此 E-R 图的大型测试数据库:https://i.stack.imgur.com/I2kr9.png
我需要总计超过 300,000 行数据。用 Excel 的随机函数很容易生成 Customer、Supplier 和 Product 这几张表,但我不知道如何正确创建 Order 和 OrderItem 表:我需要把多行(具有匹配 ID)中的每个 UnitPrice 和 Quantity 相乘,以得到位于另一张表中的 TotalAmount;当然,每个主键(PK)和外键(FK)也必须完全匹配。
我知道这是个很基础的问题,但任何小提示都会有所帮助;无论是直接用 SQL 脚本、Excel 还是其他任何方式来生成,都没有关系。
提前致谢!
:) 可以看看 Northwind 示例数据库,它已经包含这些表。字段名称和数量并非一一对应,但很容易修改。
您所需要的只是通过简单的插入来增加行数。即:将客户加倍:
-- Double the customer rows: every existing customer is inserted again with
-- '2' appended to both name columns; City and Country are copied unchanged.
INSERT INTO customers
    (FirstName, LastName, City, Country)
SELECT
    src.FirstName + '2',
    src.LastName + '2',
    src.City,
    src.Country
FROM Customers AS src;
300K行一点也不大,其实很小
PS:我假设您会把客户表的 Id 改为 int 类型的标识(identity)列;在 Northwind 中它是字符类型的数据。
编辑:我承诺的代码。我知道它很丑:
-- Create a tally (numbers) table: consecutive integers starting at 1, up to
-- 300000. The INSERT statements further down use it as a row generator.
-- n is the primary key so that TOP (...) ORDER BY n reads can use the key order.
DECLARE @tally TABLE (n INT NOT NULL PRIMARY KEY);

-- A catalog view cross joined with itself serves purely as a row source
-- (assumed to provide at least 300000 rows); ROW_NUMBER() numbers them.
-- Database, view and column names are written in their actual lower case so
-- the statement also resolves on a case-sensitive server collation.
INSERT INTO @tally (n)
SELECT TOP (300000)
       ROW_NUMBER() OVER (ORDER BY c1.object_id)
FROM master.sys.all_columns AS c1
CROSS JOIN master.sys.all_columns AS c2;
-- The requirement is 300K rows in total, but here we generate about 300K order items alone:
-- approx. 1000 customers, each with 50 orders, averaging 6 items per order.
-- Customers: Id is an auto-generated identity primary key; all other
-- columns are short, nullable text attributes.
CREATE TABLE Customers
(
    Id        INT IDENTITY(1, 1) PRIMARY KEY,
    FirstName VARCHAR(15),
    LastName  VARCHAR(15),
    City      VARCHAR(15),
    Country   VARCHAR(15),
    Phone     VARCHAR(15)
);
-- Orders: order header. CustomerId is a foreign key to Customers(Id);
-- TotalAmount is explicitly nullable.
CREATE TABLE Orders
(
    Id          INT IDENTITY(1, 1) PRIMARY KEY,
    OrderDate   DATETIME,
    OrderNumber INT,
    CustomerId  INT REFERENCES Customers (Id),
    TotalAmount MONEY NULL
);
-- Suppliers: Id is an auto-generated identity primary key.
-- The key column is spelled "Id" (not "id") to match the other tables and
-- the references to Suppliers(Id) / s.Id elsewhere in this script; with a
-- case-sensitive collation the lower-case spelling would not resolve.
CREATE TABLE Suppliers
(
    Id           INT IDENTITY PRIMARY KEY,
    CompanyName  VARCHAR(15),
    ContactName  VARCHAR(15),
    ContactTitle VARCHAR(15),
    City         VARCHAR(15),
    Country      VARCHAR(15),
    Phone        VARCHAR(15),
    Fax          VARCHAR(15)
);
-- Products: each product row points at its supplier through SupplierId,
-- a foreign key to Suppliers(Id).
CREATE TABLE Products
(
    Id             INT IDENTITY(1, 1) PRIMARY KEY,
    ProductName    VARCHAR(50),
    SupplierId     INT REFERENCES Suppliers (Id),
    UnitPrice      MONEY,
    Package        VARCHAR(20),
    IsDiscontinued BIT
);
-- OrderItems: one line of an order. OrderId and ProductId are foreign keys
-- to Orders(Id) and Products(Id); UnitPrice and Quantity are stored per line.
CREATE TABLE OrderItems
(
    Id        INT IDENTITY(1, 1) PRIMARY KEY,
    OrderId   INT REFERENCES Orders (Id),
    ProductId INT REFERENCES Products (Id),
    UnitPrice MONEY,
    Quantity  INT
);
-- Seed 1000 synthetic customers from the tally table. Each value is a fixed
-- label followed by the tally number; n % 10 and n % 100 make the City and
-- Country values repeat in cycles of 10 and 100 respectively.
INSERT INTO Customers (FirstName, LastName, City, Country, Phone)
SELECT TOP (1000)
       'FirstName' + CAST(n AS VARCHAR(6)),
       'LastName'  + CAST(n AS VARCHAR(6)),
       'City'      + CAST(n % 10 AS VARCHAR(6)),
       'Country'   + CAST(n % 100 AS VARCHAR(6)),
       'Phone'     + CAST(n AS VARCHAR(6))
FROM @tally
ORDER BY n;  -- without ORDER BY, TOP is free to return any 1000 tally rows
-- Give every customer 50 orders. For each of the 50 lowest tally numbers n,
-- one order is created with OrderNumber = n and OrderDate = n days before the
-- current date/time. TotalAmount is not supplied here and stays NULL.
-- The table name is spelled "Customers" exactly as declared so the statement
-- also resolves under a case-sensitive collation.
INSERT INTO Orders (OrderDate, OrderNumber, CustomerId)
SELECT t.d,
       t.n,
       c.Id
FROM Customers AS c
CROSS APPLY (
    SELECT TOP (50)
           n,
           DATEADD(DAY, -n, GETDATE())
    FROM @tally
    ORDER BY n  -- make the 50 picked numbers deterministic
) AS t (n, d);
-- Seed 10 synthetic suppliers from the tally table; every column is a fixed
-- label followed by the tally number (or n % 10 / n % 100 for City / Country).
INSERT INTO Suppliers (CompanyName, ContactName, ContactTitle, City, Country, Phone, Fax)
SELECT TOP (10)
       'Company' + CAST(n AS VARCHAR(6)),
       'Contact' + CAST(n AS VARCHAR(6)),
       'Title'   + CAST(n AS VARCHAR(6)),
       'City'    + CAST(n % 10 AS VARCHAR(6)),
       'Country' + CAST(n % 100 AS VARCHAR(6)),
       'Phone'   + CAST(n AS VARCHAR(6)),
       'Fax'     + CAST(n AS VARCHAR(6))
FROM @tally
ORDER BY n;  -- without ORDER BY, TOP is free to return any 10 tally rows
-- Create the products. ts pairs each of the 500 lowest tally numbers with
-- every supplier (an explicit CROSS JOIN), i.e. 500 rows per supplier.
-- At most 5000 of those pairs are inserted, in random order (ORDER BY NEWID()).
-- UnitPrice is n * 10 and Package cycles through 5 values (n % 5).
WITH ts (n, sId) AS (
    SELECT t.n,
           s.Id
    FROM (
        SELECT TOP (500) n
        FROM @tally
        ORDER BY n  -- make the 500 picked numbers deterministic
    ) AS t
    CROSS JOIN Suppliers AS s
)
INSERT INTO Products (ProductName, SupplierId, UnitPrice, Package, IsDiscontinued)
SELECT TOP (5000)
       'Product' + CAST(n AS VARCHAR(6)),
       sId,
       n * 10,
       'Package' + CAST(n % 5 AS VARCHAR(6)),
       -- NOTE(review): n never exceeds 500 here, so n % 1500 = 0 is never true
       -- and every product gets IsDiscontinued = 0; confirm whether some
       -- discontinued products were intended.
       CASE WHEN n % 1500 = 0 THEN 1 ELSE 0 END
FROM ts
ORDER BY NEWID();
-- Generate the order items: one candidate row per tally row (up to 300000),
-- each with a random order id in 1..50000 and a random product id in 1..5000.
-- These ranges assume Orders.Id and Products.Id were assigned contiguously
-- from 1 by the preceding inserts into freshly created tables; an order id
-- without a matching Orders row would violate the foreign key, while a
-- product id without a match is silently dropped by the INNER JOIN.
-- UnitPrice is copied from the product; Quantity is random in 1..20.
-- CTE and column names are spelled exactly as declared (pdata, Id) so the
-- statement also resolves under a case-sensitive collation.
WITH pdata (oid, pid) AS (
    SELECT TOP (300 * 1000)
           ABS(CAST(CHECKSUM(NEWID()) AS BIGINT)) % 50000 + 1,
           ABS(CAST(CHECKSUM(NEWID()) AS BIGINT)) % 5000 + 1
    FROM @tally
    ORDER BY NEWID()
)
INSERT INTO OrderItems (OrderId, ProductId, UnitPrice, Quantity)
SELECT d.oid,
       d.pid,
       p.UnitPrice,
       ABS(CAST(CHECKSUM(NEWID()) AS BIGINT)) % 20 + 1
FROM pdata AS d
INNER JOIN Products AS p
    ON d.pid = p.Id
ORDER BY d.oid, d.pid;

-- Populate Orders.TotalAmount as the sum of UnitPrice * Quantity over each
-- order's items. Intentionally updates every order (no WHERE clause);
-- orders that received no items get 0 instead of staying NULL.
UPDATE o
SET TotalAmount = COALESCE(t.Amount, 0)
FROM Orders AS o
LEFT JOIN (
    SELECT oi.OrderId,
           SUM(oi.UnitPrice * oi.Quantity) AS Amount
    FROM OrderItems AS oi
    GROUP BY oi.OrderId
) AS t
    ON t.OrderId = o.Id;
最近我有一个任务要生成基于此 E-R 图的大型测试数据库:https://i.stack.imgur.com/I2kr9.png
我需要总计超过 300,000 行数据。用 Excel 的随机函数很容易生成 Customer、Supplier 和 Product 这几张表,但我不知道如何正确创建 Order 和 OrderItem 表:我需要把多行(具有匹配 ID)中的每个 UnitPrice 和 Quantity 相乘,以得到位于另一张表中的 TotalAmount;当然,每个主键(PK)和外键(FK)也必须完全匹配。
我知道这是个很基础的问题,但任何小提示都会有所帮助;无论是直接用 SQL 脚本、Excel 还是其他任何方式来生成,都没有关系。
提前致谢!
:) 可以看看 Northwind 示例数据库,它已经包含这些表。字段名称和数量并非一一对应,但很容易修改。
您所需要的只是通过简单的插入来增加行数。即:将客户加倍:
-- Double the customer rows: every existing customer is inserted again with
-- '2' appended to both name columns; City and Country are copied unchanged.
INSERT INTO customers
    (FirstName, LastName, City, Country)
SELECT
    src.FirstName + '2',
    src.LastName + '2',
    src.City,
    src.Country
FROM Customers AS src;
300K行一点也不大,其实很小
PS:我假设您会把客户表的 Id 改为 int 类型的标识(identity)列;在 Northwind 中它是字符类型的数据。
编辑:我承诺的代码。我知道它很丑:
-- Create a tally (numbers) table: consecutive integers starting at 1, up to
-- 300000. The INSERT statements further down use it as a row generator.
-- n is the primary key so that TOP (...) ORDER BY n reads can use the key order.
DECLARE @tally TABLE (n INT NOT NULL PRIMARY KEY);

-- A catalog view cross joined with itself serves purely as a row source
-- (assumed to provide at least 300000 rows); ROW_NUMBER() numbers them.
-- Database, view and column names are written in their actual lower case so
-- the statement also resolves on a case-sensitive server collation.
INSERT INTO @tally (n)
SELECT TOP (300000)
       ROW_NUMBER() OVER (ORDER BY c1.object_id)
FROM master.sys.all_columns AS c1
CROSS JOIN master.sys.all_columns AS c2;
-- The requirement is 300K rows in total, but here we generate about 300K order items alone:
-- approx. 1000 customers, each with 50 orders, averaging 6 items per order.
-- Customers: Id is an auto-generated identity primary key; all other
-- columns are short, nullable text attributes.
CREATE TABLE Customers
(
    Id        INT IDENTITY(1, 1) PRIMARY KEY,
    FirstName VARCHAR(15),
    LastName  VARCHAR(15),
    City      VARCHAR(15),
    Country   VARCHAR(15),
    Phone     VARCHAR(15)
);
-- Orders: order header. CustomerId is a foreign key to Customers(Id);
-- TotalAmount is explicitly nullable.
CREATE TABLE Orders
(
    Id          INT IDENTITY(1, 1) PRIMARY KEY,
    OrderDate   DATETIME,
    OrderNumber INT,
    CustomerId  INT REFERENCES Customers (Id),
    TotalAmount MONEY NULL
);
-- Suppliers: Id is an auto-generated identity primary key.
-- The key column is spelled "Id" (not "id") to match the other tables and
-- the references to Suppliers(Id) / s.Id elsewhere in this script; with a
-- case-sensitive collation the lower-case spelling would not resolve.
CREATE TABLE Suppliers
(
    Id           INT IDENTITY PRIMARY KEY,
    CompanyName  VARCHAR(15),
    ContactName  VARCHAR(15),
    ContactTitle VARCHAR(15),
    City         VARCHAR(15),
    Country      VARCHAR(15),
    Phone        VARCHAR(15),
    Fax          VARCHAR(15)
);
-- Products: each product row points at its supplier through SupplierId,
-- a foreign key to Suppliers(Id).
CREATE TABLE Products
(
    Id             INT IDENTITY(1, 1) PRIMARY KEY,
    ProductName    VARCHAR(50),
    SupplierId     INT REFERENCES Suppliers (Id),
    UnitPrice      MONEY,
    Package        VARCHAR(20),
    IsDiscontinued BIT
);
-- OrderItems: one line of an order. OrderId and ProductId are foreign keys
-- to Orders(Id) and Products(Id); UnitPrice and Quantity are stored per line.
CREATE TABLE OrderItems
(
    Id        INT IDENTITY(1, 1) PRIMARY KEY,
    OrderId   INT REFERENCES Orders (Id),
    ProductId INT REFERENCES Products (Id),
    UnitPrice MONEY,
    Quantity  INT
);
-- Seed 1000 synthetic customers from the tally table. Each value is a fixed
-- label followed by the tally number; n % 10 and n % 100 make the City and
-- Country values repeat in cycles of 10 and 100 respectively.
INSERT INTO Customers (FirstName, LastName, City, Country, Phone)
SELECT TOP (1000)
       'FirstName' + CAST(n AS VARCHAR(6)),
       'LastName'  + CAST(n AS VARCHAR(6)),
       'City'      + CAST(n % 10 AS VARCHAR(6)),
       'Country'   + CAST(n % 100 AS VARCHAR(6)),
       'Phone'     + CAST(n AS VARCHAR(6))
FROM @tally
ORDER BY n;  -- without ORDER BY, TOP is free to return any 1000 tally rows
-- Give every customer 50 orders. For each of the 50 lowest tally numbers n,
-- one order is created with OrderNumber = n and OrderDate = n days before the
-- current date/time. TotalAmount is not supplied here and stays NULL.
-- The table name is spelled "Customers" exactly as declared so the statement
-- also resolves under a case-sensitive collation.
INSERT INTO Orders (OrderDate, OrderNumber, CustomerId)
SELECT t.d,
       t.n,
       c.Id
FROM Customers AS c
CROSS APPLY (
    SELECT TOP (50)
           n,
           DATEADD(DAY, -n, GETDATE())
    FROM @tally
    ORDER BY n  -- make the 50 picked numbers deterministic
) AS t (n, d);
-- Seed 10 synthetic suppliers from the tally table; every column is a fixed
-- label followed by the tally number (or n % 10 / n % 100 for City / Country).
INSERT INTO Suppliers (CompanyName, ContactName, ContactTitle, City, Country, Phone, Fax)
SELECT TOP (10)
       'Company' + CAST(n AS VARCHAR(6)),
       'Contact' + CAST(n AS VARCHAR(6)),
       'Title'   + CAST(n AS VARCHAR(6)),
       'City'    + CAST(n % 10 AS VARCHAR(6)),
       'Country' + CAST(n % 100 AS VARCHAR(6)),
       'Phone'   + CAST(n AS VARCHAR(6)),
       'Fax'     + CAST(n AS VARCHAR(6))
FROM @tally
ORDER BY n;  -- without ORDER BY, TOP is free to return any 10 tally rows
-- Create the products. ts pairs each of the 500 lowest tally numbers with
-- every supplier (an explicit CROSS JOIN), i.e. 500 rows per supplier.
-- At most 5000 of those pairs are inserted, in random order (ORDER BY NEWID()).
-- UnitPrice is n * 10 and Package cycles through 5 values (n % 5).
WITH ts (n, sId) AS (
    SELECT t.n,
           s.Id
    FROM (
        SELECT TOP (500) n
        FROM @tally
        ORDER BY n  -- make the 500 picked numbers deterministic
    ) AS t
    CROSS JOIN Suppliers AS s
)
INSERT INTO Products (ProductName, SupplierId, UnitPrice, Package, IsDiscontinued)
SELECT TOP (5000)
       'Product' + CAST(n AS VARCHAR(6)),
       sId,
       n * 10,
       'Package' + CAST(n % 5 AS VARCHAR(6)),
       -- NOTE(review): n never exceeds 500 here, so n % 1500 = 0 is never true
       -- and every product gets IsDiscontinued = 0; confirm whether some
       -- discontinued products were intended.
       CASE WHEN n % 1500 = 0 THEN 1 ELSE 0 END
FROM ts
ORDER BY NEWID();
-- Generate the order items: one candidate row per tally row (up to 300000),
-- each with a random order id in 1..50000 and a random product id in 1..5000.
-- These ranges assume Orders.Id and Products.Id were assigned contiguously
-- from 1 by the preceding inserts into freshly created tables; an order id
-- without a matching Orders row would violate the foreign key, while a
-- product id without a match is silently dropped by the INNER JOIN.
-- UnitPrice is copied from the product; Quantity is random in 1..20.
-- CTE and column names are spelled exactly as declared (pdata, Id) so the
-- statement also resolves under a case-sensitive collation.
WITH pdata (oid, pid) AS (
    SELECT TOP (300 * 1000)
           ABS(CAST(CHECKSUM(NEWID()) AS BIGINT)) % 50000 + 1,
           ABS(CAST(CHECKSUM(NEWID()) AS BIGINT)) % 5000 + 1
    FROM @tally
    ORDER BY NEWID()
)
INSERT INTO OrderItems (OrderId, ProductId, UnitPrice, Quantity)
SELECT d.oid,
       d.pid,
       p.UnitPrice,
       ABS(CAST(CHECKSUM(NEWID()) AS BIGINT)) % 20 + 1
FROM pdata AS d
INNER JOIN Products AS p
    ON d.pid = p.Id
ORDER BY d.oid, d.pid;

-- Populate Orders.TotalAmount as the sum of UnitPrice * Quantity over each
-- order's items. Intentionally updates every order (no WHERE clause);
-- orders that received no items get 0 instead of staying NULL.
UPDATE o
SET TotalAmount = COALESCE(t.Amount, 0)
FROM Orders AS o
LEFT JOIN (
    SELECT oi.OrderId,
           SUM(oi.UnitPrice * oi.Quantity) AS Amount
    FROM OrderItems AS oi
    GROUP BY oi.OrderId
) AS t
    ON t.OrderId = o.Id;