将一个组均匀拆分,使各项差异在 1% 以内
Even splitting of a group to within 1%
我的任务是将每个商店位置的顾客分成两个规模相当的组。所请求的结果集要求每个商店位置的两组在客户数量、订单数量和订购金额这三项上,彼此相差都在 1% 以内。
下面是我想出的代码,它工作得相当好,大多数时候都能得到想要的结果,但有时(我认为是由于组中存在异常值)差异会超过 1%。
-- Splits each store's customers into two groups ('A' and 'B') and reports the
-- customer count, order count and amount sold of each group.

-- Step 1: one row per store/customer with the totals the split is balanced on.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     CO.StoreID
    ,CO.CustomerID
    -- Count the orders; summing the identifiers themselves is not a quantity.
    -- NOTE(review): assumes OrderID identifies one order in CustomerOrders - confirm.
    ,COUNT(DISTINCT CO.OrderID) AS Orders
    ,SUM(CO.OrderAmount)        AS AmountSold
INTO #Orders
FROM CustomerOrders AS CO
GROUP BY
     CO.StoreID
    ,CO.CustomerID;

-- Step 2: rank the customers of each store from smallest to largest.
IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

SELECT
     O.StoreID
    ,O.CustomerID
    ,O.Orders
    ,O.AmountSold
    -- CustomerID breaks ties so that the ranking (and therefore the split)
    -- is the same on every run.
    ,ROW_NUMBER() OVER (
         PARTITION BY O.StoreID
         ORDER BY O.AmountSold, O.Orders, O.CustomerID
     ) AS Ranking
INTO #OrderRanking
FROM #Orders AS O;

-- Step 3: assign groups in a B,A,A,B pattern and total each group.
-- Strict alternation (B,A,B,A,...) over an ascending sort hands group A the
-- larger customer of every consecutive pair, so A is systematically heavier.
-- Reversing the order in every second pair cancels that bias while the two
-- customer counts still differ by at most one.
SELECT
     R.StoreID
    ,COUNT(R.CustomerID) AS CustomerCount
    ,SUM(R.Orders)       AS Orders
    ,SUM(R.AmountSold)   AS Amountsold
    ,G.[Grouping]
FROM #OrderRanking AS R
CROSS APPLY (
    SELECT CASE WHEN R.Ranking % 4 IN (2, 3) THEN 'A' ELSE 'B' END AS [Grouping]
) AS G
GROUP BY
     R.StoreID
    ,G.[Grouping];
有没有更好的分组方式来保证差异在 1% 以内?或者能否循环尝试几种不同的拆分,直到找到差异在 1% 以内的那一种?如果用循环,则需要一个故障安全机制来防止无限循环(以防这样的拆分根本不存在),比如在循环 x 次之后直接采用最接近的拆分。
我正在使用 SQL Server 2012 和 SSMS 2016。感谢您提供的任何帮助。
编辑:
我曾尝试将代码转换为非公司特定的代码,但我弄乱了代码。我意识到这一点并调整了代码以显示真正需要的东西。
Edit2:我自己取得了一些进展,想更新问题。
所以我在这方面做了更多工作:现在每次运行代码时,我都能让它按随机顺序排序,并显示每个组的差异百分比。接下来我想添加的是一种循环 X 次并保留总体差异最低的那次拆分的方法。这个周末我可能会再多尝试一些。但现在,下面是我所说的新代码。
-- Builds one random two-way split ('A' / 'B') of each store's customers and
-- reports, per store, how far group A is from group B as a percentage of B.

-- Step 1: one row per store/customer with its totals and a random sort key.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     CO.StoreID
    ,CO.CustomerID
    -- Count the orders; summing the identifiers themselves is not a quantity.
    -- NOTE(review): assumes OrderID identifies one order in CustomerOrders - confirm.
    ,COUNT(DISTINCT CO.OrderID) AS Orders
    ,SUM(CO.OrderAmount)        AS AmountSold
    -- RAND() without a seed is evaluated once per query, so every row would
    -- receive the same value. Seeding it from NEWID() gives a different value
    -- on every row.
    ,RAND(CHECKSUM(NEWID()))    AS Random
INTO #Orders
FROM CustomerOrders AS CO
GROUP BY
     CO.StoreID
    ,CO.CustomerID;

-- Step 2: number the customers of each store in that random order.
IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

SELECT
     O.StoreID
    ,O.CustomerID
    ,O.Orders
    ,O.AmountSold
    ,O.Random
    ,ROW_NUMBER() OVER (
         PARTITION BY O.StoreID
         ORDER BY O.Random, O.CustomerID   -- CustomerID settles equal keys
     ) AS Ranking
INTO #OrderRanking
FROM #Orders AS O;

-- Step 3: even rankings form group A, odd rankings group B; total each group.
IF OBJECT_ID('tempdb.dbo.#Split') IS NOT NULL DROP TABLE #Split;

SELECT
     R.StoreID
    ,COUNT(R.CustomerID) AS CustomerCount
    ,SUM(R.Orders)       AS Orders
    ,SUM(R.AmountSold)   AS Amountsold
    ,G.[Grouping]
INTO #Split
FROM #OrderRanking AS R
CROSS APPLY (
    SELECT CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
) AS G
GROUP BY
     R.StoreID
    ,G.[Grouping];

-- Step 4: signed percentage difference of A relative to B, per store.
WITH Totals AS (
    -- Put both groups of a store on one row. SUM keeps a negative group total
    -- as it is (MAX(... ELSE 0) would turn it into 0); a store without a
    -- group A row gets 0 for the A columns.
    SELECT
         S.StoreID
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersA
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersB
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersA
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersB
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountA
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountB
    FROM #Split AS S
    GROUP BY S.StoreID
)
SELECT
     T.StoreID
    -- NULLIF returns NULL instead of raising a divide-by-zero error when
    -- group B's total is 0.
    ,((T.CustomersA - T.CustomersB) / NULLIF(T.CustomersB, 0)) * 100 AS CustomerCountVar
    ,((T.OrdersA    - T.OrdersB)    / NULLIF(T.OrdersB, 0))    * 100 AS OrderVar
    ,((T.AmountA    - T.AmountB)    / NULLIF(T.AmountB, 0))    * 100 AS AmountsoldVar
FROM Totals AS T;
因此,确实不可能像我们所有人预期的那样始终保持在 1% 以内,但就像我说的那样,我们可以在尝试 X 次后尝试尽可能接近。我已经想出如何做到这一点。下面是我使用的代码,目前设置为 10 次尝试,但可以更改为适合业务的任何数字。
-- Tries up to @GiveUp random two-way splits of every store's customers.
-- For each store it keeps the first split whose differences are inside the
-- target (#TestB) and, as a fallback, the closest split seen (#BestPrep).
-- The final result set has one row per store/customer for the chosen split;
-- an even Ranking means group A and an odd Ranking means group B.

-- First acceptable split found for a store.
IF OBJECT_ID('tempdb.dbo.#TestB') IS NOT NULL DROP TABLE #TestB;
CREATE TABLE #TestB
(
     StoreID     int
    ,CustomerID  varchar(11)
    ,Orders      int
    ,AmountSold  float
    ,Random      float
    ,Ranking     bigint
    ,CombinedVar decimal(18,2)
);

-- Closest split seen so far for a store (exactly one attempt per store).
IF OBJECT_ID('tempdb.dbo.#BestPrep') IS NOT NULL DROP TABLE #BestPrep;
CREATE TABLE #BestPrep
(
     StoreID     int
    ,CustomerID  varchar(11)
    ,Orders      int
    ,AmountSold  float
    ,Random      float
    ,Ranking     bigint
    ,CombinedVar decimal(18,2)
);

-- The per-customer totals are the same on every attempt, so they are built
-- once here; only the Random column is refreshed inside the loop.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     CO.StoreID
    ,CO.CustomerID
    -- Count the orders; summing the identifiers themselves is not a quantity.
    -- NOTE(review): assumes OrderID identifies one order in CustomerOrders - confirm.
    ,COUNT(DISTINCT CO.OrderID) AS Orders
    ,SUM(CO.OrderAmount)        AS AmountSold
    ,RAND(CHECKSUM(NEWID()))    AS Random
INTO #Orders
FROM CustomerOrders AS CO
GROUP BY
     CO.StoreID
    ,CO.CustomerID;

-- Number of attempts left; change the start value to suit the business.
DECLARE @GiveUp int = 10;

WHILE @GiveUp > 0
BEGIN
    -- New random order for this attempt (intentionally updates every row).
    -- RAND() without a seed is evaluated once per query and would give every
    -- row the same value; seeding it from NEWID() gives a value per row.
    UPDATE #Orders
    SET Random = RAND(CHECKSUM(NEWID()));

    IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

    SELECT
         O.StoreID
        ,O.CustomerID
        ,O.Orders
        ,O.AmountSold
        ,O.Random
        ,ROW_NUMBER() OVER (
             PARTITION BY O.StoreID
             ORDER BY O.Random, O.CustomerID   -- CustomerID settles equal keys
         ) AS Ranking
    INTO #OrderRanking
    FROM #Orders AS O;

    -- Even rankings form group A, odd rankings group B; total each group.
    IF OBJECT_ID('tempdb.dbo.#Split') IS NOT NULL DROP TABLE #Split;

    SELECT
         R.StoreID
        ,COUNT(R.CustomerID) AS CustomerCount
        ,SUM(R.Orders)       AS Orders
        ,SUM(R.AmountSold)   AS Amountsold
        ,G.[Grouping]
    INTO #Split
    FROM #OrderRanking AS R
    CROSS APPLY (
        SELECT CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
    ) AS G
    GROUP BY
         R.StoreID
        ,G.[Grouping];

    -- Absolute percentage difference of A relative to B, per store.
    IF OBJECT_ID('tempdb.dbo.#Var') IS NOT NULL DROP TABLE #Var;

    WITH Totals AS (
        -- Both groups of a store on one row; a missing group A counts as 0.
        SELECT
             S.StoreID
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersA
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersB
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersA
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersB
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountA
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountB
        FROM #Split AS S
        GROUP BY S.StoreID
    ),
    Pct AS (
        -- NULLIF returns NULL instead of raising a divide-by-zero error when
        -- group B's total is 0.
        SELECT
             T.StoreID
            ,ABS(((T.CustomersA - T.CustomersB) / NULLIF(T.CustomersB, 0)) * 100) AS CustomerCountVar
            ,ABS(((T.OrdersA    - T.OrdersB)    / NULLIF(T.OrdersB, 0))    * 100) AS OrderVar
            ,ABS(((T.AmountA    - T.AmountB)    / NULLIF(T.AmountB, 0))    * 100) AS AmountsoldVar
        FROM Totals AS T
    )
    SELECT
         P.StoreID
        ,P.CustomerCountVar
        ,P.OrderVar
        ,P.AmountsoldVar
        ,P.OrderVar + P.AmountsoldVar AS CombinedVar
    INTO #Var
    FROM Pct AS P;

    -- Keep this attempt for every store that is inside the target and has no
    -- accepted split yet. The filter lives in the query itself: an IF without
    -- BEGIN...END guards only the single statement that follows it.
    -- The anti-join is per store because a customer can belong to several stores.
    INSERT INTO #TestB (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar)
    SELECT
         R.StoreID
        ,R.CustomerID
        ,R.Orders
        ,R.AmountSold
        ,R.Random
        ,R.Ranking
        ,V.CombinedVar
    FROM #OrderRanking AS R
    INNER JOIN #Var AS V
        ON V.StoreID = R.StoreID
    WHERE ((V.OrderVar < 1 AND V.AmountsoldVar < 1) OR V.CombinedVar < 2)
      AND NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = R.StoreID);

    -- Throw away the previous best of every store this attempt strictly beats.
    -- A tie keeps the earlier attempt, so a store never holds two attempts.
    DELETE B
    FROM #BestPrep AS B
    INNER JOIN #Var AS V
        ON V.StoreID = B.StoreID
    WHERE V.CombinedVar < B.CombinedVar
       OR (B.CombinedVar IS NULL AND V.CombinedVar IS NOT NULL);

    -- Store this attempt for every store that now has no best split on record.
    INSERT INTO #BestPrep (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar)
    SELECT
         R.StoreID
        ,R.CustomerID
        ,R.Orders
        ,R.AmountSold
        ,R.Random
        ,R.Ranking
        ,V.CombinedVar
    FROM #OrderRanking AS R
    INNER JOIN #Var AS V
        ON V.StoreID = R.StoreID
    WHERE NOT EXISTS (SELECT * FROM #BestPrep AS B WHERE B.StoreID = R.StoreID);

    -- Nothing left to improve once every store has an accepted split.
    IF NOT EXISTS (
        SELECT *
        FROM #Orders AS O
        WHERE NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = O.StoreID)
    )
        BREAK;

    SET @GiveUp = @GiveUp - 1;
END;

-- One split per store: the accepted split where one was found, otherwise the
-- closest attempt. The two halves cover different stores, so UNION ALL is
-- enough and no customer comes back with two different rankings.
SELECT
     T.StoreID
    ,T.CustomerID
    ,T.Orders
    ,T.AmountSold
    ,T.Random
    ,T.Ranking
    ,T.CombinedVar
FROM #TestB AS T
UNION ALL
SELECT
     B.StoreID
    ,B.CustomerID
    ,B.Orders
    ,B.AmountSold
    ,B.Random
    ,B.Ranking
    ,B.CombinedVar
FROM #BestPrep AS B
WHERE NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = B.StoreID);
我的任务是将每个商店位置的顾客分成两个规模相当的组。所请求的结果集要求每个商店位置的两组在客户数量、订单数量和订购金额这三项上,彼此相差都在 1% 以内。
下面是我想出的代码,它工作得相当好,大多数时候都能得到想要的结果,但有时(我认为是由于组中存在异常值)差异会超过 1%。
-- Splits each store's customers into two groups ('A' and 'B') and reports the
-- customer count, order count and amount sold of each group.

-- Step 1: one row per store/customer with the totals the split is balanced on.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     CO.StoreID
    ,CO.CustomerID
    -- Count the orders; summing the identifiers themselves is not a quantity.
    -- NOTE(review): assumes OrderID identifies one order in CustomerOrders - confirm.
    ,COUNT(DISTINCT CO.OrderID) AS Orders
    ,SUM(CO.OrderAmount)        AS AmountSold
INTO #Orders
FROM CustomerOrders AS CO
GROUP BY
     CO.StoreID
    ,CO.CustomerID;

-- Step 2: rank the customers of each store from smallest to largest.
IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

SELECT
     O.StoreID
    ,O.CustomerID
    ,O.Orders
    ,O.AmountSold
    -- CustomerID breaks ties so that the ranking (and therefore the split)
    -- is the same on every run.
    ,ROW_NUMBER() OVER (
         PARTITION BY O.StoreID
         ORDER BY O.AmountSold, O.Orders, O.CustomerID
     ) AS Ranking
INTO #OrderRanking
FROM #Orders AS O;

-- Step 3: assign groups in a B,A,A,B pattern and total each group.
-- Strict alternation (B,A,B,A,...) over an ascending sort hands group A the
-- larger customer of every consecutive pair, so A is systematically heavier.
-- Reversing the order in every second pair cancels that bias while the two
-- customer counts still differ by at most one.
SELECT
     R.StoreID
    ,COUNT(R.CustomerID) AS CustomerCount
    ,SUM(R.Orders)       AS Orders
    ,SUM(R.AmountSold)   AS Amountsold
    ,G.[Grouping]
FROM #OrderRanking AS R
CROSS APPLY (
    SELECT CASE WHEN R.Ranking % 4 IN (2, 3) THEN 'A' ELSE 'B' END AS [Grouping]
) AS G
GROUP BY
     R.StoreID
    ,G.[Grouping];
有没有更好的分组方式来保证差异在 1% 以内?或者能否循环尝试几种不同的拆分,直到找到差异在 1% 以内的那一种?如果用循环,则需要一个故障安全机制来防止无限循环(以防这样的拆分根本不存在),比如在循环 x 次之后直接采用最接近的拆分。
我正在使用 SQL Server 2012 和 SSMS 2016。感谢您提供的任何帮助。
编辑: 我曾尝试将代码转换为非公司特定的代码,但我弄乱了代码。我意识到这一点并调整了代码以显示真正需要的东西。
Edit2:我自己取得了一些进展,想更新问题。
所以我在这方面做了更多工作:现在每次运行代码时,我都能让它按随机顺序排序,并显示每个组的差异百分比。接下来我想添加的是一种循环 X 次并保留总体差异最低的那次拆分的方法。这个周末我可能会再多尝试一些。但现在,下面是我所说的新代码。
-- Builds one random two-way split ('A' / 'B') of each store's customers and
-- reports, per store, how far group A is from group B as a percentage of B.

-- Step 1: one row per store/customer with its totals and a random sort key.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     CO.StoreID
    ,CO.CustomerID
    -- Count the orders; summing the identifiers themselves is not a quantity.
    -- NOTE(review): assumes OrderID identifies one order in CustomerOrders - confirm.
    ,COUNT(DISTINCT CO.OrderID) AS Orders
    ,SUM(CO.OrderAmount)        AS AmountSold
    -- RAND() without a seed is evaluated once per query, so every row would
    -- receive the same value. Seeding it from NEWID() gives a different value
    -- on every row.
    ,RAND(CHECKSUM(NEWID()))    AS Random
INTO #Orders
FROM CustomerOrders AS CO
GROUP BY
     CO.StoreID
    ,CO.CustomerID;

-- Step 2: number the customers of each store in that random order.
IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

SELECT
     O.StoreID
    ,O.CustomerID
    ,O.Orders
    ,O.AmountSold
    ,O.Random
    ,ROW_NUMBER() OVER (
         PARTITION BY O.StoreID
         ORDER BY O.Random, O.CustomerID   -- CustomerID settles equal keys
     ) AS Ranking
INTO #OrderRanking
FROM #Orders AS O;

-- Step 3: even rankings form group A, odd rankings group B; total each group.
IF OBJECT_ID('tempdb.dbo.#Split') IS NOT NULL DROP TABLE #Split;

SELECT
     R.StoreID
    ,COUNT(R.CustomerID) AS CustomerCount
    ,SUM(R.Orders)       AS Orders
    ,SUM(R.AmountSold)   AS Amountsold
    ,G.[Grouping]
INTO #Split
FROM #OrderRanking AS R
CROSS APPLY (
    SELECT CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
) AS G
GROUP BY
     R.StoreID
    ,G.[Grouping];

-- Step 4: signed percentage difference of A relative to B, per store.
WITH Totals AS (
    -- Put both groups of a store on one row. SUM keeps a negative group total
    -- as it is (MAX(... ELSE 0) would turn it into 0); a store without a
    -- group A row gets 0 for the A columns.
    SELECT
         S.StoreID
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersA
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersB
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersA
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersB
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountA
        ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountB
    FROM #Split AS S
    GROUP BY S.StoreID
)
SELECT
     T.StoreID
    -- NULLIF returns NULL instead of raising a divide-by-zero error when
    -- group B's total is 0.
    ,((T.CustomersA - T.CustomersB) / NULLIF(T.CustomersB, 0)) * 100 AS CustomerCountVar
    ,((T.OrdersA    - T.OrdersB)    / NULLIF(T.OrdersB, 0))    * 100 AS OrderVar
    ,((T.AmountA    - T.AmountB)    / NULLIF(T.AmountB, 0))    * 100 AS AmountsoldVar
FROM Totals AS T;
因此,确实不可能像我们所有人预期的那样始终保持在 1% 以内,但就像我说的那样,我们可以在尝试 X 次后尝试尽可能接近。我已经想出如何做到这一点。下面是我使用的代码,目前设置为 10 次尝试,但可以更改为适合业务的任何数字。
-- Tries up to @GiveUp random two-way splits of every store's customers.
-- For each store it keeps the first split whose differences are inside the
-- target (#TestB) and, as a fallback, the closest split seen (#BestPrep).
-- The final result set has one row per store/customer for the chosen split;
-- an even Ranking means group A and an odd Ranking means group B.

-- First acceptable split found for a store.
IF OBJECT_ID('tempdb.dbo.#TestB') IS NOT NULL DROP TABLE #TestB;
CREATE TABLE #TestB
(
     StoreID     int
    ,CustomerID  varchar(11)
    ,Orders      int
    ,AmountSold  float
    ,Random      float
    ,Ranking     bigint
    ,CombinedVar decimal(18,2)
);

-- Closest split seen so far for a store (exactly one attempt per store).
IF OBJECT_ID('tempdb.dbo.#BestPrep') IS NOT NULL DROP TABLE #BestPrep;
CREATE TABLE #BestPrep
(
     StoreID     int
    ,CustomerID  varchar(11)
    ,Orders      int
    ,AmountSold  float
    ,Random      float
    ,Ranking     bigint
    ,CombinedVar decimal(18,2)
);

-- The per-customer totals are the same on every attempt, so they are built
-- once here; only the Random column is refreshed inside the loop.
IF OBJECT_ID('tempdb.dbo.#Orders') IS NOT NULL DROP TABLE #Orders;

SELECT
     CO.StoreID
    ,CO.CustomerID
    -- Count the orders; summing the identifiers themselves is not a quantity.
    -- NOTE(review): assumes OrderID identifies one order in CustomerOrders - confirm.
    ,COUNT(DISTINCT CO.OrderID) AS Orders
    ,SUM(CO.OrderAmount)        AS AmountSold
    ,RAND(CHECKSUM(NEWID()))    AS Random
INTO #Orders
FROM CustomerOrders AS CO
GROUP BY
     CO.StoreID
    ,CO.CustomerID;

-- Number of attempts left; change the start value to suit the business.
DECLARE @GiveUp int = 10;

WHILE @GiveUp > 0
BEGIN
    -- New random order for this attempt (intentionally updates every row).
    -- RAND() without a seed is evaluated once per query and would give every
    -- row the same value; seeding it from NEWID() gives a value per row.
    UPDATE #Orders
    SET Random = RAND(CHECKSUM(NEWID()));

    IF OBJECT_ID('tempdb.dbo.#OrderRanking') IS NOT NULL DROP TABLE #OrderRanking;

    SELECT
         O.StoreID
        ,O.CustomerID
        ,O.Orders
        ,O.AmountSold
        ,O.Random
        ,ROW_NUMBER() OVER (
             PARTITION BY O.StoreID
             ORDER BY O.Random, O.CustomerID   -- CustomerID settles equal keys
         ) AS Ranking
    INTO #OrderRanking
    FROM #Orders AS O;

    -- Even rankings form group A, odd rankings group B; total each group.
    IF OBJECT_ID('tempdb.dbo.#Split') IS NOT NULL DROP TABLE #Split;

    SELECT
         R.StoreID
        ,COUNT(R.CustomerID) AS CustomerCount
        ,SUM(R.Orders)       AS Orders
        ,SUM(R.AmountSold)   AS Amountsold
        ,G.[Grouping]
    INTO #Split
    FROM #OrderRanking AS R
    CROSS APPLY (
        SELECT CASE WHEN R.Ranking % 2 = 0 THEN 'A' ELSE 'B' END AS [Grouping]
    ) AS G
    GROUP BY
         R.StoreID
        ,G.[Grouping];

    -- Absolute percentage difference of A relative to B, per store.
    IF OBJECT_ID('tempdb.dbo.#Var') IS NOT NULL DROP TABLE #Var;

    WITH Totals AS (
        -- Both groups of a store on one row; a missing group A counts as 0.
        SELECT
             S.StoreID
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersA
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.CustomerCount ELSE 0 END) AS decimal(18,2)) AS CustomersB
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersA
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Orders        ELSE 0 END) AS decimal(18,2)) AS OrdersB
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'A' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountA
            ,CAST(SUM(CASE WHEN S.[Grouping] = 'B' THEN S.Amountsold    ELSE 0 END) AS decimal(18,2)) AS AmountB
        FROM #Split AS S
        GROUP BY S.StoreID
    ),
    Pct AS (
        -- NULLIF returns NULL instead of raising a divide-by-zero error when
        -- group B's total is 0.
        SELECT
             T.StoreID
            ,ABS(((T.CustomersA - T.CustomersB) / NULLIF(T.CustomersB, 0)) * 100) AS CustomerCountVar
            ,ABS(((T.OrdersA    - T.OrdersB)    / NULLIF(T.OrdersB, 0))    * 100) AS OrderVar
            ,ABS(((T.AmountA    - T.AmountB)    / NULLIF(T.AmountB, 0))    * 100) AS AmountsoldVar
        FROM Totals AS T
    )
    SELECT
         P.StoreID
        ,P.CustomerCountVar
        ,P.OrderVar
        ,P.AmountsoldVar
        ,P.OrderVar + P.AmountsoldVar AS CombinedVar
    INTO #Var
    FROM Pct AS P;

    -- Keep this attempt for every store that is inside the target and has no
    -- accepted split yet. The filter lives in the query itself: an IF without
    -- BEGIN...END guards only the single statement that follows it.
    -- The anti-join is per store because a customer can belong to several stores.
    INSERT INTO #TestB (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar)
    SELECT
         R.StoreID
        ,R.CustomerID
        ,R.Orders
        ,R.AmountSold
        ,R.Random
        ,R.Ranking
        ,V.CombinedVar
    FROM #OrderRanking AS R
    INNER JOIN #Var AS V
        ON V.StoreID = R.StoreID
    WHERE ((V.OrderVar < 1 AND V.AmountsoldVar < 1) OR V.CombinedVar < 2)
      AND NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = R.StoreID);

    -- Throw away the previous best of every store this attempt strictly beats.
    -- A tie keeps the earlier attempt, so a store never holds two attempts.
    DELETE B
    FROM #BestPrep AS B
    INNER JOIN #Var AS V
        ON V.StoreID = B.StoreID
    WHERE V.CombinedVar < B.CombinedVar
       OR (B.CombinedVar IS NULL AND V.CombinedVar IS NOT NULL);

    -- Store this attempt for every store that now has no best split on record.
    INSERT INTO #BestPrep (StoreID, CustomerID, Orders, AmountSold, Random, Ranking, CombinedVar)
    SELECT
         R.StoreID
        ,R.CustomerID
        ,R.Orders
        ,R.AmountSold
        ,R.Random
        ,R.Ranking
        ,V.CombinedVar
    FROM #OrderRanking AS R
    INNER JOIN #Var AS V
        ON V.StoreID = R.StoreID
    WHERE NOT EXISTS (SELECT * FROM #BestPrep AS B WHERE B.StoreID = R.StoreID);

    -- Nothing left to improve once every store has an accepted split.
    IF NOT EXISTS (
        SELECT *
        FROM #Orders AS O
        WHERE NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = O.StoreID)
    )
        BREAK;

    SET @GiveUp = @GiveUp - 1;
END;

-- One split per store: the accepted split where one was found, otherwise the
-- closest attempt. The two halves cover different stores, so UNION ALL is
-- enough and no customer comes back with two different rankings.
SELECT
     T.StoreID
    ,T.CustomerID
    ,T.Orders
    ,T.AmountSold
    ,T.Random
    ,T.Ranking
    ,T.CombinedVar
FROM #TestB AS T
UNION ALL
SELECT
     B.StoreID
    ,B.CustomerID
    ,B.Orders
    ,B.AmountSold
    ,B.Random
    ,B.Ranking
    ,B.CombinedVar
FROM #BestPrep AS B
WHERE NOT EXISTS (SELECT * FROM #TestB AS T WHERE T.StoreID = B.StoreID);