重复删除最大平均子数组
Repeatedly removing the maximum average subarray
我有一个正整数数组。例如:
[1, 7, 8, 4, 2, 1, 4]
“减少操作”找到具有最高平均值的数组前缀,并将其删除。这里,数组前缀是指一个连续的子数组,其左端是数组的开头,例如上面的 [1]
或 [1, 7]
或 [1, 7, 8]
。通过使用较长的前缀来打破平局。
Original array: [ 1, 7, 8, 4, 2, 1, 4]
Prefix averages: [1.0, 4.0, 5.3, 5.0, 4.4, 3.8, 3.9]
-> Delete [1, 7, 8], with maximum average 5.3
-> New array -> [4, 2, 1, 4]
我会重复归约操作直到数组为空:
[1, 7, 8, 4, 2, 1, 4]
^ ^
[4, 2, 1, 4]
^ ^
[2, 1, 4]
^ ^
[]
现在,实际上不需要执行这些数组修改;我只是在寻找将被这个过程删除的前缀长度列表,例如上面的[3, 1, 3]
。
计算这些前缀长度的有效算法是什么?
天真的方法是在每次迭代中重新计算 O(n^2)
算法的所有总和和平均值——我在下面附加了 Python 代码。我正在寻找对这种方法的任何改进——最好是 O(n^2)
以下的任何解决方案,但具有相同复杂性但更好的常数因子的算法也会有所帮助。
以下是我尝试过的一些方法(但没有成功):
- 动态维护前缀和,例如 Binary Indexed Tree。虽然我可以在
O(log n)
时间内轻松更新前缀 sums 或找到最大前缀 sum ,但我还没有找到任何数据结构可以更新 平均值,因为平均值中的分母正在改变。
- 重复使用之前的 'rankings' 前缀平均值——这些排名可能会发生变化,例如在某些数组中,以索引
5
结尾的前缀可能比以索引 6
结尾的前缀具有更大的平均值,但是在删除前 3 个元素之后,现在以索引 2
结尾的前缀可能 比 3
结尾的平均值小 。
- 寻找前缀结束的模式;例如,任何最大平均前缀的最右边的元素始终是数组中的局部最大值,但不清楚这有多大帮助。
这是朴素的二次方法的有效Python实现:
from fractions import Fraction
def find_array_reductions(nums: List[int]) -> List[int]:
"""Return list of lengths of max average prefix reductions."""
def max_prefix_avg(arr: List[int]) -> Tuple[float, int]:
"""Return value and length of max average prefix in arr."""
if len(arr) == 0:
return (-math.inf, 0)
best_length = 1
best_average = Fraction(0, 1)
running_sum = 0
for i, x in enumerate(arr, 1):
running_sum += x
new_average = Fraction(running_sum, i)
if new_average >= best_average:
best_average = new_average
best_length = i
return (float(best_average), best_length)
removed_lengths = []
total_removed = 0
while total_removed < len(nums):
_, new_removal = max_prefix_avg(nums[total_removed:])
removed_lengths.append(new_removal)
total_removed += new_removal
return removed_lengths
编辑:最初发布的代码有一个罕见的错误,即使用 Python 的 math.isclose()
的默认参数进行浮点比较,而不是适当的分数比较。这已在当前代码中修复。如果您好奇的话,可以在此 Try it online link 找到错误的示例,以及解释导致此错误的确切原因的前言。
这个问题有一个有趣的 O(n) 解决方案。
如果绘制累积总和与指数的关系图,则:
任意两个索引之间的子数组中的平均值是图表上这些点之间的线的斜率。
第一个 highest-average-prefix 将在与 0 成最大角度的点结束。接下来的 highest-average-prefix 必须有一个 更小的 平均值,并且它将在与第一个结尾形成最大角度的点结束。继续到数组的末尾,我们发现...
这些平均值最高的线段恰好是累积和图上凸包中的线段。
使用 monotone chain 算法找到这些片段。由于这些点已经排序,因此需要 O(n) 时间。
# Lengths of the segments in the upper convex hull
# of the cumulative sum graph
def upperSumHullLengths(arr):
if len(arr) < 2:
if len(arr) < 1:
return []
else:
return [1]
hull = [(0, 0),(1, arr[0])]
for x in range(2, len(arr)+1):
# this has x coordinate x-1
prevPoint = hull[len(hull) - 1]
# next point in cumulative sum
point = (x, prevPoint[1] + arr[x-1])
# remove points not on the convex hull
while len(hull) >= 2:
p0 = hull[len(hull)-2]
dx0 = prevPoint[0] - p0[0]
dy0 = prevPoint[1] - p0[1]
dx1 = x - prevPoint[0]
dy1 = point[1] - prevPoint[1]
if dy1*dx0 < dy0*dx1:
break
hull.pop()
prevPoint = p0
hull.append(point)
return [hull[i+1][0] - hull[i][0] for i in range(0, len(hull)-1)]
print(upperSumHullLengths([ 1, 7, 8, 4, 2, 1, 4]))
打印:
[3, 1, 3]
Matt 和 kcsquared 的解决方案和一些基准的一些简化版本:
from itertools import accumulate, pairwise
def Matt_Pychoed(arr):
hull = [(0, 0)]
for x, y in enumerate(accumulate(arr), 1):
while len(hull) >= 2:
(x0, y0), (x1, y1) = hull[-2:]
dx0 = x1 - x0
dy0 = y1 - y0
dx1 = x - x1
dy1 = y - y1
if dy1*dx0 < dy0*dx1:
break
hull.pop()
hull.append((x, y))
return [q[0] - p[0] for p, q in pairwise(hull)]
from itertools import accumulate, count
from operator import truediv
def kc_Pychoed_2(nums):
removals = []
while nums:
averages = map(truediv, accumulate(nums), count(1))
remove = max(zip(averages, count(1)))[1]
removals.append(remove)
nums = nums[remove:]
return removals
使用 100,000 个从 1 到 1000 的随机整数的 20 个不同数组进行基准测试:
min median mean max
65 ms 164 ms 159 ms 249 ms kc
38 ms 98 ms 92 ms 146 ms kc_Pychoed_1
58 ms 127 ms 120 ms 189 ms kc_Pychoed_2
134 ms 137 ms 138 ms 157 ms Matt
101 ms 102 ms 103 ms 111 ms Matt_Pychoed
其中 kc_Pychoed_1
是 kcsquared,但有整数 running_sum
而没有 math.isclose
。我验证所有解决方案对每个输入都计算出相同的结果。
对于此类 随机 数据,kcsquared 似乎介于 O(n) 和 O(n log n) 之间。但如果数组严格递减,它就会退化为二次方。 arr = [1000, 999, 998, ..., 2, 1]
我得到:
min median mean max
102 ms 106 ms 107 ms 116 ms kc
60 ms 61 ms 61 ms 62 ms kc_Pychoed_1
76 ms 77 ms 77 ms 86 ms kc_Pychoed_2
0 ms 1 ms 1 ms 1 ms Matt
0 ms 0 ms 0 ms 0 ms Matt_Pychoed
基准代码(Try it online!):
from timeit import default_timer as timer
from statistics import mean, median
import random
from typing import List, Tuple
import math
from itertools import accumulate, count
from operator import truediv
def kc(nums: List[int]) -> List[int]:
"""Return list of lengths of max average prefix reductions."""
def max_prefix_avg(arr: List[int]) -> Tuple[float, int]:
"""Return value and length of max average prefix in arr"""
if len(arr) == 0:
return (-math.inf, 0)
best_length = 1
best_average = -math.inf
running_sum = 0.0
for i, x in enumerate(arr, 1):
running_sum += x
new_average = running_sum / i
if (new_average >= best_average
or math.isclose(new_average, best_average)):
best_average = new_average
best_length = i
return (best_average, best_length)
removed_lengths = []
total_removed = 0
while total_removed < len(nums):
_, new_removal = max_prefix_avg(nums[total_removed:])
removed_lengths.append(new_removal)
total_removed += new_removal
return removed_lengths
def kc_Pychoed_1(nums: List[int]) -> List[int]:
"""Return list of lengths of max average prefix reductions."""
def max_prefix_avg(arr: List[int]) -> Tuple[float, int]:
"""Return value and length of max average prefix in arr"""
if len(arr) == 0:
return (-math.inf, 0)
best_length = 1
best_average = -math.inf
running_sum = 0
for i, x in enumerate(arr, 1):
running_sum += x
new_average = running_sum / i
if new_average >= best_average:
best_average = new_average
best_length = i
return (best_average, best_length)
removed_lengths = []
total_removed = 0
while total_removed < len(nums):
_, new_removal = max_prefix_avg(nums[total_removed:])
removed_lengths.append(new_removal)
total_removed += new_removal
return removed_lengths
def kc_Pychoed_2(nums):
removals = []
while nums:
averages = map(truediv, accumulate(nums), count(1))
remove = max(zip(averages, count(1)))[1]
removals.append(remove)
nums = nums[remove:]
return removals
# Lengths of the segments in the upper convex hull
# of the cumulative sum graph
def Matt(arr):
if len(arr) < 2:
if len(arr) < 1:
return []
else:
return [1]
hull = [(0, 0),(1, arr[0])]
for x in range(2, len(arr)+1):
# this has x coordinate x-1
prevPoint = hull[len(hull) - 1]
# next point in cumulative sum
point = (x, prevPoint[1] + arr[x-1])
# remove points not on the convex hull
while len(hull) >= 2:
p0 = hull[len(hull)-2]
dx0 = prevPoint[0] - p0[0]
dy0 = prevPoint[1] - p0[1]
dx1 = x - prevPoint[0]
dy1 = point[1] - prevPoint[1]
if dy1*dx0 < dy0*dx1:
break
hull.pop()
prevPoint = p0
hull.append(point)
return [hull[i+1][0] - hull[i][0] for i in range(0, len(hull)-1)]
def pairwise(lst):
return zip(lst, lst[1:])
def Matt_Pychoed(arr):
hull = [(0, 0)]
for x, y in enumerate(accumulate(arr), 1):
while len(hull) >= 2:
(x0, y0), (x1, y1) = hull[-2:]
dx0 = x1 - x0
dy0 = y1 - y0
dx1 = x - x1
dy1 = y - y1
if dy1*dx0 < dy0*dx1:
break
hull.pop()
hull.append((x, y))
return [q[0] - p[0] for p, q in pairwise(hull)]
funcs = kc, kc_Pychoed_1, kc_Pychoed_2, Matt, Matt_Pychoed
stats = min, median, mean, max
tss = [[] for _ in funcs]
for r in range(1, 21):
print(f'After round {r}:')
arr = random.choices(range(1, 1001), k=100_000)
# arr = list(range(1000, 1, -1))
expect = None
print(*(f'{stat.__name__:^7}' for stat in stats))
for func, ts in zip(funcs, tss):
t0 = timer()
result = func(arr)
t1 = timer()
ts.append(t1 - t0)
if expect is None:
expect = result
assert result == expect
print(*('%3d ms ' % (stat(ts) * 1e3) for stat in stats), func.__name__)
print()
我有一个正整数数组。例如:
[1, 7, 8, 4, 2, 1, 4]
“减少操作”找到具有最高平均值的数组前缀,并将其删除。这里,数组前缀是指一个连续的子数组,其左端是数组的开头,例如上面的 [1]
或 [1, 7]
或 [1, 7, 8]
。通过使用较长的前缀来打破平局。
Original array: [ 1, 7, 8, 4, 2, 1, 4]
Prefix averages: [1.0, 4.0, 5.3, 5.0, 4.4, 3.8, 3.9]
-> Delete [1, 7, 8], with maximum average 5.3
-> New array -> [4, 2, 1, 4]
我会重复归约操作直到数组为空:
[1, 7, 8, 4, 2, 1, 4]
^ ^
[4, 2, 1, 4]
^ ^
[2, 1, 4]
^ ^
[]
现在,实际上不需要执行这些数组修改;我只是在寻找将被这个过程删除的前缀长度列表,例如上面的[3, 1, 3]
。
计算这些前缀长度的有效算法是什么?
天真的方法是在每次迭代中重新计算 O(n^2)
算法的所有总和和平均值——我在下面附加了 Python 代码。我正在寻找对这种方法的任何改进——最好是 O(n^2)
以下的任何解决方案,但具有相同复杂性但更好的常数因子的算法也会有所帮助。
以下是我尝试过的一些方法(但没有成功):
- 动态维护前缀和,例如 Binary Indexed Tree。虽然我可以在
O(log n)
时间内轻松更新前缀 sums 或找到最大前缀 sum ,但我还没有找到任何数据结构可以更新 平均值,因为平均值中的分母正在改变。 - 重复使用之前的 'rankings' 前缀平均值——这些排名可能会发生变化,例如在某些数组中,以索引
5
结尾的前缀可能比以索引6
结尾的前缀具有更大的平均值,但是在删除前 3 个元素之后,现在以索引2
结尾的前缀可能 比3
结尾的平均值小 。 - 寻找前缀结束的模式;例如,任何最大平均前缀的最右边的元素始终是数组中的局部最大值,但不清楚这有多大帮助。
这是朴素的二次方法的有效Python实现:
from fractions import Fraction
def find_array_reductions(nums: List[int]) -> List[int]:
"""Return list of lengths of max average prefix reductions."""
def max_prefix_avg(arr: List[int]) -> Tuple[float, int]:
"""Return value and length of max average prefix in arr."""
if len(arr) == 0:
return (-math.inf, 0)
best_length = 1
best_average = Fraction(0, 1)
running_sum = 0
for i, x in enumerate(arr, 1):
running_sum += x
new_average = Fraction(running_sum, i)
if new_average >= best_average:
best_average = new_average
best_length = i
return (float(best_average), best_length)
removed_lengths = []
total_removed = 0
while total_removed < len(nums):
_, new_removal = max_prefix_avg(nums[total_removed:])
removed_lengths.append(new_removal)
total_removed += new_removal
return removed_lengths
编辑:最初发布的代码有一个罕见的错误,即使用 Python 的 math.isclose()
的默认参数进行浮点比较,而不是适当的分数比较。这已在当前代码中修复。如果您好奇的话,可以在此 Try it online link 找到错误的示例,以及解释导致此错误的确切原因的前言。
这个问题有一个有趣的 O(n) 解决方案。
如果绘制累积总和与指数的关系图,则:
任意两个索引之间的子数组中的平均值是图表上这些点之间的线的斜率。
第一个 highest-average-prefix 将在与 0 成最大角度的点结束。接下来的 highest-average-prefix 必须有一个 更小的 平均值,并且它将在与第一个结尾形成最大角度的点结束。继续到数组的末尾,我们发现...
这些平均值最高的线段恰好是累积和图上凸包中的线段。
使用 monotone chain 算法找到这些片段。由于这些点已经排序,因此需要 O(n) 时间。
# Lengths of the segments in the upper convex hull
# of the cumulative sum graph
def upperSumHullLengths(arr):
if len(arr) < 2:
if len(arr) < 1:
return []
else:
return [1]
hull = [(0, 0),(1, arr[0])]
for x in range(2, len(arr)+1):
# this has x coordinate x-1
prevPoint = hull[len(hull) - 1]
# next point in cumulative sum
point = (x, prevPoint[1] + arr[x-1])
# remove points not on the convex hull
while len(hull) >= 2:
p0 = hull[len(hull)-2]
dx0 = prevPoint[0] - p0[0]
dy0 = prevPoint[1] - p0[1]
dx1 = x - prevPoint[0]
dy1 = point[1] - prevPoint[1]
if dy1*dx0 < dy0*dx1:
break
hull.pop()
prevPoint = p0
hull.append(point)
return [hull[i+1][0] - hull[i][0] for i in range(0, len(hull)-1)]
print(upperSumHullLengths([ 1, 7, 8, 4, 2, 1, 4]))
打印:
[3, 1, 3]
Matt 和 kcsquared 的解决方案和一些基准的一些简化版本:
from itertools import accumulate, pairwise
def Matt_Pychoed(arr):
hull = [(0, 0)]
for x, y in enumerate(accumulate(arr), 1):
while len(hull) >= 2:
(x0, y0), (x1, y1) = hull[-2:]
dx0 = x1 - x0
dy0 = y1 - y0
dx1 = x - x1
dy1 = y - y1
if dy1*dx0 < dy0*dx1:
break
hull.pop()
hull.append((x, y))
return [q[0] - p[0] for p, q in pairwise(hull)]
from itertools import accumulate, count
from operator import truediv
def kc_Pychoed_2(nums):
removals = []
while nums:
averages = map(truediv, accumulate(nums), count(1))
remove = max(zip(averages, count(1)))[1]
removals.append(remove)
nums = nums[remove:]
return removals
使用 100,000 个从 1 到 1000 的随机整数的 20 个不同数组进行基准测试:
min median mean max
65 ms 164 ms 159 ms 249 ms kc
38 ms 98 ms 92 ms 146 ms kc_Pychoed_1
58 ms 127 ms 120 ms 189 ms kc_Pychoed_2
134 ms 137 ms 138 ms 157 ms Matt
101 ms 102 ms 103 ms 111 ms Matt_Pychoed
其中 kc_Pychoed_1
是 kcsquared,但有整数 running_sum
而没有 math.isclose
。我验证所有解决方案对每个输入都计算出相同的结果。
对于此类 随机 数据,kcsquared 似乎介于 O(n) 和 O(n log n) 之间。但如果数组严格递减,它就会退化为二次方。 arr = [1000, 999, 998, ..., 2, 1]
我得到:
min median mean max
102 ms 106 ms 107 ms 116 ms kc
60 ms 61 ms 61 ms 62 ms kc_Pychoed_1
76 ms 77 ms 77 ms 86 ms kc_Pychoed_2
0 ms 1 ms 1 ms 1 ms Matt
0 ms 0 ms 0 ms 0 ms Matt_Pychoed
基准代码(Try it online!):
from timeit import default_timer as timer
from statistics import mean, median
import random
from typing import List, Tuple
import math
from itertools import accumulate, count
from operator import truediv
def kc(nums: List[int]) -> List[int]:
"""Return list of lengths of max average prefix reductions."""
def max_prefix_avg(arr: List[int]) -> Tuple[float, int]:
"""Return value and length of max average prefix in arr"""
if len(arr) == 0:
return (-math.inf, 0)
best_length = 1
best_average = -math.inf
running_sum = 0.0
for i, x in enumerate(arr, 1):
running_sum += x
new_average = running_sum / i
if (new_average >= best_average
or math.isclose(new_average, best_average)):
best_average = new_average
best_length = i
return (best_average, best_length)
removed_lengths = []
total_removed = 0
while total_removed < len(nums):
_, new_removal = max_prefix_avg(nums[total_removed:])
removed_lengths.append(new_removal)
total_removed += new_removal
return removed_lengths
def kc_Pychoed_1(nums: List[int]) -> List[int]:
"""Return list of lengths of max average prefix reductions."""
def max_prefix_avg(arr: List[int]) -> Tuple[float, int]:
"""Return value and length of max average prefix in arr"""
if len(arr) == 0:
return (-math.inf, 0)
best_length = 1
best_average = -math.inf
running_sum = 0
for i, x in enumerate(arr, 1):
running_sum += x
new_average = running_sum / i
if new_average >= best_average:
best_average = new_average
best_length = i
return (best_average, best_length)
removed_lengths = []
total_removed = 0
while total_removed < len(nums):
_, new_removal = max_prefix_avg(nums[total_removed:])
removed_lengths.append(new_removal)
total_removed += new_removal
return removed_lengths
def kc_Pychoed_2(nums):
removals = []
while nums:
averages = map(truediv, accumulate(nums), count(1))
remove = max(zip(averages, count(1)))[1]
removals.append(remove)
nums = nums[remove:]
return removals
# Lengths of the segments in the upper convex hull
# of the cumulative sum graph
def Matt(arr):
if len(arr) < 2:
if len(arr) < 1:
return []
else:
return [1]
hull = [(0, 0),(1, arr[0])]
for x in range(2, len(arr)+1):
# this has x coordinate x-1
prevPoint = hull[len(hull) - 1]
# next point in cumulative sum
point = (x, prevPoint[1] + arr[x-1])
# remove points not on the convex hull
while len(hull) >= 2:
p0 = hull[len(hull)-2]
dx0 = prevPoint[0] - p0[0]
dy0 = prevPoint[1] - p0[1]
dx1 = x - prevPoint[0]
dy1 = point[1] - prevPoint[1]
if dy1*dx0 < dy0*dx1:
break
hull.pop()
prevPoint = p0
hull.append(point)
return [hull[i+1][0] - hull[i][0] for i in range(0, len(hull)-1)]
def pairwise(lst):
return zip(lst, lst[1:])
def Matt_Pychoed(arr):
hull = [(0, 0)]
for x, y in enumerate(accumulate(arr), 1):
while len(hull) >= 2:
(x0, y0), (x1, y1) = hull[-2:]
dx0 = x1 - x0
dy0 = y1 - y0
dx1 = x - x1
dy1 = y - y1
if dy1*dx0 < dy0*dx1:
break
hull.pop()
hull.append((x, y))
return [q[0] - p[0] for p, q in pairwise(hull)]
funcs = kc, kc_Pychoed_1, kc_Pychoed_2, Matt, Matt_Pychoed
stats = min, median, mean, max
tss = [[] for _ in funcs]
for r in range(1, 21):
print(f'After round {r}:')
arr = random.choices(range(1, 1001), k=100_000)
# arr = list(range(1000, 1, -1))
expect = None
print(*(f'{stat.__name__:^7}' for stat in stats))
for func, ts in zip(funcs, tss):
t0 = timer()
result = func(arr)
t1 = timer()
ts.append(t1 - t0)
if expect is None:
expect = result
assert result == expect
print(*('%3d ms ' % (stat(ts) * 1e3) for stat in stats), func.__name__)
print()