对元素进行排序以最大化正差异的数量
Sort elements to maximise amount of positive differences
我有一个整数列表。数字可以重复。我希望以这种方式“排序”它们以获得尽可能多的“跳跃”(从下一个元素到当前元素的差异为正)。
示例:
[10, 10, 10, 20, 20, 20] # only one "jump" from 10 to 20
[10, 20, 10, 20, 10, 20] # three jumps (10->20, 10->20, 10->20) - the correct answer
[20, 10, 20, 10, 20, 10] # two jumps
[11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11] # 9
[9, 11, 2, 19, 4, 11, 15, 5, 7, 11, 16, 19, 1, 4, 8, 11, 16, 19, 9, 17] # 14
[2, 9, 11, 16, 17, 19, 4, 5, 8, 15, 16, 9, 11, 1, 7, 11, 19, 4, 11, 19] # 15
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11] # 16
我完全低效(但有效)的代码。:
def sol1(my_list):
my_list.sort()
final_list = []
to_delete = []
i = 0
last_element = float('-inf')
while my_list:
if i >= len(my_list):
i = 0
for index in to_delete[::-1]:
my_list.pop(index)
if len(my_list):
last_element = my_list.pop(0)
final_list.append(last_element)
to_delete = []
continue
curr_element = my_list[i]
if curr_element > last_element:
final_list.append(curr_element)
last_element = curr_element
to_delete.append(i)
i += 1
return final_list
有谁知道优化解决方案的方法吗?现在我多次迭代列表。它不需要在 Python.
中
根据每个元素之前出现的次数给出每个元素cumcount
。然后先按 cumcount
排序,然后按其值排序。
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
cumcount = []
d = dict.fromkeys(arr, 0)
for el in arr:
cumcount.append(d[el])
d[el] += 1
[x[1] for x in sorted(zip(cumcount, arr))]
# [1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]
我认为这应该是等价的,只需要 O(n log n) 时间进行排序,O(n) 时间用于其余部分。
from collections import Counter, OrderedDict
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = OrderedDict(Counter(sorted(arr)))
ans = []
while d:
ans += d
for x in list(d):
d[x] -= 1
if not d[x]:
del d[x]
print(ans)
另一个,灵感来自 trincot:
from collections import defaultdict
from itertools import count
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))
print(arr)
基准以及其他解决方案,基于您自己的建议输入和我的两个(对于每个输入,显示了多次尝试中每个解决方案的最佳三次):
[randint(1, 10**5) for i in range(10**4)]
2.14 ms 2.15 ms 2.18 ms Kelly4c
2.19 ms 2.24 ms 2.32 ms Kelly4b
2.23 ms 2.25 ms 2.37 ms Kelly4
5.83 ms 6.02 ms 6.03 ms original
7.05 ms 7.12 ms 7.54 ms Kelly1
7.82 ms 8.43 ms 8.45 ms Kelly3b
8.13 ms 8.15 ms 8.92 ms Kelly3
9.06 ms 9.44 ms 9.50 ms db0
10.25 ms 10.28 ms 10.31 ms db
11.09 ms 11.11 ms 11.23 ms trincot
11.19 ms 11.25 ms 11.58 ms Kelly2
11.29 ms 11.65 ms 11.74 ms db1
11.64 ms 11.65 ms 12.49 ms Kelly3c
list(range(n := 1000)) + [n] * n
0.57 ms 0.60 ms 0.63 ms Kelly2
0.64 ms 0.65 ms 0.68 ms Kelly3
0.66 ms 0.69 ms 0.69 ms trincot
0.69 ms 0.71 ms 0.71 ms db
0.69 ms 0.70 ms 0.70 ms db1
0.72 ms 0.74 ms 0.75 ms Kelly3b
0.99 ms 1.04 ms 1.11 ms Kelly3c
1.04 ms 1.08 ms 1.09 ms Kelly1
28.27 ms 28.56 ms 28.63 ms Kelly4b
36.58 ms 36.81 ms 37.03 ms Kelly4c
39.78 ms 40.07 ms 40.37 ms Kelly4
80.41 ms 80.96 ms 81.99 ms original
81.00 ms 81.90 ms 82.08 ms db0
list(range(n := 10000)) + [n] * n
7.11 ms 7.37 ms 7.42 ms Kelly2
7.30 ms 7.62 ms 7.63 ms db
7.31 ms 7.31 ms 7.37 ms Kelly3
7.52 ms 7.64 ms 7.80 ms trincot
7.64 ms 7.82 ms 7.94 ms db1
8.81 ms 8.83 ms 8.84 ms Kelly3b
10.18 ms 10.41 ms 10.52 ms Kelly1
10.85 ms 10.92 ms 11.16 ms Kelly3c
基准代码(Try it online!):
from timeit import timeit
from collections import Counter, OrderedDict, defaultdict, deque
from itertools import count, chain, repeat
from random import randint, shuffle
from bisect import insort
def Kelly1(arr):
d = OrderedDict(Counter(sorted(arr)))
ans = []
while d:
ans += d
for x in list(d):
d[x] -= 1
if not d[x]:
del d[x]
return ans
def Kelly2(arr):
d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))
return arr
def Kelly3(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
for x, count in sorted(ctr.items()):
for rnd in rounds[:count]:
rnd.append(x)
return list(chain.from_iterable(rounds))
def Kelly3b(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
appends = [rnd.append for rnd in rounds]
for x, count in sorted(ctr.items()):
for append in appends[:count]:
append(x)
return list(chain.from_iterable(rounds))
def Kelly3c(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
for x, count in sorted(ctr.items()):
deque(map(list.append, rounds[:count], repeat(x)), 0)
return list(chain.from_iterable(rounds))
def Kelly4(arr):
arr.sort()
out = [].append
while arr:
postpone = [].append
last = None
for x in arr:
if last != x:
out(x)
else:
postpone(x)
last = x
arr = postpone.__self__
return out.__self__
def Kelly4b(arr):
arr.sort()
out = [].append
while arr:
postpone = [].append
last = None
arr = [x
for x in arr
if last == x
or out(last := x)]
return out.__self__
def Kelly4c(arr):
arr.sort()
out = []
while arr:
postpone = [].append
last = None
out += [last := x
for x in arr
if last != x
or postpone(x)]
arr = postpone.__self__
return out
def original(my_list):
my_list.sort()
final_list = []
to_delete = []
i = 0
last_element = float('-inf')
while my_list:
if i >= len(my_list):
i = 0
for index in to_delete[::-1]:
my_list.pop(index)
if len(my_list):
last_element = my_list.pop(0)
final_list.append(last_element)
to_delete = []
continue
curr_element = my_list[i]
if curr_element > last_element:
final_list.append(curr_element)
last_element = curr_element
to_delete.append(i)
i += 1
return final_list
def db(arr):
cumcount = []
d = dict.fromkeys(arr, 0)
for el in arr:
cumcount.append(d[el])
d[el] += 1
return [x[1] for x in sorted(zip(cumcount, arr))]
def db0(arr):
d = Counter(arr)
keys = sorted(d.keys())
ans = []
while len(ans) < len(arr):
for k in keys:
if d.get(k, 0) > 0:
ans.append(k)
d[k] -= 1
return ans
def db1(arr):
cumcount = []
d = {k: 0 for k in set(arr)}
for el in arr:
cumcount.append(d[el])
d[el] += 1
return [x[1] for x in sorted(zip(cumcount, arr))]
def trincot(lst):
return [num for _,num in sorted(
(i, num)
for num, freq in Counter(lst).items()
for i in range(freq)
)]
funcs = [Kelly1, Kelly2, Kelly3, Kelly3b, Kelly3c, Kelly4, Kelly4b, Kelly4c, original, db, db0, db1, trincot]
def test(arr, funcs):
print(arr)
arr = eval(arr)
expect = original(arr[:])
for func in funcs:
result = func(arr[:])
if result != expect:
print(expect[:20])
print(result[:20])
assert result == expect, func
times = {func: [] for func in funcs}
for _ in range(20):
shuffle(funcs)
for func in funcs:
copy = arr[:]
t = timeit(lambda: func(copy), 'gc.enable()', number=1)
insort(times[func], t)
for func in sorted(funcs, key=times.get):
print(*('%5.2f ms ' % (t * 1e3) for t in times[func][:3]), func.__name__)
print()
test('[randint(1, 10**5) for i in range(10**4)]',
funcs)
test('list(range(n := 1000)) + [n] * n',
funcs)
test('list(range(n := 10000)) + [n] * n',
list(set(funcs) - {original, Kelly4, Kelly4b, Kelly4c, db0}))
该算法应处理重复值,以便首先所有第一次出现的数字按排序顺序出现,然后第二次出现的重复项按排序顺序出现,然后是第三次出现,...等等
from collections import Counter
lst = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
result = [num for _,num in sorted(
(i, num)
for num, freq in Counter(lst).items()
for i in range(freq)
)]
result
将是:
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]
我有一个整数列表。数字可以重复。我希望以这种方式“排序”它们以获得尽可能多的“跳跃”(从下一个元素到当前元素的差异为正)。
示例:
[10, 10, 10, 20, 20, 20] # only one "jump" from 10 to 20
[10, 20, 10, 20, 10, 20] # three jumps (10->20, 10->20, 10->20) - the correct answer
[20, 10, 20, 10, 20, 10] # two jumps
[11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11] # 9
[9, 11, 2, 19, 4, 11, 15, 5, 7, 11, 16, 19, 1, 4, 8, 11, 16, 19, 9, 17] # 14
[2, 9, 11, 16, 17, 19, 4, 5, 8, 15, 16, 9, 11, 1, 7, 11, 19, 4, 11, 19] # 15
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11] # 16
我完全低效(但有效)的代码。:
def sol1(my_list):
my_list.sort()
final_list = []
to_delete = []
i = 0
last_element = float('-inf')
while my_list:
if i >= len(my_list):
i = 0
for index in to_delete[::-1]:
my_list.pop(index)
if len(my_list):
last_element = my_list.pop(0)
final_list.append(last_element)
to_delete = []
continue
curr_element = my_list[i]
if curr_element > last_element:
final_list.append(curr_element)
last_element = curr_element
to_delete.append(i)
i += 1
return final_list
有谁知道优化解决方案的方法吗?现在我多次迭代列表。它不需要在 Python.
中根据每个元素之前出现的次数给出每个元素cumcount
。然后先按 cumcount
排序,然后按其值排序。
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
cumcount = []
d = dict.fromkeys(arr, 0)
for el in arr:
cumcount.append(d[el])
d[el] += 1
[x[1] for x in sorted(zip(cumcount, arr))]
# [1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]
我认为这应该是等价的,只需要 O(n log n) 时间进行排序,O(n) 时间用于其余部分。
from collections import Counter, OrderedDict
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = OrderedDict(Counter(sorted(arr)))
ans = []
while d:
ans += d
for x in list(d):
d[x] -= 1
if not d[x]:
del d[x]
print(ans)
另一个,灵感来自 trincot:
from collections import defaultdict
from itertools import count
arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))
print(arr)
基准以及其他解决方案,基于您自己的建议输入和我的两个(对于每个输入,显示了多次尝试中每个解决方案的最佳三次):
[randint(1, 10**5) for i in range(10**4)]
2.14 ms 2.15 ms 2.18 ms Kelly4c
2.19 ms 2.24 ms 2.32 ms Kelly4b
2.23 ms 2.25 ms 2.37 ms Kelly4
5.83 ms 6.02 ms 6.03 ms original
7.05 ms 7.12 ms 7.54 ms Kelly1
7.82 ms 8.43 ms 8.45 ms Kelly3b
8.13 ms 8.15 ms 8.92 ms Kelly3
9.06 ms 9.44 ms 9.50 ms db0
10.25 ms 10.28 ms 10.31 ms db
11.09 ms 11.11 ms 11.23 ms trincot
11.19 ms 11.25 ms 11.58 ms Kelly2
11.29 ms 11.65 ms 11.74 ms db1
11.64 ms 11.65 ms 12.49 ms Kelly3c
list(range(n := 1000)) + [n] * n
0.57 ms 0.60 ms 0.63 ms Kelly2
0.64 ms 0.65 ms 0.68 ms Kelly3
0.66 ms 0.69 ms 0.69 ms trincot
0.69 ms 0.71 ms 0.71 ms db
0.69 ms 0.70 ms 0.70 ms db1
0.72 ms 0.74 ms 0.75 ms Kelly3b
0.99 ms 1.04 ms 1.11 ms Kelly3c
1.04 ms 1.08 ms 1.09 ms Kelly1
28.27 ms 28.56 ms 28.63 ms Kelly4b
36.58 ms 36.81 ms 37.03 ms Kelly4c
39.78 ms 40.07 ms 40.37 ms Kelly4
80.41 ms 80.96 ms 81.99 ms original
81.00 ms 81.90 ms 82.08 ms db0
list(range(n := 10000)) + [n] * n
7.11 ms 7.37 ms 7.42 ms Kelly2
7.30 ms 7.62 ms 7.63 ms db
7.31 ms 7.31 ms 7.37 ms Kelly3
7.52 ms 7.64 ms 7.80 ms trincot
7.64 ms 7.82 ms 7.94 ms db1
8.81 ms 8.83 ms 8.84 ms Kelly3b
10.18 ms 10.41 ms 10.52 ms Kelly1
10.85 ms 10.92 ms 11.16 ms Kelly3c
基准代码(Try it online!):
from timeit import timeit
from collections import Counter, OrderedDict, defaultdict, deque
from itertools import count, chain, repeat
from random import randint, shuffle
from bisect import insort
def Kelly1(arr):
d = OrderedDict(Counter(sorted(arr)))
ans = []
while d:
ans += d
for x in list(d):
d[x] -= 1
if not d[x]:
del d[x]
return ans
def Kelly2(arr):
d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))
return arr
def Kelly3(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
for x, count in sorted(ctr.items()):
for rnd in rounds[:count]:
rnd.append(x)
return list(chain.from_iterable(rounds))
def Kelly3b(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
appends = [rnd.append for rnd in rounds]
for x, count in sorted(ctr.items()):
for append in appends[:count]:
append(x)
return list(chain.from_iterable(rounds))
def Kelly3c(arr):
ctr = Counter(arr)
rounds = [[] for _ in range(max(ctr.values()))]
for x, count in sorted(ctr.items()):
deque(map(list.append, rounds[:count], repeat(x)), 0)
return list(chain.from_iterable(rounds))
def Kelly4(arr):
arr.sort()
out = [].append
while arr:
postpone = [].append
last = None
for x in arr:
if last != x:
out(x)
else:
postpone(x)
last = x
arr = postpone.__self__
return out.__self__
def Kelly4b(arr):
arr.sort()
out = [].append
while arr:
postpone = [].append
last = None
arr = [x
for x in arr
if last == x
or out(last := x)]
return out.__self__
def Kelly4c(arr):
arr.sort()
out = []
while arr:
postpone = [].append
last = None
out += [last := x
for x in arr
if last != x
or postpone(x)]
arr = postpone.__self__
return out
def original(my_list):
my_list.sort()
final_list = []
to_delete = []
i = 0
last_element = float('-inf')
while my_list:
if i >= len(my_list):
i = 0
for index in to_delete[::-1]:
my_list.pop(index)
if len(my_list):
last_element = my_list.pop(0)
final_list.append(last_element)
to_delete = []
continue
curr_element = my_list[i]
if curr_element > last_element:
final_list.append(curr_element)
last_element = curr_element
to_delete.append(i)
i += 1
return final_list
def db(arr):
cumcount = []
d = dict.fromkeys(arr, 0)
for el in arr:
cumcount.append(d[el])
d[el] += 1
return [x[1] for x in sorted(zip(cumcount, arr))]
def db0(arr):
d = Counter(arr)
keys = sorted(d.keys())
ans = []
while len(ans) < len(arr):
for k in keys:
if d.get(k, 0) > 0:
ans.append(k)
d[k] -= 1
return ans
def db1(arr):
cumcount = []
d = {k: 0 for k in set(arr)}
for el in arr:
cumcount.append(d[el])
d[el] += 1
return [x[1] for x in sorted(zip(cumcount, arr))]
def trincot(lst):
return [num for _,num in sorted(
(i, num)
for num, freq in Counter(lst).items()
for i in range(freq)
)]
funcs = [Kelly1, Kelly2, Kelly3, Kelly3b, Kelly3c, Kelly4, Kelly4b, Kelly4c, original, db, db0, db1, trincot]
def test(arr, funcs):
print(arr)
arr = eval(arr)
expect = original(arr[:])
for func in funcs:
result = func(arr[:])
if result != expect:
print(expect[:20])
print(result[:20])
assert result == expect, func
times = {func: [] for func in funcs}
for _ in range(20):
shuffle(funcs)
for func in funcs:
copy = arr[:]
t = timeit(lambda: func(copy), 'gc.enable()', number=1)
insort(times[func], t)
for func in sorted(funcs, key=times.get):
print(*('%5.2f ms ' % (t * 1e3) for t in times[func][:3]), func.__name__)
print()
test('[randint(1, 10**5) for i in range(10**4)]',
funcs)
test('list(range(n := 1000)) + [n] * n',
funcs)
test('list(range(n := 10000)) + [n] * n',
list(set(funcs) - {original, Kelly4, Kelly4b, Kelly4c, db0}))
该算法应处理重复值,以便首先所有第一次出现的数字按排序顺序出现,然后第二次出现的重复项按排序顺序出现,然后是第三次出现,...等等
from collections import Counter
lst = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
result = [num for _,num in sorted(
(i, num)
for num, freq in Counter(lst).items()
for i in range(freq)
)]
result
将是:
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]