对元素进行排序以最大化正差异的数量

Sort elements to maximise amount of positive differences

我有一个整数列表。数字可以重复。我希望以这种方式“排序”它们以获得尽可能多的“跳跃”(从下一个元素到当前元素的差异为正)。

示例:

[10, 10, 10, 20, 20, 20]  # only one "jump" from 10 to 20
[10, 20, 10, 20, 10, 20]  # three jumps (10->20, 10->20, 10->20) - the correct answer
[20, 10, 20, 10, 20, 10]  # two jumps

[11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]  # 9
[9, 11, 2, 19, 4, 11, 15, 5, 7, 11, 16, 19, 1, 4, 8, 11, 16, 19, 9, 17]  # 14
[2, 9, 11, 16, 17, 19, 4, 5, 8, 15, 16, 9, 11, 1, 7, 11, 19, 4, 11, 19]  # 15
[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]  # 16

我完全低效(但有效)的代码。:

def sol1(my_list):
    my_list.sort()
    final_list = []
    to_delete = []
    i = 0
    last_element = float('-inf')
    while my_list:
        if i >= len(my_list):
            i = 0
            for index in to_delete[::-1]:
                my_list.pop(index)
            if len(my_list):
                last_element = my_list.pop(0)
                final_list.append(last_element)
            to_delete = []
            continue

        curr_element = my_list[i]
        if curr_element > last_element:
            final_list.append(curr_element)
            last_element = curr_element
            to_delete.append(i)
        i += 1
    return final_list

有谁知道优化解决方案的方法吗?现在我多次迭代列表。它不需要在 Python.

根据每个元素之前出现的次数给出每个元素cumcount。然后先按 cumcount 排序,然后按其值排序。

arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
cumcount = []
d = dict.fromkeys(arr, 0)

for el in arr:
    cumcount.append(d[el])
    d[el] += 1

[x[1] for x in sorted(zip(cumcount, arr))]
# [1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]

我认为这应该是等价的,只需要 O(n log n) 时间进行排序,O(n) 时间用于其余部分。

from collections import Counter, OrderedDict

arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]
d = OrderedDict(Counter(sorted(arr)))

ans = []
while d:
    ans += d
    for x in list(d):
        d[x] -= 1
        if not d[x]:
            del d[x]
print(ans)

另一个,灵感来自 trincot:

from collections import defaultdict
from itertools import count

arr = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]

d = defaultdict(count)
arr.sort(key=lambda x: (next(d[x]), x))

print(arr)

基准以及其他解决方案,基于您自己的建议输入和我的两个(对于每个输入,显示了多次尝试中每个解决方案的最佳三次):

[randint(1, 10**5) for i in range(10**4)]
 2.14 ms   2.15 ms   2.18 ms  Kelly4c
 2.19 ms   2.24 ms   2.32 ms  Kelly4b
 2.23 ms   2.25 ms   2.37 ms  Kelly4
 5.83 ms   6.02 ms   6.03 ms  original
 7.05 ms   7.12 ms   7.54 ms  Kelly1
 7.82 ms   8.43 ms   8.45 ms  Kelly3b
 8.13 ms   8.15 ms   8.92 ms  Kelly3
 9.06 ms   9.44 ms   9.50 ms  db0
10.25 ms  10.28 ms  10.31 ms  db
11.09 ms  11.11 ms  11.23 ms  trincot
11.19 ms  11.25 ms  11.58 ms  Kelly2
11.29 ms  11.65 ms  11.74 ms  db1
11.64 ms  11.65 ms  12.49 ms  Kelly3c

list(range(n := 1000)) + [n] * n
 0.57 ms   0.60 ms   0.63 ms  Kelly2
 0.64 ms   0.65 ms   0.68 ms  Kelly3
 0.66 ms   0.69 ms   0.69 ms  trincot
 0.69 ms   0.71 ms   0.71 ms  db
 0.69 ms   0.70 ms   0.70 ms  db1
 0.72 ms   0.74 ms   0.75 ms  Kelly3b
 0.99 ms   1.04 ms   1.11 ms  Kelly3c
 1.04 ms   1.08 ms   1.09 ms  Kelly1
28.27 ms  28.56 ms  28.63 ms  Kelly4b
36.58 ms  36.81 ms  37.03 ms  Kelly4c
39.78 ms  40.07 ms  40.37 ms  Kelly4
80.41 ms  80.96 ms  81.99 ms  original
81.00 ms  81.90 ms  82.08 ms  db0

list(range(n := 10000)) + [n] * n
 7.11 ms   7.37 ms   7.42 ms  Kelly2
 7.30 ms   7.62 ms   7.63 ms  db
 7.31 ms   7.31 ms   7.37 ms  Kelly3
 7.52 ms   7.64 ms   7.80 ms  trincot
 7.64 ms   7.82 ms   7.94 ms  db1
 8.81 ms   8.83 ms   8.84 ms  Kelly3b
10.18 ms  10.41 ms  10.52 ms  Kelly1
10.85 ms  10.92 ms  11.16 ms  Kelly3c

基准代码(Try it online!):

from timeit import timeit
from collections import Counter, OrderedDict, defaultdict, deque
from itertools import count, chain, repeat
from random import randint, shuffle
from bisect import insort

def Kelly1(arr):
  d = OrderedDict(Counter(sorted(arr)))
  ans = []
  while d:
    ans += d
    for x in list(d):
        d[x] -= 1
        if not d[x]:
            del d[x]
  return ans

def Kelly2(arr):
  d = defaultdict(count)
  arr.sort(key=lambda x: (next(d[x]), x))
  return arr

def Kelly3(arr):
  ctr = Counter(arr)
  rounds = [[] for _ in range(max(ctr.values()))]
  for x, count in sorted(ctr.items()):
    for rnd in rounds[:count]:
      rnd.append(x)
  return list(chain.from_iterable(rounds))

def Kelly3b(arr):
  ctr = Counter(arr)
  rounds = [[] for _ in range(max(ctr.values()))]
  appends = [rnd.append for rnd in rounds]
  for x, count in sorted(ctr.items()):
    for append in appends[:count]:
      append(x)
  return list(chain.from_iterable(rounds))

def Kelly3c(arr):
  ctr = Counter(arr)
  rounds = [[] for _ in range(max(ctr.values()))]
  for x, count in sorted(ctr.items()):
    deque(map(list.append, rounds[:count], repeat(x)), 0)
  return list(chain.from_iterable(rounds))

def Kelly4(arr):
  arr.sort()
  out = [].append
  while arr:
    postpone = [].append
    last = None
    for x in arr:
      if last != x:
        out(x)
      else:
        postpone(x)
      last = x
    arr = postpone.__self__
  return out.__self__

def Kelly4b(arr):
  arr.sort()
  out = [].append
  while arr:
    postpone = [].append
    last = None
    arr = [x 
           for x in arr
           if last == x
           or out(last := x)]
  return out.__self__

def Kelly4c(arr):
  arr.sort()
  out = []
  while arr:
    postpone = [].append
    last = None
    out += [last := x 
            for x in arr
            if last != x
            or postpone(x)]
    arr = postpone.__self__
  return out

def original(my_list):
    my_list.sort()
    final_list = []
    to_delete = []
    i = 0
    last_element = float('-inf')
    while my_list:
        if i >= len(my_list):
            i = 0
            for index in to_delete[::-1]:
                my_list.pop(index)
            if len(my_list):
                last_element = my_list.pop(0)
                final_list.append(last_element)
            to_delete = []
            continue

        curr_element = my_list[i]
        if curr_element > last_element:
            final_list.append(curr_element)
            last_element = curr_element
            to_delete.append(i)
        i += 1
    return final_list

def db(arr):
  cumcount = []
  d = dict.fromkeys(arr, 0)
  for el in arr:
    cumcount.append(d[el])
    d[el] += 1
  return [x[1] for x in sorted(zip(cumcount, arr))]

def db0(arr):
  d = Counter(arr)
  keys = sorted(d.keys())
  ans = []
  while len(ans) < len(arr):
    for k in keys:
        if d.get(k, 0) > 0:
            ans.append(k)
            d[k] -= 1
  return ans

def db1(arr):
  cumcount = []
  d = {k: 0 for k in set(arr)}
  for el in arr:
    cumcount.append(d[el])
    d[el] += 1
  return [x[1] for x in sorted(zip(cumcount, arr))]

def trincot(lst):
  return [num for _,num in sorted(
    (i, num)
    for num, freq in Counter(lst).items()
        for i in range(freq)
  )]

funcs = [Kelly1, Kelly2, Kelly3, Kelly3b, Kelly3c, Kelly4, Kelly4b, Kelly4c, original, db, db0, db1, trincot]

def test(arr, funcs):
  print(arr)
  arr = eval(arr)

  expect = original(arr[:])
  for func in funcs:
    result = func(arr[:])
    if result != expect:
      print(expect[:20])
      print(result[:20])
    assert result == expect, func
  
  times = {func: [] for func in funcs}
  for _ in range(20):
    shuffle(funcs)
    for func in funcs:
      copy = arr[:]
      t = timeit(lambda: func(copy), 'gc.enable()', number=1)
      insort(times[func], t)
  for func in sorted(funcs, key=times.get):
    print(*('%5.2f ms ' % (t * 1e3) for t in times[func][:3]), func.__name__)
  print()

test('[randint(1, 10**5) for i in range(10**4)]',
     funcs)
test('list(range(n := 1000)) + [n] * n',
     funcs)
test('list(range(n := 10000)) + [n] * n',
     list(set(funcs) - {original, Kelly4, Kelly4b, Kelly4c, db0}))

该算法应处理重复值,以便首先所有第一次出现的数字按排序顺序出现,然后第二次出现的重复项按排序顺序出现,然后是第三次出现,...等等

from collections import Counter 

lst = [11, 16, 8, 9, 4, 1, 2, 17, 4, 15, 9, 11, 11, 7, 19, 16, 19, 5, 19, 11]

result = [num for _,num in sorted(
    (i, num)
    for num, freq in Counter(lst).items()
        for i in range(freq)
)]

result 将是:

[1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 19, 4, 9, 11, 16, 19, 11, 19, 11]