
Sorting alphanumeric sequences is not correct. How can I improve it?

当我运行下面的代码时,得不到我想要的结果:

>>> test = { '3 Silver', '3 Oct', '4AD', '99 Reese', '1991', 'alpha', 'beta' }
>>> sorted(test)
['1991', '3 Oct', '3 Silver', '4AD', '99 Reese', 'alpha', 'beta']

这是不正确的,因为 1991 是以数字开头的条目中数值最大的,应该排在所有数字条目之后、紧挨着出现在 alpha 之前。



是的,可以做到,但您必须创建自己的"评分"(scoring)系统来得到您想要的排序顺序:

import re

def score(token):
    """Build a sort key that orders tokens by embedded number, then by text.

    All digits found anywhere in ``token`` are concatenated and read as one
    integer; digits and spaces are then stripped to obtain the text part.

    Args:
        token: The string to score.

    Returns:
        A ``(number, text)`` tuple. ``number`` is ``float('inf')`` when the
        token contains no digits, so such tokens sort after every numbered
        token and the key stays comparable on Python 3.
    """
    digits = re.sub(r'\D+', '', token)
    # An empty string here would be compared against ints from other tokens,
    # which raises TypeError on Python 3; infinity yields the same ordering.
    n = int(digits) if digits else float('inf')
    # Inside a character class '+' is a literal, so r'[\d+ ]' also deleted
    # plus signs; only digits and spaces are meant to be removed.
    w = re.sub(r'[\d ]+', '', token)
    return n, w  # most significant criterion first, then the tie-breaker

arr = ['3 Silver', '3 Oct', '4AD', '99 Reese', '1991', 'alpha', 'beta']
# Parenthesised call form: valid on Python 3 and prints the same on Python 2.
print(sorted(arr, key=score))  # ['3 Oct', '3 Silver', '4AD', '99 Reese', '1991', 'alpha', 'beta']


from itertools import takewhile, dropwhile

test = ['3 Silver', '3 Oct', '4AD', '99 Reese', '1991', 'alpha', 'beta']


def leading_number_key(word):
    """Return a list usable as a sort key for ``word``.

    The first element is the integer formed by the leading digits of
    ``word``, or ``float("inf")`` when the word does not start with a digit
    (which places such words after every numbered word). The remaining
    elements are the code points of the characters after the leading digits.

    Args:
        word: The string to build a key for.

    Returns:
        A list ``[number_or_inf, ord(ch), ord(ch), ...]``.
    """
    digits = ''.join(takewhile(lambda ch: ch.isdigit(), word))
    # Test the digit string rather than the parsed value: a genuine leading
    # zero is falsy as an int and would otherwise be mistaken for "no number".
    first = int(digits) if digits else float("inf")
    rest = [ord(ch) for ch in dropwhile(lambda ch: ch.isdigit(), word)]
    return [first] + rest


### map each word to its sort key
items = {word: leading_number_key(word) for word in test}

### sort (key, word) pairs; iterate the list itself so repeated words are kept
s = sorted((items[word], word) for word in test)
## [([3, 32, 79, 99, 116], '3 Oct'),
##    ([3, 32, 83, 105, 108, 118, 101, 114], '3 Silver'),
##    ([4, 65, 68], '4AD'),
##    ([99, 32, 82, 101, 101, 115, 101], '99 Reese'),
##    ([1991], '1991'),
##    ([inf, 97, 108, 112, 104, 97], 'alpha'),
##    ([inf, 98, 101, 116, 97], 'beta')]

test_sorted = [word for _, word in s]
## ['3 Oct', '3 Silver', '4AD', '99 Reese', '1991', 'alpha', 'beta']