使用千位分隔符对数字行进行排序
Sort numeric lines with thousand separators
我想对以数字开头的列表进行排序(使用 Python3)。
许多这些数字都带有千位分隔符(点)和小数点(逗号)。
我的列表摘录:
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,25']
我试过这个:
数字排序:
sorted(mylist, key=int, reverse=True)
--> 给出 'not an integer' 错误
我也试过这个:
字母数字排序:
convert = lambda text: int(text) if text.isdigit() else text
alphanum_key = lambda key: [ convert(c) for c in regex.split('([0-9]+)', key) ]
mysort.sort( key=alphanum_key, reverse=True )
字母数字输出:
['23.130', '23 text', '12 text', '3,25', '1.482 text']
预期输出:
['3,25', '12 text', '23 text', '1.482 text', '23.130']
如何根据预期输出对列表进行排序?
编辑
如果列表中还有只包含文本的字符串,例如:
mylist = ['2 another test', '4,32', '801', '4apples', 'foo', '4,32 hi', 'apples4', '', '13.300 a test', '2apples', 'doo', '12 today']
我想要如下输出(包括空字段):
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', 'apples4', 'doo', 'foo', '']
你可以用自定义排序函数解决:
>>> sorted(mylist, key=lambda item: float(item.split(" ", 1)[0].replace(".", "").replace(",", ".")))
['3,25', '12 text', '23 text', '1.482 text', '23.130']
其中 key 函数会把每个元素按空格拆分并取第一段,去掉其中的点(千位分隔符),再把逗号(小数点)替换为点,最后把结果转换为 float。
这个解决方案基于一些假设:它适用于所提供的示例数据,但您可能需要对其进行调整/改进才能处理真实数据——例如,目前如果某个元素无法转换为 float,它就会失败。
其实也可以使用 locale 模块:先把 locale 设置为合适的区域,然后用 locale.atof 进行转换即可:
In [6]: from locale import atof
In [7]: import locale
In [8]: locale.setlocale(locale.LC_ALL, 'de_DE')
Out[8]: 'de_DE'
In [9]: mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250']
In [10]: sorted(mylist,key=lambda x: atof(x.split()[0]))
Out[10]: ['3,250', '12 text', '23 text', '1.482 text', '23.130']
如果列表中可能包含纯文本项,可以使用 try/except;在 except 分支里该怎么做取决于您希望这些字符串如何排序,这里我只是 return float("inf"),这样字符串就会被排到最后:
from locale import atof
import locale
locale.setlocale(locale.LC_ALL, 'de_DE')
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250', "foo"]
def atof_try(x):
    """Return the locale-aware numeric value of the first token of ``x``.

    The first whitespace-separated token is parsed with ``locale.atof``, so
    the active locale decides what counts as a thousands separator and as a
    decimal mark.

    Args:
        x: The string to derive a sort key from.

    Returns:
        The parsed float, or ``float("inf")`` when ``x`` has no token (empty
        or whitespace-only) or the token is not a number, so that such items
        sort after every numeric item.
    """
    try:
        return atof(x.split()[0])
    except (ValueError, IndexError):
        # ValueError: the token is not a number in the active locale.
        # IndexError: ``x.split()`` is empty for "" / whitespace-only input.
        return float("inf")
因此,如果我们将 foo
添加到我的列表中:
In [35]: mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250', "foo"]
In [36]: sorted(mylist, key=atof_try)
Out[36]: ['3,250', '12 text', '23 text', '1.482 text', '23.130', 'foo']
好的,除了末尾的空字符串之外,这与您的预期输出一致;常规排序不会把空字符串放在末尾(它会排在其他纯文本项之前),如果这一点确实重要,我们可以再修改:
from locale import atof
import locale
locale.setlocale(locale.LC_ALL, 'de_DE')
import re
# Unique sentinel meaning "this value could not be converted to a number".
wrong_type = object()


def atof_try(x):
    """Parse the first whitespace-separated token of ``x`` with ``locale.atof``.

    Args:
        x: The string to derive a numeric value from.

    Returns:
        The parsed float, or the ``wrong_type`` sentinel when ``x`` has no
        token (empty or whitespace-only) or the token is not a number in the
        active locale.
    """
    try:
        return atof(x.split()[0])
    except (ValueError, IndexError):
        # IndexError: ``x.split()`` is empty for "" / whitespace-only input.
        return wrong_type
def atof_pre(x, patt=re.compile(r"^\d+")):
    """Return a numeric sort key for ``x``, or ``wrong_type`` if there is none.

    The value from ``atof_try`` is used when it succeeds. Otherwise the text
    matched by ``patt`` (by default the run of digits at the very start of
    ``x``) is converted with ``int``.

    Args:
        x: The string to derive a sort key from.
        patt: Compiled pattern whose match is converted with ``int``.

    Returns:
        A float or int key, or the ``wrong_type`` sentinel when neither
        approach yields a number.
    """
    try:
        value = atof_try(x)
        if value is not wrong_type:
            return value
        match = patt.search(x)
        if match is None:
            # No leading digits: say so explicitly instead of relying on the
            # AttributeError raised by ``None.group()``.
            return wrong_type
        return int(match.group())
    except (ValueError, IndexError, AttributeError):
        return wrong_type
def merge_types(l, out):
    """Yield the elements of ``l`` for which ``atof_pre`` finds a number.

    Elements for which ``atof_pre`` returns ``wrong_type`` are not yielded;
    they are appended to ``out`` instead, in the order they are encountered.
    """
    for item in l:
        if atof_pre(item) is wrong_type:
            out.append(item)
            continue
        yield item
输出:
In [3]: temp = []
In [4]: mylist[:] = sorted(merge_types(mylist, temp), key=atof_pre) + sorted(temp)
In [5]: print(mylist)
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', '', 'apples4', 'doo', 'foo']
把这些逻辑放进一个 class 中,对不符合数字模式的元素列表进行就地排序,并用 extend 代替列表拼接;您可以传入 lambda 来指定按什么内容排序,rev 则决定是否反向排序:
from locale import atof
import re
class WeirdSort:
    """Sort a list in place: numeric-looking elements first, the rest after.

    All of the work happens in ``__init__``. An element counts as numeric
    when the string selected by ``which`` either parses with ``locale.atof``
    (so the active locale decides the thousands separator and decimal mark)
    or at least starts with digits. Every other element is sorted separately
    with ``other`` as key and placed after the numeric elements, or before
    them when ``rev`` is true.

    Args:
        in_list: The list to sort; it is modified in place.
        rev: Sort both groups in descending order and put the non-numeric
            group first.
        which: Callable mapping an element to the string that is parsed.
            When omitted, the first whitespace-separated token of the element
            is used.
        other: Key function for the non-numeric elements (``None`` keeps
            their natural ordering).
    """

    def __init__(self, in_list, rev=False, which=None, other=None):
        # Holds all elements that don't match the numeric pattern.
        self.temp = []
        self.in_list = in_list
        # Unique sentinel meaning "could not be turned into a number".
        self.wrong_type = object()
        # Callable that extracts the string to parse. The ``None`` default
        # used to be called as-is and raised TypeError; fall back to the
        # first whitespace-separated token instead.
        self.which = self._first_token if which is None else which
        # Split the data and sort the numeric part of in_list.
        self.in_list[:] = sorted(
            self.separate_types(), key=self.atof_pre, reverse=rev
        )
        # Sort the non-numeric elements.
        self.temp.sort(key=other, reverse=rev)
        # Merge both lists; the non-numeric group leads when reversed.
        if rev:
            self.temp.extend(self.in_list)
            self.in_list[:] = self.temp
        else:
            self.in_list.extend(self.temp)
        del self.temp

    @staticmethod
    def _first_token(x):
        """Return the first whitespace-separated token of ``x``."""
        return x.split()[0]

    def atof_try(self, x):
        """Parse ``self.which(x)`` with ``locale.atof``.

        Returns the ``wrong_type`` sentinel when the selected string is not a
        number, or when ``which`` raises IndexError (for example when it
        indexes the result of splitting an empty string).
        """
        try:
            return atof(self.which(x))
        except (ValueError, IndexError):
            return self.wrong_type

    def atof_pre(self, x, patt=re.compile(r"^\d+")):
        """Return a numeric sort key for ``x``, or ``wrong_type``.

        ``atof_try`` is attempted first. On failure the text matched by
        ``patt`` in ``self.which(x)`` (by default the leading digits) is
        converted with ``int``. When that fails too, the ``wrong_type``
        sentinel is returned, which means ``x`` is sorted with the regular
        (non-numeric) group.
        """
        try:
            value = self.atof_try(x)
            if value is not self.wrong_type:
                return value
            # Search the selected string, not the raw element: the element
            # may be a container (e.g. a row list) that ``re`` cannot scan.
            match = patt.search(self.which(x))
            if match is None:
                return self.wrong_type
            return int(match.group())
        except (ValueError, IndexError, AttributeError):
            return self.wrong_type

    def separate_types(self):
        """Yield the elements that have a numeric key.

        Elements for which ``atof_pre`` returns ``wrong_type`` are appended
        to ``self.temp`` instead, to be sorted separately.
        """
        for ele in self.in_list:
            if self.atof_pre(ele) is not self.wrong_type:
                yield ele
            else:
                self.temp.append(ele)
空字符串现在也在末尾。
所以对于输入:
import locale
# Demo: sort a flat list of strings and a list of [string, number] rows with
# WeirdSort, in both directions.
# NOTE(review): presumably 'de_DE' is chosen so that locale.atof treats "." as
# the thousands separator and "," as the decimal mark -- confirm the locale is
# installed on the target system.
locale.setlocale(locale.LC_ALL, 'de_DE')
mylist = ['2 another test', '4,32', '801', '4apples', 'foo', '4,32 hi', 'apples4', '', '13.300 a test', '2apples', 'doo', '12 today']
# flat_lambda1 picks the first whitespace-separated token of a string.
# flat_lambda2 is the key for the leftover strings: the leading boolean is
# True only for "", so "" compares greater than every other string.
flat_lambda1, flat_lambda2 = lambda x: x.split()[0], lambda x: (x == "", x)
# Descending sort of the flat list (rev=True).
WeirdSort(mylist, True, flat_lambda1, flat_lambda2)
print(mylist)
# Same two keys, but reading the string from index 0 of each row.
sublst_lambda1, sublist_lambda2 = lambda x: x[0].split()[0], lambda x: (x[0] == "", x[0])
# Ascending sort of the flat list; the inline lambdas are the same
# expressions as flat_lambda1 / flat_lambda2.
WeirdSort(mylist, False, lambda x: x.split()[0], lambda x: (x == "", x))
print(mylist)
# Rows whose first item is the string to sort on.
mylist = [['3,25', 1], ['12 text', 2], ["", 5], ['23 text', 3]]
WeirdSort(mylist, True, sublst_lambda1, sublist_lambda2)
print(mylist)
WeirdSort(mylist, False, sublst_lambda1, sublist_lambda2)
print(mylist)
你得到:
['', 'foo', 'doo', 'apples4', '13.300 a test', '801', '12 today', '4,32', '4,32 hi', '4apples', '2 another test', '2apples']
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', 'apples4', 'doo', 'foo', '']
[['', 5], ['23 text', 3], ['12 text', 2], ['3,25', 1]]
[['3,25', 1], ['12 text', 2], ['23 text', 3], ['', 5]]
这是个老问题,不过 natsort 库从 5.0.0 版本起,在调用 humansorted 时会遵循语言环境(locale)的千位分隔符:
import natsort, locale
locale.setlocale(locale.LC_ALL, 'german')
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,25']
natsort.humansorted(mylist)
我想对以数字开头的列表进行排序(使用 Python3)。
许多这些数字都带有千位分隔符(点)和小数点(逗号)。
我的列表摘录:
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,25']
我试过这个:
数字排序:
sorted(mylist, key=int, reverse=True)
--> 给出 'not an integer' 错误
我也试过这个:
字母数字排序:
convert = lambda text: int(text) if text.isdigit() else text
alphanum_key = lambda key: [ convert(c) for c in regex.split('([0-9]+)', key) ]
mysort.sort( key=alphanum_key, reverse=True )
字母数字输出:
['23.130', '23 text', '12 text', '3,25', '1.482 text']
预期输出:
['3,25', '12 text', '23 text', '1.482 text', '23.130']
如何根据预期输出对列表进行排序?
编辑
如果列表中还有只包含文本的字符串,例如:
mylist = ['2 another test', '4,32', '801', '4apples', 'foo', '4,32 hi', 'apples4', '', '13.300 a test', '2apples', 'doo', '12 today']
我想要如下输出(包括空字段):
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', 'apples4', 'doo', 'foo', '']
你可以用自定义排序函数解决:
>>> sorted(mylist, key=lambda item: float(item.split(" ", 1)[0].replace(".", "").replace(",", ".")))
['3,25', '12 text', '23 text', '1.482 text', '23.130']
其中 key 函数会把每个元素按空格拆分并取第一段,去掉其中的点(千位分隔符),再把逗号(小数点)替换为点,最后把结果转换为 float。
这个解决方案基于一些假设:它适用于所提供的示例数据,但您可能需要对其进行调整/改进才能处理真实数据——例如,目前如果某个元素无法转换为 float,它就会失败。
其实也可以使用 locale 模块:先把 locale 设置为合适的区域,然后用 locale.atof 进行转换即可:
In [6]: from locale import atof
In [7]: import locale
In [8]: locale.setlocale(locale.LC_ALL, 'de_DE')
Out[8]: 'de_DE'
In [9]: mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250']
In [10]: sorted(mylist,key=lambda x: atof(x.split()[0]))
Out[10]: ['3,250', '12 text', '23 text', '1.482 text', '23.130']
如果列表中可能包含纯文本项,可以使用 try/except;在 except 分支里该怎么做取决于您希望这些字符串如何排序,这里我只是 return float("inf"),这样字符串就会被排到最后:
from locale import atof
import locale
locale.setlocale(locale.LC_ALL, 'de_DE')
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250', "foo"]
def atof_try(x):
    """Return the locale-aware numeric value of the first token of ``x``.

    The first whitespace-separated token is parsed with ``locale.atof``, so
    the active locale decides what counts as a thousands separator and as a
    decimal mark.

    Args:
        x: The string to derive a sort key from.

    Returns:
        The parsed float, or ``float("inf")`` when ``x`` has no token (empty
        or whitespace-only) or the token is not a number, so that such items
        sort after every numeric item.
    """
    try:
        return atof(x.split()[0])
    except (ValueError, IndexError):
        # ValueError: the token is not a number in the active locale.
        # IndexError: ``x.split()`` is empty for "" / whitespace-only input.
        return float("inf")
因此,如果我们将 foo
添加到我的列表中:
In [35]: mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,250', "foo"]
In [36]: sorted(mylist, key=atof_try)
Out[36]: ['3,250', '12 text', '23 text', '1.482 text', '23.130', 'foo']
好的,除了末尾的空字符串之外,这与您的预期输出一致;常规排序不会把空字符串放在末尾(它会排在其他纯文本项之前),如果这一点确实重要,我们可以再修改:
from locale import atof
import locale
locale.setlocale(locale.LC_ALL, 'de_DE')
import re
# Unique sentinel meaning "this value could not be converted to a number".
wrong_type = object()


def atof_try(x):
    """Parse the first whitespace-separated token of ``x`` with ``locale.atof``.

    Args:
        x: The string to derive a numeric value from.

    Returns:
        The parsed float, or the ``wrong_type`` sentinel when ``x`` has no
        token (empty or whitespace-only) or the token is not a number in the
        active locale.
    """
    try:
        return atof(x.split()[0])
    except (ValueError, IndexError):
        # IndexError: ``x.split()`` is empty for "" / whitespace-only input.
        return wrong_type
def atof_pre(x, patt=re.compile(r"^\d+")):
    """Return a numeric sort key for ``x``, or ``wrong_type`` if there is none.

    The value from ``atof_try`` is used when it succeeds. Otherwise the text
    matched by ``patt`` (by default the run of digits at the very start of
    ``x``) is converted with ``int``.

    Args:
        x: The string to derive a sort key from.
        patt: Compiled pattern whose match is converted with ``int``.

    Returns:
        A float or int key, or the ``wrong_type`` sentinel when neither
        approach yields a number.
    """
    try:
        value = atof_try(x)
        if value is not wrong_type:
            return value
        match = patt.search(x)
        if match is None:
            # No leading digits: say so explicitly instead of relying on the
            # AttributeError raised by ``None.group()``.
            return wrong_type
        return int(match.group())
    except (ValueError, IndexError, AttributeError):
        return wrong_type
def merge_types(l, out):
    """Yield the elements of ``l`` for which ``atof_pre`` finds a number.

    Elements for which ``atof_pre`` returns ``wrong_type`` are not yielded;
    they are appended to ``out`` instead, in the order they are encountered.
    """
    for item in l:
        if atof_pre(item) is wrong_type:
            out.append(item)
            continue
        yield item
输出:
In [3]: temp = []
In [4]: mylist[:] = sorted(merge_types(mylist, temp), key=atof_pre) + sorted(temp)
In [5]: print(mylist)
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', '', 'apples4', 'doo', 'foo']
把这些逻辑放进一个 class 中,对不符合数字模式的元素列表进行就地排序,并用 extend 代替列表拼接;您可以传入 lambda 来指定按什么内容排序,rev 则决定是否反向排序:
from locale import atof
import re
class WeirdSort:
    """Sort a list in place: numeric-looking elements first, the rest after.

    All of the work happens in ``__init__``. An element counts as numeric
    when the string selected by ``which`` either parses with ``locale.atof``
    (so the active locale decides the thousands separator and decimal mark)
    or at least starts with digits. Every other element is sorted separately
    with ``other`` as key and placed after the numeric elements, or before
    them when ``rev`` is true.

    Args:
        in_list: The list to sort; it is modified in place.
        rev: Sort both groups in descending order and put the non-numeric
            group first.
        which: Callable mapping an element to the string that is parsed.
            When omitted, the first whitespace-separated token of the element
            is used.
        other: Key function for the non-numeric elements (``None`` keeps
            their natural ordering).
    """

    def __init__(self, in_list, rev=False, which=None, other=None):
        # Holds all elements that don't match the numeric pattern.
        self.temp = []
        self.in_list = in_list
        # Unique sentinel meaning "could not be turned into a number".
        self.wrong_type = object()
        # Callable that extracts the string to parse. The ``None`` default
        # used to be called as-is and raised TypeError; fall back to the
        # first whitespace-separated token instead.
        self.which = self._first_token if which is None else which
        # Split the data and sort the numeric part of in_list.
        self.in_list[:] = sorted(
            self.separate_types(), key=self.atof_pre, reverse=rev
        )
        # Sort the non-numeric elements.
        self.temp.sort(key=other, reverse=rev)
        # Merge both lists; the non-numeric group leads when reversed.
        if rev:
            self.temp.extend(self.in_list)
            self.in_list[:] = self.temp
        else:
            self.in_list.extend(self.temp)
        del self.temp

    @staticmethod
    def _first_token(x):
        """Return the first whitespace-separated token of ``x``."""
        return x.split()[0]

    def atof_try(self, x):
        """Parse ``self.which(x)`` with ``locale.atof``.

        Returns the ``wrong_type`` sentinel when the selected string is not a
        number, or when ``which`` raises IndexError (for example when it
        indexes the result of splitting an empty string).
        """
        try:
            return atof(self.which(x))
        except (ValueError, IndexError):
            return self.wrong_type

    def atof_pre(self, x, patt=re.compile(r"^\d+")):
        """Return a numeric sort key for ``x``, or ``wrong_type``.

        ``atof_try`` is attempted first. On failure the text matched by
        ``patt`` in ``self.which(x)`` (by default the leading digits) is
        converted with ``int``. When that fails too, the ``wrong_type``
        sentinel is returned, which means ``x`` is sorted with the regular
        (non-numeric) group.
        """
        try:
            value = self.atof_try(x)
            if value is not self.wrong_type:
                return value
            # Search the selected string, not the raw element: the element
            # may be a container (e.g. a row list) that ``re`` cannot scan.
            match = patt.search(self.which(x))
            if match is None:
                return self.wrong_type
            return int(match.group())
        except (ValueError, IndexError, AttributeError):
            return self.wrong_type

    def separate_types(self):
        """Yield the elements that have a numeric key.

        Elements for which ``atof_pre`` returns ``wrong_type`` are appended
        to ``self.temp`` instead, to be sorted separately.
        """
        for ele in self.in_list:
            if self.atof_pre(ele) is not self.wrong_type:
                yield ele
            else:
                self.temp.append(ele)
空字符串现在也在末尾。
所以对于输入:
import locale
# Demo: sort a flat list of strings and a list of [string, number] rows with
# WeirdSort, in both directions.
# NOTE(review): presumably 'de_DE' is chosen so that locale.atof treats "." as
# the thousands separator and "," as the decimal mark -- confirm the locale is
# installed on the target system.
locale.setlocale(locale.LC_ALL, 'de_DE')
mylist = ['2 another test', '4,32', '801', '4apples', 'foo', '4,32 hi', 'apples4', '', '13.300 a test', '2apples', 'doo', '12 today']
# flat_lambda1 picks the first whitespace-separated token of a string.
# flat_lambda2 is the key for the leftover strings: the leading boolean is
# True only for "", so "" compares greater than every other string.
flat_lambda1, flat_lambda2 = lambda x: x.split()[0], lambda x: (x == "", x)
# Descending sort of the flat list (rev=True).
WeirdSort(mylist, True, flat_lambda1, flat_lambda2)
print(mylist)
# Same two keys, but reading the string from index 0 of each row.
sublst_lambda1, sublist_lambda2 = lambda x: x[0].split()[0], lambda x: (x[0] == "", x[0])
# Ascending sort of the flat list; the inline lambdas are the same
# expressions as flat_lambda1 / flat_lambda2.
WeirdSort(mylist, False, lambda x: x.split()[0], lambda x: (x == "", x))
print(mylist)
# Rows whose first item is the string to sort on.
mylist = [['3,25', 1], ['12 text', 2], ["", 5], ['23 text', 3]]
WeirdSort(mylist, True, sublst_lambda1, sublist_lambda2)
print(mylist)
WeirdSort(mylist, False, sublst_lambda1, sublist_lambda2)
print(mylist)
你得到:
['', 'foo', 'doo', 'apples4', '13.300 a test', '801', '12 today', '4,32', '4,32 hi', '4apples', '2 another test', '2apples']
['2 another test', '2apples', '4apples', '4,32', '4,32 hi', '12 today', '801', '13.300 a test', 'apples4', 'doo', 'foo', '']
[['', 5], ['23 text', 3], ['12 text', 2], ['3,25', 1]]
[['3,25', 1], ['12 text', 2], ['23 text', 3], ['', 5]]
这是个老问题,不过 natsort 库从 5.0.0 版本起,在调用 humansorted 时会遵循语言环境(locale)的千位分隔符:
import natsort, locale
locale.setlocale(locale.LC_ALL, 'german')
mylist = ['23 text', '23.130', '12 text', '1.482 text', '3,25']
natsort.humansorted(mylist)