python - 嵌套数据的复杂排序
python - complex sorting of nested data
我正在从 postgres(jsonb 类型)检索数据,需要返回一个 OrderedDict,其顺序对人和机器来说都是可预测的。有一些较为通用的键,应当(在定义了 sort_order 的情况下,按照预定义的顺序)用来决定同类型值的优先级。否则,排序应退回到基于键的字典序。
总体意图是有一个可预测的,'sane',复合字典的表示。
基本算法为:
- 字典在列表之前
- 既不是可迭代对象也不是映射的值,优先于可迭代对象或映射。
- 键不在 sort_order 中的相同类型的值被认为是相等的,应按字典顺序排序。
- 如果 type(A[1]) == type(B[1]),且 A[0] in sort_order 而 B[0] not in sort_order,则对象 A 优先于对象 B
- 如果 all([type(A[1]) == type(B[1]), A[0] in sort_order, B[0] in sort_order]),则键在 sort_order 中的索引位置决定优先级。
我已经尝试了几种实现方式,但还没有想出一个我认为 pythonic/优雅的方案。
这是最新的版本:
# -*- coding: utf-8 -*-
import json
from collections import OrderedDict
def dict_sort(obj, sort_order=None):
    """Build the sort key for one ``(key, value)`` pair of a dict.

    The returned string starts with a one-digit bucket, so comparing the
    strings orders pairs as: plain values, then dicts, then lists; within
    each kind, keys listed in ``sort_order`` come before keys that are not.
    Listed keys are ordered by their position in ``sort_order``; unlisted
    keys are ordered by ``str(key)``.

    :param obj: ``(key, value)`` pair, e.g. an element of ``dict.items()``
    :param sort_order: optional sequence of keys in the preferred order
    :rtype: str
    :returns: string to be used as the ``key=`` result for ``sorted``
    """
    key, value = obj[0], obj[1]
    if isinstance(value, dict):
        bucket = 2
    elif isinstance(value, list):
        bucket = 4
    else:
        bucket = 0

    if sort_order is not None and key in sort_order:
        ranked = list(sort_order)
        # Zero-pad the position so that string comparison agrees with
        # numeric comparison (otherwise position 10 sorts before 2).
        width = len(str(len(ranked)))
        # Position 0 is a valid rank, so membership (not truthiness of the
        # index) decides whether the key is treated as listed.
        return '%d%0*d%s' % (bucket, width, ranked.index(key), key)

    # Unlisted keys use the next bucket and carry the key itself, so they
    # are ordered lexicographically instead of all comparing equal.
    return '%d%s' % (bucket + 1, key)
def comp_sort(obj, sort_order=None):
    """Return a recursively ordered copy of a composite object.

    - A dict becomes an ``OrderedDict`` whose items are ordered with
      ``dict_sort`` and whose values are processed recursively.
    - A list has every element processed recursively and is then sorted;
      if its elements cannot be compared with each other, the original
      element order is kept.
    - Any other value is returned unchanged.

    :param obj: dict, list or plain value
    :param sort_order: optional sequence of keys, forwarded to ``dict_sort``
    :returns: ordered copy of ``obj``
    """
    if isinstance(obj, dict):
        data = OrderedDict()
        ordered_items = sorted(
            obj.items(), key=lambda item: dict_sort(item, sort_order))
        for key, value in ordered_items:
            data[key] = comp_sort(value, sort_order)
        return data

    if isinstance(obj, list):
        # Normalise the elements first so nested containers are ordered
        # even when the list itself turns out to be sortable.
        items = [comp_sort(value, sort_order) for value in obj]
        try:
            return sorted(items)
        except TypeError:
            # Elements are not mutually orderable (e.g. dicts, mixed types).
            return items

    # Plain values pass through instead of being replaced by an empty dict.
    return obj
# thx herk
费了一番功夫,终于想出了一个满足所有要求的方案。它有点慢,但能用。
如有任何改进建议,我们将不胜感激!
# -*- coding: utf-8 -*-
from __future__ import print_function
from functools import cmp_to_key
import collections
import urllib2
import json
def sort_it(obj=None, sort_order=None):
    """Sort a composite python object.

    Mappings are rebuilt as ``OrderedDict`` with their items ordered by:

    1. kind of value: text/number values first, then any other
       non-container value (e.g. ``None``), then mappings, then
       list/tuple/set/frozenset values;
    2. within one kind, keys present in ``sort_order`` come first, in the
       order they appear there;
    3. remaining keys are ordered case-insensitively, with the exact key
       text as the final tie-break.

    Lists, tuples, sets and frozensets keep their element order and type;
    their elements are processed recursively. Anything else is returned
    unchanged.

    :param obj: Python object
    :param sort_order: optional custom sort order (sequence of keys)
    :rtype: OrderedDict
    :returns: Sorted composite object.
    """
    # Local imports/lookups keep the function usable on both Python 2 and 3.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping
    try:
        string_types = (basestring,)
    except NameError:  # Python 3
        string_types = (str,)

    text_types = string_types + (int, float, complex)
    iterable_types = (list, tuple, set, frozenset)

    # Resolve each listed key to its first position once, instead of
    # scanning sort_order on every comparison.
    order = list(sort_order or ())
    positions = {}
    for index, name in enumerate(order):
        positions.setdefault(name, index)
    unlisted = len(order)  # sorts after every listed key

    def sort_key(item):
        """Return the ordering tuple for one ``(key, value)`` pair."""
        key, value = item
        if isinstance(value, text_types):
            rank = 0
        elif isinstance(value, Mapping):
            rank = 2
        elif isinstance(value, iterable_types):
            rank = 3
        else:
            rank = 1
        # Non-string keys are compared through their string form.
        label = key if isinstance(key, string_types) else str(key)
        return rank, positions.get(key, unlisted), label.lower(), label

    def sort_node(node):
        """Recursively order ``node`` using ``sort_key`` for mappings."""
        if isinstance(node, Mapping):
            return collections.OrderedDict(
                (key, sort_node(value))
                for key, value in sorted(node.items(), key=sort_key))
        if isinstance(node, iterable_types):
            return type(node)([sort_node(value) for value in node])
        return node

    return sort_node(obj)
# Preferred key order for the example below; keys listed earlier are
# emitted first among values of the same kind.
sort_order = [
    'id',
    'rn',
    'dn',
    'vendor',
    'model',
    'serial',
    'name',
    'description',
    'tray',  # a missing comma here used to merge this with 'presence'
    'presence',
]
sample_data_uri = 'https://bit.ly/1jOpQF2'
# Download and parse the sample document once and reuse it for both
# examples; sort_it builds new containers and leaves its input untouched.
response = urllib2.urlopen(sample_data_uri)
try:
    sample_data = json.loads(response.read())
finally:
    # Release the connection even if reading or parsing fails.
    response.close()
### EXAMPLE - Sans sort order
print(json.dumps(sort_it(sample_data), indent=4))
### EXAMPLE - with sort_order
print(json.dumps(sort_it(sample_data, sort_order=sort_order), indent=4))
我正在从 postgres(jsonb 类型)检索数据,需要返回一个 OrderedDict,其顺序对人和机器来说都是可预测的。有一些较为通用的键,应当(在定义了 sort_order 的情况下,按照预定义的顺序)用来决定同类型值的优先级。否则,排序应退回到基于键的字典序。
总体意图是有一个可预测的,'sane',复合字典的表示。
基本算法为:
- 字典在列表之前
- 既不是可迭代对象也不是映射的值,优先于可迭代对象或映射。
- 键不在 sort_order 中的相同类型的值被认为是相等的,应按字典顺序排序。
- 如果 type(A[1]) == type(B[1]),且 A[0] in sort_order 而 B[0] not in sort_order,则对象 A 优先于对象 B
- 如果 all([type(A[1]) == type(B[1]), A[0] in sort_order, B[0] in sort_order]),则键在 sort_order 中的索引位置决定优先级。
我已经尝试了几种实现方式,但还没有想出一个我认为 pythonic/优雅的方案。
这是最新的版本:
# -*- coding: utf-8 -*-
import json
from collections import OrderedDict
def dict_sort(obj, sort_order=None):
    """Build the sort key for one ``(key, value)`` pair of a dict.

    The returned string starts with a one-digit bucket, so comparing the
    strings orders pairs as: plain values, then dicts, then lists; within
    each kind, keys listed in ``sort_order`` come before keys that are not.
    Listed keys are ordered by their position in ``sort_order``; unlisted
    keys are ordered by ``str(key)``.

    :param obj: ``(key, value)`` pair, e.g. an element of ``dict.items()``
    :param sort_order: optional sequence of keys in the preferred order
    :rtype: str
    :returns: string to be used as the ``key=`` result for ``sorted``
    """
    key, value = obj[0], obj[1]
    if isinstance(value, dict):
        bucket = 2
    elif isinstance(value, list):
        bucket = 4
    else:
        bucket = 0

    if sort_order is not None and key in sort_order:
        ranked = list(sort_order)
        # Zero-pad the position so that string comparison agrees with
        # numeric comparison (otherwise position 10 sorts before 2).
        width = len(str(len(ranked)))
        # Position 0 is a valid rank, so membership (not truthiness of the
        # index) decides whether the key is treated as listed.
        return '%d%0*d%s' % (bucket, width, ranked.index(key), key)

    # Unlisted keys use the next bucket and carry the key itself, so they
    # are ordered lexicographically instead of all comparing equal.
    return '%d%s' % (bucket + 1, key)
def comp_sort(obj, sort_order=None):
    """Return a recursively ordered copy of a composite object.

    - A dict becomes an ``OrderedDict`` whose items are ordered with
      ``dict_sort`` and whose values are processed recursively.
    - A list has every element processed recursively and is then sorted;
      if its elements cannot be compared with each other, the original
      element order is kept.
    - Any other value is returned unchanged.

    :param obj: dict, list or plain value
    :param sort_order: optional sequence of keys, forwarded to ``dict_sort``
    :returns: ordered copy of ``obj``
    """
    if isinstance(obj, dict):
        data = OrderedDict()
        ordered_items = sorted(
            obj.items(), key=lambda item: dict_sort(item, sort_order))
        for key, value in ordered_items:
            data[key] = comp_sort(value, sort_order)
        return data

    if isinstance(obj, list):
        # Normalise the elements first so nested containers are ordered
        # even when the list itself turns out to be sortable.
        items = [comp_sort(value, sort_order) for value in obj]
        try:
            return sorted(items)
        except TypeError:
            # Elements are not mutually orderable (e.g. dicts, mixed types).
            return items

    # Plain values pass through instead of being replaced by an empty dict.
    return obj
# thx herk
费了一番功夫,终于想出了一个满足所有要求的方案。它有点慢,但能用。
如有任何改进建议,我们将不胜感激!
# -*- coding: utf-8 -*-
from __future__ import print_function
from functools import cmp_to_key
import collections
import urllib2
import json
def sort_it(obj=None, sort_order=None):
    """Sort a composite python object.

    Mappings are rebuilt as ``OrderedDict`` with their items ordered by:

    1. kind of value: text/number values first, then any other
       non-container value (e.g. ``None``), then mappings, then
       list/tuple/set/frozenset values;
    2. within one kind, keys present in ``sort_order`` come first, in the
       order they appear there;
    3. remaining keys are ordered case-insensitively, with the exact key
       text as the final tie-break.

    Lists, tuples, sets and frozensets keep their element order and type;
    their elements are processed recursively. Anything else is returned
    unchanged.

    :param obj: Python object
    :param sort_order: optional custom sort order (sequence of keys)
    :rtype: OrderedDict
    :returns: Sorted composite object.
    """
    # Local imports/lookups keep the function usable on both Python 2 and 3.
    try:
        from collections.abc import Mapping
    except ImportError:  # Python 2
        from collections import Mapping
    try:
        string_types = (basestring,)
    except NameError:  # Python 3
        string_types = (str,)

    text_types = string_types + (int, float, complex)
    iterable_types = (list, tuple, set, frozenset)

    # Resolve each listed key to its first position once, instead of
    # scanning sort_order on every comparison.
    order = list(sort_order or ())
    positions = {}
    for index, name in enumerate(order):
        positions.setdefault(name, index)
    unlisted = len(order)  # sorts after every listed key

    def sort_key(item):
        """Return the ordering tuple for one ``(key, value)`` pair."""
        key, value = item
        if isinstance(value, text_types):
            rank = 0
        elif isinstance(value, Mapping):
            rank = 2
        elif isinstance(value, iterable_types):
            rank = 3
        else:
            rank = 1
        # Non-string keys are compared through their string form.
        label = key if isinstance(key, string_types) else str(key)
        return rank, positions.get(key, unlisted), label.lower(), label

    def sort_node(node):
        """Recursively order ``node`` using ``sort_key`` for mappings."""
        if isinstance(node, Mapping):
            return collections.OrderedDict(
                (key, sort_node(value))
                for key, value in sorted(node.items(), key=sort_key))
        if isinstance(node, iterable_types):
            return type(node)([sort_node(value) for value in node])
        return node

    return sort_node(obj)
# Preferred key order for the example below; keys listed earlier are
# emitted first among values of the same kind.
sort_order = [
    'id',
    'rn',
    'dn',
    'vendor',
    'model',
    'serial',
    'name',
    'description',
    'tray',  # a missing comma here used to merge this with 'presence'
    'presence',
]
sample_data_uri = 'https://bit.ly/1jOpQF2'
# Download and parse the sample document once and reuse it for both
# examples; sort_it builds new containers and leaves its input untouched.
response = urllib2.urlopen(sample_data_uri)
try:
    sample_data = json.loads(response.read())
finally:
    # Release the connection even if reading or parsing fails.
    response.close()
### EXAMPLE - Sans sort order
print(json.dumps(sort_it(sample_data), indent=4))
### EXAMPLE - with sort_order
print(json.dumps(sort_it(sample_data, sort_order=sort_order), indent=4))