Python 层次集库(或树上的集合操作)
Python library for hierarchical sets (or set operations on trees)
假设我有以下 hierarchy/tree:
Commit
/ | \
/ Fix \
/ / \ \
Feature CodeFix DocFix Refactoring
我希望能够定义集合,其中的元素是这棵树的叶子。使用标准 python 集合的方法是列出所有要包含的叶子,例如{Feature, CodeFix, DocFix}
。我想要的是使用快捷方式并传递整个 sub-trees 。为此,我们需要一些抽象级别来处理它,比方说 class HSet
。有了这样的 class,应该不仅可以创建这样的集合:HSet(Feature, BugFix, DocFix)
,还可以创建这样的集合:HSet(Feature, Fix)
。根据层次结构,两种选择都应产生相同的 object 并表示自 Fix == CodeFix|DocFix
以来的相同集合。
如果 Fix
有数十个 children,那么拥有这样的快捷方式将特别有益。必须手动在代码中创建大型集合正是我的用例。除了集合创建,我还需要一个快捷方式来列出集合中的元素。例如,如果一个集合包含层次结构中的所有叶子,则它可以由层次结构的根表示,对于上面提供的示例,它是 {Commit}
。请查看示例,这些示例有望更好地说明该概念。
>>> from somelibrary import HSet
>>> commit = HSet.create_root_element("Commit")
>>> feature, fix, refactoring = commit.add_children("Feature", "Fix", "Refactoring")
>>> code_fix, doc_fix = fix.add_children("CodeFix", "DocFix")
>>> str(code_fix | doc_fix)
'{Fix}'
>>> str(code_fix | refactoring)
'{CodeFix|Refactoring}'
>>> str(feature | fix | refactoring)
'{Commit}'
>>> str(~ code_fix)
'{Feature|DocFix|Refactoring}'
>>> str(~ commit)
'{}'
>>> str(~ (feature | refactoring))
'{Fix}'
我很好奇是否存在这样的somelibrary
来实现这个。
更新:实施
与此同时,我写了我正在谈论的概念的实现。但我总是担心不会重新发明轮子。因此,如果能提供指向标准 python 库或自定义库中现有数据结构的指针,可用于 fully/partially 替换我的代码,我将不胜感激。
hset.py
from typing import List
from hset.tree import _Tree
class HSet:
def __init__(self, subtrees: List[_Tree], domain: _Tree):
self.subtrees = domain.collapse_subtrees(subtrees)
self.domain = domain
@classmethod
def create_root_element(cls, label: str) -> 'HSet':
tree = _Tree(label, None, [])
return cls([tree], tree)
def add_children(self, *labels: str) -> List['HSet']:
if len(self.subtrees) != 1:
raise ValueError(f"Cannot add children to this HSet since it has multiple root elements: {self}")
if self.subtrees[0].children:
raise ValueError(f"This HSet already has children.")
trees = self.subtrees[0].add_children(*labels)
return list(map(lambda t: self._create([t]), trees))
def _create(self, subtrees: List[_Tree]) -> 'HSet':
return HSet(subtrees, self.domain)
def __str__(self):
return '{' + "|".join(map(lambda x: x.label, self.subtrees)) + '}'
def __invert__(self) -> 'HSet':
return self._create(self.domain.complement(self.subtrees))
def __or__(self, other: 'HSet') -> 'HSet':
if self.domain != other.domain:
raise ValueError("Cannot perform operations on HSets with different domains")
return self._create(self.domain.collapse_subtrees(self.subtrees + other.subtrees))
tree.py
from dataclasses import dataclass
from typing import Optional, List
@dataclass
class _Tree:
label: str
parent: Optional['_Tree']
children: List['_Tree']
def add_children(self, *labels: str) -> List['_Tree']:
children = [_Tree(label, self, []) for label in labels]
self.children = children
return children
def collapse_subtrees(self, subtrees: List['_Tree']) -> List['_Tree']:
if self in subtrees:
return [self]
elif not self.children:
return []
fully_covered = True
res = []
for child in self.children:
collapsed_child = child.collapse_subtrees(subtrees)
fully_covered &= (collapsed_child == [child])
res.extend(collapsed_child)
return [self] if fully_covered else res
def complement(self, subtrees: List['_Tree']) -> List['_Tree']:
if self in subtrees:
return []
elif not self.children:
return [self]
return [elem for lst in map(lambda x: x.complement(subtrees), self.children) for elem in lst]
干杯,Hlib。
最后,我要找的是 Flag class.
from enum import Flag, auto
class CommitType(Flag):
FEATURE = auto()
CODE_FIX = auto()
DOC_FIX = auto()
REFACTORING = auto()
FIX = BUG_FIX | DOC_FIX
COMMIT = FEATURE | FIX | REFACTORING
>>> CommitType.DOC_FIX | CommitType.CODE_FIX
<CommitType.FIX: 6>
>>> CommitType.DOC_FIX | CommitType.REFACTORING
<CommitType.REFACTORING|CODE_FIX: 10>
>>> CommitType.FEATURE | CommitType.FIX | CommitType.REFACTORING
<CommitType.COMMIT: 15>
>>> ~CommitType.CODE_FIX
<CommitType.REFACTORING|DOC_FIX|FEATURE: 13>
>>> ~CommitType.COMMIT
<CommitType.0: 0>
>>> ~ (CommitType.FEATURE | CommitType.REFACTORING)
<CommitType.FIX: 6>
假设我有以下 hierarchy/tree:
Commit
/ | \
/ Fix \
/ / \ \
Feature CodeFix DocFix Refactoring
我希望能够定义集合,其中的元素是这棵树的叶子。使用标准 python 集合的方法是列出所有要包含的叶子,例如{Feature, CodeFix, DocFix}
。我想要的是使用快捷方式并传递整个 sub-trees 。为此,我们需要一些抽象级别来处理它,比方说 class HSet
。有了这样的 class,应该不仅可以创建这样的集合:HSet(Feature, BugFix, DocFix)
,还可以创建这样的集合:HSet(Feature, Fix)
。根据层次结构,两种选择都应产生相同的 object 并表示自 Fix == CodeFix|DocFix
以来的相同集合。
如果 Fix
有数十个 children,那么拥有这样的快捷方式将特别有益。必须手动在代码中创建大型集合正是我的用例。除了集合创建,我还需要一个快捷方式来列出集合中的元素。例如,如果一个集合包含层次结构中的所有叶子,则它可以由层次结构的根表示,对于上面提供的示例,它是 {Commit}
。请查看示例,这些示例有望更好地说明该概念。
>>> from somelibrary import HSet
>>> commit = HSet.create_root_element("Commit")
>>> feature, fix, refactoring = commit.add_children("Feature", "Fix", "Refactoring")
>>> code_fix, doc_fix = fix.add_children("CodeFix", "DocFix")
>>> str(code_fix | doc_fix)
'{Fix}'
>>> str(code_fix | refactoring)
'{CodeFix|Refactoring}'
>>> str(feature | fix | refactoring)
'{Commit}'
>>> str(~ code_fix)
'{Feature|DocFix|Refactoring}'
>>> str(~ commit)
'{}'
>>> str(~ (feature | refactoring))
'{Fix}'
我很好奇是否存在这样的somelibrary
来实现这个。
更新:实施
与此同时,我写了我正在谈论的概念的实现。但我总是担心不会重新发明轮子。因此,如果能提供指向标准 python 库或自定义库中现有数据结构的指针,可用于 fully/partially 替换我的代码,我将不胜感激。
hset.py
from typing import List
from hset.tree import _Tree
class HSet:
def __init__(self, subtrees: List[_Tree], domain: _Tree):
self.subtrees = domain.collapse_subtrees(subtrees)
self.domain = domain
@classmethod
def create_root_element(cls, label: str) -> 'HSet':
tree = _Tree(label, None, [])
return cls([tree], tree)
def add_children(self, *labels: str) -> List['HSet']:
if len(self.subtrees) != 1:
raise ValueError(f"Cannot add children to this HSet since it has multiple root elements: {self}")
if self.subtrees[0].children:
raise ValueError(f"This HSet already has children.")
trees = self.subtrees[0].add_children(*labels)
return list(map(lambda t: self._create([t]), trees))
def _create(self, subtrees: List[_Tree]) -> 'HSet':
return HSet(subtrees, self.domain)
def __str__(self):
return '{' + "|".join(map(lambda x: x.label, self.subtrees)) + '}'
def __invert__(self) -> 'HSet':
return self._create(self.domain.complement(self.subtrees))
def __or__(self, other: 'HSet') -> 'HSet':
if self.domain != other.domain:
raise ValueError("Cannot perform operations on HSets with different domains")
return self._create(self.domain.collapse_subtrees(self.subtrees + other.subtrees))
tree.py
from dataclasses import dataclass
from typing import Optional, List
@dataclass
class _Tree:
label: str
parent: Optional['_Tree']
children: List['_Tree']
def add_children(self, *labels: str) -> List['_Tree']:
children = [_Tree(label, self, []) for label in labels]
self.children = children
return children
def collapse_subtrees(self, subtrees: List['_Tree']) -> List['_Tree']:
if self in subtrees:
return [self]
elif not self.children:
return []
fully_covered = True
res = []
for child in self.children:
collapsed_child = child.collapse_subtrees(subtrees)
fully_covered &= (collapsed_child == [child])
res.extend(collapsed_child)
return [self] if fully_covered else res
def complement(self, subtrees: List['_Tree']) -> List['_Tree']:
if self in subtrees:
return []
elif not self.children:
return [self]
return [elem for lst in map(lambda x: x.complement(subtrees), self.children) for elem in lst]
干杯,Hlib。
最后,我要找的是 Flag class.
from enum import Flag, auto
class CommitType(Flag):
FEATURE = auto()
CODE_FIX = auto()
DOC_FIX = auto()
REFACTORING = auto()
FIX = BUG_FIX | DOC_FIX
COMMIT = FEATURE | FIX | REFACTORING
>>> CommitType.DOC_FIX | CommitType.CODE_FIX
<CommitType.FIX: 6>
>>> CommitType.DOC_FIX | CommitType.REFACTORING
<CommitType.REFACTORING|CODE_FIX: 10>
>>> CommitType.FEATURE | CommitType.FIX | CommitType.REFACTORING
<CommitType.COMMIT: 15>
>>> ~CommitType.CODE_FIX
<CommitType.REFACTORING|DOC_FIX|FEATURE: 13>
>>> ~CommitType.COMMIT
<CommitType.0: 0>
>>> ~ (CommitType.FEATURE | CommitType.REFACTORING)
<CommitType.FIX: 6>