在 SQLAlchemy 中,outerjoin 之后再使用 join

Join after outerjoin in SQLAlchemy

假设我有一个一对多(one-to-many)关系,其中 parent 和 children 按 group_id 分组。

Note: this example is a stripped down version of my code, which is actually a many-to-many relationship. There may be some errors unrelated to the question.

class Node(Base):
    """Node row stored in the ``node`` table."""
    __tablename__ = 'node'

    # Composite primary key: (id, group_id).
    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    group_id = Column(GUID, primary_key=True, nullable=False)

    # Optional id of the parent node.
    parent_id = Column(GUID)
    title = Column(Text, nullable=False)

class Leaf(Base):
    """Leaf row stored in the ``leaf`` table."""
    __tablename__ = 'leaf'

    # Composite primary key: (id, group_id).
    id = Column(GUID, primary_key=True, nullable=False)
    group_id = Column(GUID, primary_key=True, nullable=False)

    # Id of the node this leaf hangs under (required).
    parent_id = Column(GUID, nullable=False)

group_id 用作创建新版本的方式 - 因此具有相同 id 的节点和叶子可以存在于多个组中。

我要做的是比较两组,找出所有parent发生变化的叶子。我正在尝试使用外部连接进行比较,然后使用两个连接来过滤 parent 节点:

def find_changed_leaves(group_id_a, group_id_b, session):
    """
    Query pairs of leaves that share an id across two groups and whose parent
    nodes have different titles.

    :param group_id_a: group id the first leaf of each pair must belong to
    :param group_id_b: group id the second leaf of each pair must belong to
    :param session: session used to build and execute the query
    :return: the result of ``query.all()`` for the ``(LeafA, LeafB)`` query
    """
    # The "A" side uses the mapped classes directly; the "B" side uses aliases
    # so the same tables can appear twice in one query.
    NodeA = model.Node
    NodeB = aliased(model.Node, name='node_b')
    LeafA = model.Leaf
    LeafB = aliased(model.Leaf, name='leaf_b')

    query = (session.query(LeafA, LeafB)
        # Pair every LeafA with the LeafB rows that have the same id.
        .outerjoin(LeafB, LeafA.id == LeafB.id)

        # Inner joins to each leaf's parent node within the leaf's own group.
        # NOTE: the join on NodeB (and the LeafB filter below) need a real
        # LeafB row, so rows where the outer join found no LeafB are dropped.
        .join(NodeA, (LeafA.group_id == NodeA.group_id) &
                     (LeafA.parent_id == NodeA.id))
        .join(NodeB, (LeafB.group_id == NodeB.group_id) &
                     (LeafB.parent_id == NodeB.id))

        # Group membership
        .filter(LeafA.group_id == group_id_a,
                LeafB.group_id == group_id_b)

        # Keep only pairs whose parent titles differ
        .filter(NodeA.title != NodeB.title)
    )

    return query.all()

这有效,但它不会显示只存在于其中一个组中的叶子(例如,在新组中向某个节点添加了叶子)。我如何才能显示所有叶子,并对在其中一组中缺失的叶子返回 None?

编辑:我了解到将 join 与 outer join 混用存在隐患(perils of mixing join with outer join)。我天真地尝试把它改成 .outerjoin(NodeA, ...),但没有帮助。

如评论中所述,尚不完全清楚需要实现什么。尽管如此,下面的代码至少应该给你一些指导。

首先,我不会尝试将其全部组合在一个查询中(可能使用完整连接和子查询),而是将其拆分为 3 个单独的查询:

  1. 获取 parent 已发生变化的 (LeafA, LeafB) 对
  2. 获取没有对应 LeafB 的 LeafA
  3. 获取没有对应 LeafA 的 LeafB

下面的代码在 sqlite 和 postgresql 中应该都能运行。请注意,我添加了关系并在查询中使用它们。但是您也可以像在您的代码片段中那样,使用显式连接条件来执行相同的操作。

import uuid

from sqlalchemy import (
    create_engine, Column, Integer, String, ForeignKey, Text, and_,
    ForeignKeyConstraint, UniqueConstraint, exists
)
from sqlalchemy.orm import sessionmaker, relationship, eagerload, aliased
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.dialects.postgresql import UUID as GUID

# Default to an in-memory SQLite database. The PostgreSQL ``GUID`` type
# imported above is overridden with a plain ``String`` column type here.
_db_uri = 'sqlite:///:memory:'
GUID = String
# To try PostgreSQL instead, use the URI below (and drop the GUID override):
# _db_uri = "postgresql://aaa:bbb@localhost/mytestdb"
engine = create_engine(_db_uri, echo=True)
Session = sessionmaker(bind=engine)
# No engine is bound to the declarative base: the positional ``bind`` argument
# is deprecated in SQLAlchemy 1.4 and no longer accepted in 2.0. The engine is
# handed explicitly to ``sessionmaker`` and to the metadata calls instead.
Base = declarative_base()


def newid():
    """Return a freshly generated random UUID as a string."""
    return str(uuid.uuid4())

# define object model
class Node(Base):
    """Node row stored in the ``node`` table."""
    __tablename__ = 'node'

    # Composite primary key: (id, group_id); ``id`` defaults to ``newid()``.
    id = Column(GUID, primary_key=True, default=newid)
    group_id = Column(GUID, primary_key=True, nullable=False)
    # NOTE: no ``parent_id`` column is defined on this model.
    title = Column(Text, nullable=False)


class Leaf(Base):
    """Leaf row stored in the ``leaf`` table, attached to a ``Node``."""
    __tablename__ = 'leaf'

    # (parent_id, group_id) references (node.id, node.group_id).
    __table_args__ = (
        ForeignKeyConstraint(
            ['parent_id', 'group_id'], ['node.id', 'node.group_id'],
        ),
    )

    # Composite primary key: (id, group_id).
    id = Column(GUID, primary_key=True, nullable=False)
    group_id = Column(GUID, primary_key=True, nullable=False)
    parent_id = Column(GUID, nullable=False)
    title = Column(Text, nullable=False)

    # Relationship to the parent node; also exposes ``Node.children``.
    # Makes test data creation and querying easier.
    parent = relationship(
        Node,
        backref="children",
        primaryjoin=and_(Node.id == parent_id, Node.group_id == group_id),
    )


# Drop and recreate every mapped table on the engine.
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

# Module-level session shared by the helper functions below.
session = Session()


# Two group ids (g1, g2) and three leaf ids (l1, l2, l3) used as fixtures.
g1, g2, l1, l2, l3 = (newid() for _ in range(5))

# Create test data
def _add_test_data():
    """
    Insert one node per group, each with two leaves, and commit.

    Leaf ``l1`` exists only under the ``g1`` node, ``l2`` only under the
    ``g2`` node, and ``l3`` under both. The leaves are created without an
    explicit ``group_id``/``parent_id``; those are expected to be filled in
    through the ``children`` relationship.
    """
    leaves_g1 = [
        Leaf(id=l1, title="g1 only"),
        Leaf(id=l3, title="both groups"),
    ]
    node_g1 = Node(title="node1", group_id=g1, children=leaves_g1)

    leaves_g2 = [
        Leaf(id=l2, title="g2 only"),
        Leaf(id=l3, title="both groups"),
    ]
    node_g2 = Node(title="node1 changed", group_id=g2, children=leaves_g2)

    session.add_all([node_g1, node_g2])
    session.commit()


def find_changed_leaves(group_id_a, group_id_b):
    """
    Return ``(leaf_a, leaf_b)`` pairs for leaves that exist in both groups
    but whose parent nodes have different titles.
    """
    leaf_a = aliased(Leaf, name='leaf_a')
    leaf_b = aliased(Leaf, name='leaf_b')
    node_a = aliased(Node, name='node_a')
    node_b = aliased(Node, name='node_b')

    # Group membership of the B-side leaf is part of the join condition.
    same_leaf_in_b = and_(
        leaf_a.id == leaf_b.id,
        leaf_b.group_id == group_id_b,
    )

    pairs = session.query(leaf_a, leaf_b)
    pairs = pairs.filter(leaf_a.group_id == group_id_a)
    pairs = pairs.join(leaf_b, same_leaf_in_b)

    # Bring in each leaf's parent through the ``parent`` relationship.
    pairs = pairs.join(node_a, leaf_a.parent)
    pairs = pairs.join(node_b, leaf_b.parent)

    # Keep only the pairs whose parents' titles differ.
    pairs = pairs.filter(node_a.title != node_b.title)
    return pairs.all()


def find_orphaned_leaves(group_id_a, group_id_b):
    """
    Return the leaves of group A that have no leaf with the same id in
    group B.
    """
    leaf_a = aliased(Leaf, name='leaf_a')
    leaf_b = aliased(Leaf, name='leaf_b')

    # EXISTS subquery: a leaf with the same id is present in group B.
    counterpart_in_b = (
        session.query(leaf_b)
        .filter(leaf_a.id == leaf_b.id)
        .filter(group_id_b == leaf_b.group_id)
        .exists()
    )

    orphans = (
        session.query(leaf_a)
        .filter(~counterpart_in_b)
        # Group membership
        .filter(leaf_a.group_id == group_id_a)
    )
    return orphans.all()


def find_deleted_leaves(group_id_a, group_id_b):
    """Return a tuple of ``(leaf, None)`` for leaves in group A missing from group B."""
    return tuple(
        (leaf, None)
        for leaf in find_orphaned_leaves(group_id_a, group_id_b)
    )

def find_added_leaves(group_id_a, group_id_b):
    """Return a tuple of ``(None, leaf)`` for leaves in group B missing from group A."""
    # Swap the arguments: "added" leaves are the orphans of B relative to A.
    return tuple(
        (None, leaf)
        for leaf in find_orphaned_leaves(group_id_b, group_id_a)
    )


# Populate the database with the fixture rows
_add_test_data()

# Leaf l3 is in both groups and its parents have different titles
changed = find_changed_leaves(g1, g2)
assert len(changed) == 1
le, ri = changed[0]
assert le.id == l3
assert ri.id == l3

# Leaf l2 exists only in g2, so it shows up as added (no left-hand leaf)
added = find_added_leaves(g1, g2)
assert len(added) == 1
le, ri = added[0]
assert le is None
assert ri.id == l2

# Leaf l1 exists only in g1, so it shows up as deleted (no right-hand leaf)
deleted = find_deleted_leaves(g1, g2)
assert len(deleted) == 1
le, ri = deleted[0]
assert le.id == l1
assert ri is None