knex.js INNER JOIN 结果的 DISTINCT
DISTINCT on results of a knex.js INNER JOIN
我有两个表,metadata
和 view_events
。 metadata
和 view_events
都有 config_id
和 config_type
列。我正在尝试 select 给定用户电子邮件的所有 view_events
,按 config_id
和 config_type
区分,按 timestamp, desc
排序,并限制为最近的 10 条。以下 knex.js 代码无法正常工作,但希望能表达我想要实现的目标:
// Query as posted in the question; the author states it does not work and is only meant to convey intent.
// Starts from the metadata table and projects three metadata columns plus the view-event timestamp.
return dbClient<AuthenticatedUserIndexRow>(METADATA_TABLE_NAME)
.select([
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_ID}`,
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_TYPE}`,
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.DESCRIPTION}`,
`${VIEW_EVENTS_TABLE_NAME}.${VIEW_EVENTS_COLUMNS.TIMESTAMP}`,
])
// Inner join against view events; the email and deleted filters live in the ON clause (no WHERE is used).
.innerJoin<AuthenticatedUserIndexRow>(VIEW_EVENTS_TABLE_NAME, function innerJoinOnViewEvents() {
// Join key: metadata STORAGE_ID column = view-event CONFIG_STORAGE_ID column.
this.on(
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.STORAGE_ID}`,
'=',
`${VIEW_EVENTS_TABLE_NAME}.${VIEW_EVENTS_COLUMNS.CONFIG_STORAGE_ID}`,
)
// Restrict to the given user's events; the email is handed to rawSql as a binding for the '?' placeholder.
.andOn(
`${VIEW_EVENTS_TABLE_NAME}.${VIEW_EVENTS_COLUMNS.USER_EMAIL}`,
'=',
rawSql('?', [authUserEmail]),
)
// Keep only metadata rows whose DELETED column equals false (also passed as a binding).
.andOn(`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.DELETED}`, '=', rawSql('?', [false]));
})
// DISTINCT is requested on (config_type, config_id) while the select list above also carries
// description and timestamp; this is the select/distinct combination the author suspects cannot work.
.distinct([
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_TYPE}`,
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_ID}`,
])
.limit(EVENT_LIMIT)
// Newest first; note the sort column is referenced here without a table prefix.
.orderBy(VIEW_EVENTS_COLUMNS.TIMESTAMP, 'desc');
例如,给定下表:
view_events
+-------------+-----------+--------------------------+----------------------+
| config_type | config_id | timestamp | email |
+-------------+-----------+--------------------------+----------------------+
| a | foo | 2020-01-23T03:08:14.618Z | john.smith@gmail.com |
| a | foo | 2020-01-23T03:08:14.500Z | jane.doe@gmail.com |
| a | foo | 2020-01-23T03:08:13.618Z | john.smith@gmail.com |
| a | bar | 2020-01-23T03:08:12.618Z | john.smith@gmail.com |
| a | foo | 2020-01-23T03:08:11.618Z | john.smith@gmail.com |
| b | foo | 2020-01-23T03:08:10.618Z | john.smith@gmail.com |
| a | baz | 2020-01-23T03:08:09.618Z | john.smith@gmail.com |
| a | foo | 2020-01-23T03:08:08.618Z | john.smith@gmail.com |
+-------------+-----------+--------------------------+----------------------+
metadata
+-------------+-----------+---------------------------+
| config_type | config_id | description |
+-------------+-----------+---------------------------+
| a | foo | Type a config with id foo |
| a | bar | Type a config with id bar |
| b | foo | Type b config with id foo |
| a | baz | Type a config with id baz |
+-------------+-----------+---------------------------+
我正在尝试获得以下输出(给定 john.smith@gmail.com
的 authUserEmail
):
+-------------+-----------+---------------------------+
| config_type | config_id | description |
+-------------+-----------+---------------------------+
| a | foo | Type a config with id foo |
| a | bar | Type a config with id bar |
| b | foo | Type b config with id foo |
| a | baz | Type a config with id baz |
+-------------+-----------+---------------------------+
我不是 SQL 专家,但我大致知道在这里同时使用 SELECT
和 DISTINCT
是行不通的。正确的做法是什么?
以下写法大致符合您的需求吗?我使用了 WITH ... AS(公用表表达式),这样就可以先取出最近的 10 个配置 (max(timestamp)..group by config
),然后在最终投影中去掉时间戳列。请注意,最终记录不一定严格按时间戳顺序返回,因为您不希望最终输出中包含时间戳,但它们一定是最近的 10 条。我没有加入对 DELETED
列的过滤,但您可以参照问题中的代码把它加回去。
// CTE "ordered_items": one row per (config_id, config_type, description) for the given user,
// carrying the latest view timestamp as max_ts, sorted newest first and capped at 10 rows.
knex.with('ordered_items', (qb) =>
qb.table('metadata')
// Match view events to metadata on both config_id and config_type.
.innerJoin('view_events', function() {
this.on('metadata.config_id', '=', 'view_events.config_id')
.andOn('metadata.config_type', '=', 'view_events.config_type')
})
// Only this user's view events (the email is hard-coded for the example).
.where({'view_events.email': 'john.smith@gmail.com'})
.select(['metadata.config_type', 'metadata.config_id',
'metadata.description'])
// Latest view per group, aliased max_ts so it can drive the ordering below.
.max('view_events.timestamp', {as: 'max_ts'})
.groupBy(['metadata.config_id', 'metadata.config_type', 'metadata.description'])
.orderBy('max_ts', 'desc')
.limit(10))
// Outer query: read from the CTE and leave max_ts out of the final projection.
// No ORDER BY is applied at this level, so the final row order is not explicitly specified.
.table('ordered_items')
.select(['config_type', 'config_id', 'description'])
我的输入输出:
sqlite> select * from metadata;
a|foo|Type a config with id foo
a|bar|Type a config with id bar
b|foo|Type b config with id foo
a|baz|Type a config with id baz
sqlite> select * from view_events;
a|foo|2020-01-23T03:08:14.618Z|john.smith@gmail.com
a|foo|2020-01-23T03:08:14.500Z|jane.doe@gmail.com
a|foo|2020-01-23T03:08:13.618Z|john.smith@gmail.com
a|bar|2020-01-23T03:08:12.618Z|john.smith@gmail.com
a|foo|2020-01-23T03:08:11.618Z|john.smith@gmail.com
b|foo|2020-01-23T03:08:10.618Z|john.smith@gmail.com
a|baz|2020-01-23T03:08:09.618Z|john.smith@gmail.com
a|foo|2020-01-23T03:08:08.618Z|john.smith@gmail.com
[ { config_type: 'a',
config_id: 'foo',
description: 'Type a config with id foo' },
{ config_type: 'a',
config_id: 'bar',
description: 'Type a config with id bar' },
{ config_type: 'b',
config_id: 'foo',
description: 'Type b config with id foo' },
{ config_type: 'a',
config_id: 'baz',
description: 'Type a config with id baz' } ]
我有两个表,metadata
和 view_events
。 metadata
和 view_events
都有 config_id
和 config_type
列。我正在尝试 select 给定用户电子邮件的所有 view_events
,按 config_id
和 config_type
区分,按 timestamp, desc
排序,并限制为最近的 10 条。以下 knex.js 代码无法正常工作,但希望能表达我想要实现的目标:
// Query as posted in the question; the author states it does not work and is only meant to convey intent.
// Starts from the metadata table and projects three metadata columns plus the view-event timestamp.
return dbClient<AuthenticatedUserIndexRow>(METADATA_TABLE_NAME)
.select([
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_ID}`,
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_TYPE}`,
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.DESCRIPTION}`,
`${VIEW_EVENTS_TABLE_NAME}.${VIEW_EVENTS_COLUMNS.TIMESTAMP}`,
])
// Inner join against view events; the email and deleted filters live in the ON clause (no WHERE is used).
.innerJoin<AuthenticatedUserIndexRow>(VIEW_EVENTS_TABLE_NAME, function innerJoinOnViewEvents() {
// Join key: metadata STORAGE_ID column = view-event CONFIG_STORAGE_ID column.
this.on(
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.STORAGE_ID}`,
'=',
`${VIEW_EVENTS_TABLE_NAME}.${VIEW_EVENTS_COLUMNS.CONFIG_STORAGE_ID}`,
)
// Restrict to the given user's events; the email is handed to rawSql as a binding for the '?' placeholder.
.andOn(
`${VIEW_EVENTS_TABLE_NAME}.${VIEW_EVENTS_COLUMNS.USER_EMAIL}`,
'=',
rawSql('?', [authUserEmail]),
)
// Keep only metadata rows whose DELETED column equals false (also passed as a binding).
.andOn(`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.DELETED}`, '=', rawSql('?', [false]));
})
// DISTINCT is requested on (config_type, config_id) while the select list above also carries
// description and timestamp; this is the select/distinct combination the author suspects cannot work.
.distinct([
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_TYPE}`,
`${METADATA_TABLE_NAME}.${METADATA_COLUMNS.CONFIG_ID}`,
])
.limit(EVENT_LIMIT)
// Newest first; note the sort column is referenced here without a table prefix.
.orderBy(VIEW_EVENTS_COLUMNS.TIMESTAMP, 'desc');
例如,给定下表:
view_events
+-------------+-----------+--------------------------+----------------------+
| config_type | config_id | timestamp | email |
+-------------+-----------+--------------------------+----------------------+
| a | foo | 2020-01-23T03:08:14.618Z | john.smith@gmail.com |
| a | foo | 2020-01-23T03:08:14.500Z | jane.doe@gmail.com |
| a | foo | 2020-01-23T03:08:13.618Z | john.smith@gmail.com |
| a | bar | 2020-01-23T03:08:12.618Z | john.smith@gmail.com |
| a | foo | 2020-01-23T03:08:11.618Z | john.smith@gmail.com |
| b | foo | 2020-01-23T03:08:10.618Z | john.smith@gmail.com |
| a | baz | 2020-01-23T03:08:09.618Z | john.smith@gmail.com |
| a | foo | 2020-01-23T03:08:08.618Z | john.smith@gmail.com |
+-------------+-----------+--------------------------+----------------------+
metadata
+-------------+-----------+---------------------------+
| config_type | config_id | description |
+-------------+-----------+---------------------------+
| a | foo | Type a config with id foo |
| a | bar | Type a config with id bar |
| b | foo | Type b config with id foo |
| a | baz | Type a config with id baz |
+-------------+-----------+---------------------------+
我正在尝试获得以下输出(给定 john.smith@gmail.com
的 authUserEmail
):
+-------------+-----------+---------------------------+
| config_type | config_id | description |
+-------------+-----------+---------------------------+
| a | foo | Type a config with id foo |
| a | bar | Type a config with id bar |
| b | foo | Type b config with id foo |
| a | baz | Type a config with id baz |
+-------------+-----------+---------------------------+
我不是 SQL 专家,但我大致知道在这里同时使用 SELECT
和 DISTINCT
是行不通的。正确的做法是什么?
以下写法大致符合您的需求吗?我使用了 WITH ... AS(公用表表达式),这样就可以先取出最近的 10 个配置 (max(timestamp)..group by config
),然后在最终投影中去掉时间戳列。请注意,最终记录不一定严格按时间戳顺序返回,因为您不希望最终输出中包含时间戳,但它们一定是最近的 10 条。我没有加入对 DELETED
列的过滤,但您可以参照问题中的代码把它加回去。
// CTE "ordered_items": one row per (config_id, config_type, description) for the given user,
// carrying the latest view timestamp as max_ts, sorted newest first and capped at 10 rows.
knex.with('ordered_items', (qb) =>
qb.table('metadata')
// Match view events to metadata on both config_id and config_type.
.innerJoin('view_events', function() {
this.on('metadata.config_id', '=', 'view_events.config_id')
.andOn('metadata.config_type', '=', 'view_events.config_type')
})
// Only this user's view events (the email is hard-coded for the example).
.where({'view_events.email': 'john.smith@gmail.com'})
.select(['metadata.config_type', 'metadata.config_id',
'metadata.description'])
// Latest view per group, aliased max_ts so it can drive the ordering below.
.max('view_events.timestamp', {as: 'max_ts'})
.groupBy(['metadata.config_id', 'metadata.config_type', 'metadata.description'])
.orderBy('max_ts', 'desc')
.limit(10))
// Outer query: read from the CTE and leave max_ts out of the final projection.
// No ORDER BY is applied at this level, so the final row order is not explicitly specified.
.table('ordered_items')
.select(['config_type', 'config_id', 'description'])
我的输入输出:
sqlite> select * from metadata;
a|foo|Type a config with id foo
a|bar|Type a config with id bar
b|foo|Type b config with id foo
a|baz|Type a config with id baz
sqlite> select * from view_events;
a|foo|2020-01-23T03:08:14.618Z|john.smith@gmail.com
a|foo|2020-01-23T03:08:14.500Z|jane.doe@gmail.com
a|foo|2020-01-23T03:08:13.618Z|john.smith@gmail.com
a|bar|2020-01-23T03:08:12.618Z|john.smith@gmail.com
a|foo|2020-01-23T03:08:11.618Z|john.smith@gmail.com
b|foo|2020-01-23T03:08:10.618Z|john.smith@gmail.com
a|baz|2020-01-23T03:08:09.618Z|john.smith@gmail.com
a|foo|2020-01-23T03:08:08.618Z|john.smith@gmail.com
[ { config_type: 'a',
config_id: 'foo',
description: 'Type a config with id foo' },
{ config_type: 'a',
config_id: 'bar',
description: 'Type a config with id bar' },
{ config_type: 'b',
config_id: 'foo',
description: 'Type b config with id foo' },
{ config_type: 'a',
config_id: 'baz',
description: 'Type a config with id baz' } ]