PostgreSQL 循环索引
PostgreSQL Circular Index
我有一个简单的问题:"How to handle with circular index or reference"
主要思想是:
弄清楚你是否有一些数据,比如字符串中的位置,并且你有很多字符串。
String N = {n1 : [start1, end1], n2 : [start2, end2], ..., nn : [startn, endn]}
String A = {a : [1, 10], b : [15, 20], c : [21, 50]}
String B = {a : [52, 8], b : [10, 20], c : [21, 55]}
每个字符串中的项目可能有重叠,开始和结束位置不太重要(只是为了保持项目顺序,例如 b
在 a
之后和 c
之前)
我只构建了另一个列来将其他列分配给每个字符串中的项目。
String N = {n1 : [start1, end1, index1], n2 : [start2, end2, index2], ..., nn : [startn, endn, indexnn]}
String A = {a : [1, 10, 1], b : [15, 20, 2], c : [21, 50, 3]}
String B = {a : [52, 8, 1], b : [10, 20, 2], c : [21, 55, 3]}
想法是:有时我需要假设其他项目很重要来进行查询,因此我会进行一些查询以检索查询前 2 和后 2 之间的所有项目。
现在查询非常简单:
-- Fetch item 'b' of string 'A' together with its direct neighbours
-- (index - 1 .. index + 1). Uses the question's illustrative column names.
-- The item name is a text value and must be quoted; a bare b would be
-- parsed as a column reference.
SELECT *
FROM strings
WHERE string = 'A'
    AND index BETWEEN (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'b'
    ) - 1 AND (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'b'
    ) + 1;
[*或比这更好的东西]
它将 return 项 [a, b, c]
但是,如果查询是项目 c
,我如何从 String A
检索项目 a
?
如果我为项目 c
做:
-- Same neighbour lookup for item 'c' of string 'A'.
-- There is no wrap-around here: index + 1 lies past the last item, so the
-- first item is not returned.
-- The item name is a text value and must be quoted; a bare c would be
-- parsed as a column reference.
SELECT *
FROM strings
WHERE string = 'A'
    AND index BETWEEN (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'c'
    ) - 1 AND (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'c'
    ) + 1;
不会return我[b, c, a]
,只会return[b, c]
。
提前致谢
示例:
Table
-- Session-local demo table: one row per (string, item) pair.
CREATE TEMPORARY TABLE strings (
    string_name CHARACTER VARYING,  -- which string the item belongs to
    item        CHARACTER VARYING,  -- item label within the string
    s_start     INT,                -- start position of the item
    s_end       INT,                -- end position of the item
    idx         INT                 -- ordinal position of the item within its string
);
数据
-- Sample data: strings 'a' and 'b', four items each, idx running 1..4.
-- The column list is explicit so the statement does not depend on the
-- physical column order of the table.
INSERT INTO strings (string_name, item, s_start, s_end, idx) VALUES
    ('a', 'a1', 10, 20, 1),
    ('a', 'a2', 10, 20, 2),
    ('a', 'a3', 10, 20, 3),
    ('a', 'a4', 10, 20, 4),
    ('b', 'b1', 1, 20, 1),
    ('b', 'b2', 10, 20, 2),
    ('b', 'a3', 10, 20, 3),
    ('b', 'c4', 10, 20, 4);
普通查询
-- Look up the position of item 'a3' in string 'b', then return that row
-- together with its immediate neighbours (s_idx - 1 .. s_idx + 1).
WITH myvar AS (
    SELECT st.idx AS s_idx
    FROM strings AS st
    WHERE st.item = 'a3'
      AND st.string_name = 'b'
)
SELECT *
FROM strings AS s
CROSS JOIN myvar  -- myvar holds the anchor position; attach it to every row
WHERE s.string_name = 'b'
  AND s.idx BETWEEN myvar.s_idx - 1 AND myvar.s_idx + 1;
输出:
string_name | item | s_start | s_end | idx | s_idx
-------------+------+---------+-------+-----+-------
b | b2 | 10 | 20 | 2 | 3
b | a3 | 10 | 20 | 3 | 3
b | c4 | 10 | 20 | 4 | 3
(3 rows)
有问题的查询(当 idx ± 1 超出字符串的范围时,例如 String B 的项目 c4,它是最后一个 idx)
-- Same neighbour lookup, anchored on item 'c4' of string 'b'.
-- The plain range s_idx - 1 .. s_idx + 1 does not wrap around, so nothing
-- is returned for the position after the anchor when it is the last item.
WITH myvar AS (
    SELECT st.idx AS s_idx
    FROM strings AS st
    WHERE st.item = 'c4'
      AND st.string_name = 'b'
)
SELECT *
FROM strings AS s
CROSS JOIN myvar  -- myvar holds the anchor position; attach it to every row
WHERE s.string_name = 'b'
  AND s.idx BETWEEN myvar.s_idx - 1 AND myvar.s_idx + 1;
输出
string_name | item | s_start | s_end | idx | s_idx
-------------+------+---------+-------+-----+-------
b | a3 | 10 | 20 | 3 | 4
b | c4 | 10 | 20 | 4 | 4
(2 rows)
预期输出
string_name | item | s_start | s_end | idx | s_idx
-------------+------+---------+-------+-----+-------
b | a3 | 10 | 20 | 3 | 4
b | c4 | 10 | 20 | 4 | 4
b | b1 | 1 | 20 | 1 | 4
(3 rows)
-- Circular neighbours of item 'c4' within string 'b': the predecessor of the
-- first position is the last one, and the successor of the last is the first.
WITH ranked AS (
    SELECT
        *,
        MAX(idx) OVER (PARTITION BY string_name) AS max_idx  -- 1
    FROM strings
    WHERE string_name = 'b'
),
myvar AS (
    SELECT
        CASE WHEN r.idx = 1 THEN r.max_idx ELSE r.idx - 1 END AS prev_idx,  -- 2
        r.idx AS s_idx,
        CASE WHEN r.idx = r.max_idx THEN 1 ELSE r.idx + 1 END AS next_idx
    FROM ranked AS r
    WHERE r.item = 'c4'
)
SELECT s.*
FROM strings AS s
CROSS JOIN myvar
WHERE s.string_name = 'b'
  AND s.idx = ANY (ARRAY[myvar.prev_idx, myvar.s_idx, myvar.next_idx])  -- 3
- 获取每个字符串的最大值
idx
。我通过使用 window function MAX
得到了这个
- 现在我可以检查前一个
idx
是否应该滚动到最后一个(如果当前 idx
是第一个)或者下一个 idx
是否应该滚动到第一个(如果当前是最后一个)。
- 我没有使用
BETWEEN
因为在你的情况下它会导致严重的问题。因为 4,3,1
会导致 BETWEEN 1 AND 4
也会得到 2
。所以我用这三个值做了一个数组,但还有很多其他方法(子查询而不是 CTE 例如)
如果你有更大的范围,比如 [-3, +3] 这种方式可能会非常讨厌。在这种情况下,我会尝试使用 modulo:
-- Circular window of +/-3 positions around one item of string 'b'.
-- idx is 1-based, so each offset is wrapped on the 0-based position
-- (s_idx - 1) and then shifted back with + 1; a plain "% (max + 1)" yields 0
-- at the wrap, which matches no row.
-- PostgreSQL's % keeps the sign of the dividend, hence the
-- "(x % n + n) % n" form to bring negative offsets into 0 .. n - 1.
-- Assumes idx runs 1 .. cycle_len without gaps within a string -- TODO confirm.
-- NOTE(review): item 'g7' is not among the sample rows shown in this document.
WITH myvar AS (
    SELECT *
    FROM (
        SELECT
            idx AS s_idx,
            item,
            MAX(idx) OVER (PARTITION BY string_name) AS cycle_len,    -- number of positions in the cycle
            MAX(idx) OVER (PARTITION BY string_name) + 1 AS max_idx   -- retained: the generate_series variant reads it
        FROM strings
        WHERE string_name = 'b'
    ) s
    WHERE item = 'g7'
)
SELECT
    s.*
FROM strings AS s
JOIN myvar
    ON true
WHERE string_name = 'b'
    AND idx = ANY (ARRAY[
        ((s_idx - 1 - 3) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 - 2) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 - 1) % cycle_len + cycle_len) % cycle_len + 1,
        s_idx,
        ((s_idx - 1 + 1) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 + 2) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 + 3) % cycle_len + cycle_len) % cycle_len + 1
    ])
数组部分也可以用generate_series生成。所以对于不同的范围更灵活:
... AND idx IN (
    -- max_idx is MAX(idx) + 1 in myvar, so the cycle length is max_idx - 1.
    -- Wrap on the 0-based position (s_idx - 1) and shift back to 1-based;
    -- the "+ n) % n" step is needed because % keeps the sign of the dividend.
    SELECT ((s_idx - 1 + gs) % (max_idx - 1) + (max_idx - 1)) % (max_idx - 1) + 1
    FROM myvar
    CROSS JOIN generate_series(-3, 3) AS gs
)
我有一个简单的问题:"How to handle with circular index or reference"
主要思想是: 弄清楚你是否有一些数据,比如字符串中的位置,并且你有很多字符串。
String N = {n1 : [start1, end1], n2 : [start2, end2], ..., nn : [startn, endn]}
String A = {a : [1, 10], b : [15, 20], c : [21, 50]}
String B = {a : [52, 8], b : [10, 20], c : [21, 55]}
每个字符串中的项目可能有重叠,开始和结束位置不太重要(只是为了保持项目顺序,例如 b
在 a
之后和 c
之前)
我只构建了另一个列来将其他列分配给每个字符串中的项目。
String N = {n1 : [start1, end1, index1], n2 : [start2, end2, index2], ..., nn : [startn, endn, indexnn]}
String A = {a : [1, 10, 1], b : [15, 20, 2], c : [21, 50, 3]}
String B = {a : [52, 8, 1], b : [10, 20, 2], c : [21, 55, 3]}
想法是:有时我需要假设其他项目很重要来进行查询,因此我会进行一些查询以检索查询前 2 和后 2 之间的所有项目。
现在查询非常简单:
-- Fetch item 'b' of string 'A' together with its direct neighbours
-- (index - 1 .. index + 1). Uses the question's illustrative column names.
-- The item name is a text value and must be quoted; a bare b would be
-- parsed as a column reference.
SELECT *
FROM strings
WHERE string = 'A'
    AND index BETWEEN (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'b'
    ) - 1 AND (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'b'
    ) + 1;
[*或比这更好的东西]
它将 return 项 [a, b, c]
但是,如果查询是项目 c
,我如何从 String A
检索项目 a
?
如果我为项目 c
做:
-- Same neighbour lookup for item 'c' of string 'A'.
-- There is no wrap-around here: index + 1 lies past the last item, so the
-- first item is not returned.
-- The item name is a text value and must be quoted; a bare c would be
-- parsed as a column reference.
SELECT *
FROM strings
WHERE string = 'A'
    AND index BETWEEN (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'c'
    ) - 1 AND (
        SELECT index
        FROM strings
        WHERE string = 'A' AND item = 'c'
    ) + 1;
不会return我[b, c, a]
,只会return[b, c]
。
提前致谢
示例:
Table
-- Session-local demo table: one row per (string, item) pair.
CREATE TEMPORARY TABLE strings (
    string_name CHARACTER VARYING,  -- which string the item belongs to
    item        CHARACTER VARYING,  -- item label within the string
    s_start     INT,                -- start position of the item
    s_end       INT,                -- end position of the item
    idx         INT                 -- ordinal position of the item within its string
);
数据
-- Sample data: strings 'a' and 'b', four items each, idx running 1..4.
-- The column list is explicit so the statement does not depend on the
-- physical column order of the table.
INSERT INTO strings (string_name, item, s_start, s_end, idx) VALUES
    ('a', 'a1', 10, 20, 1),
    ('a', 'a2', 10, 20, 2),
    ('a', 'a3', 10, 20, 3),
    ('a', 'a4', 10, 20, 4),
    ('b', 'b1', 1, 20, 1),
    ('b', 'b2', 10, 20, 2),
    ('b', 'a3', 10, 20, 3),
    ('b', 'c4', 10, 20, 4);
普通查询
-- Look up the position of item 'a3' in string 'b', then return that row
-- together with its immediate neighbours (s_idx - 1 .. s_idx + 1).
WITH myvar AS (
    SELECT st.idx AS s_idx
    FROM strings AS st
    WHERE st.item = 'a3'
      AND st.string_name = 'b'
)
SELECT *
FROM strings AS s
CROSS JOIN myvar  -- myvar holds the anchor position; attach it to every row
WHERE s.string_name = 'b'
  AND s.idx BETWEEN myvar.s_idx - 1 AND myvar.s_idx + 1;
输出:
string_name | item | s_start | s_end | idx | s_idx
-------------+------+---------+-------+-----+-------
b | b2 | 10 | 20 | 2 | 3
b | a3 | 10 | 20 | 3 | 3
b | c4 | 10 | 20 | 4 | 3
(3 rows)
有问题的查询(当 idx ± 1 超出字符串的范围时,例如 String B 的项目 c4,它是最后一个 idx)
-- Same neighbour lookup, anchored on item 'c4' of string 'b'.
-- The plain range s_idx - 1 .. s_idx + 1 does not wrap around, so nothing
-- is returned for the position after the anchor when it is the last item.
WITH myvar AS (
    SELECT st.idx AS s_idx
    FROM strings AS st
    WHERE st.item = 'c4'
      AND st.string_name = 'b'
)
SELECT *
FROM strings AS s
CROSS JOIN myvar  -- myvar holds the anchor position; attach it to every row
WHERE s.string_name = 'b'
  AND s.idx BETWEEN myvar.s_idx - 1 AND myvar.s_idx + 1;
输出
string_name | item | s_start | s_end | idx | s_idx
-------------+------+---------+-------+-----+-------
b | a3 | 10 | 20 | 3 | 4
b | c4 | 10 | 20 | 4 | 4
(2 rows)
预期输出
string_name | item | s_start | s_end | idx | s_idx
-------------+------+---------+-------+-----+-------
b | a3 | 10 | 20 | 3 | 4
b | c4 | 10 | 20 | 4 | 4
b | b1 | 1 | 20 | 1 | 4
(3 rows)
-- Circular neighbours of item 'c4' within string 'b': the predecessor of the
-- first position is the last one, and the successor of the last is the first.
WITH ranked AS (
    SELECT
        *,
        MAX(idx) OVER (PARTITION BY string_name) AS max_idx  -- 1
    FROM strings
    WHERE string_name = 'b'
),
myvar AS (
    SELECT
        CASE WHEN r.idx = 1 THEN r.max_idx ELSE r.idx - 1 END AS prev_idx,  -- 2
        r.idx AS s_idx,
        CASE WHEN r.idx = r.max_idx THEN 1 ELSE r.idx + 1 END AS next_idx
    FROM ranked AS r
    WHERE r.item = 'c4'
)
SELECT s.*
FROM strings AS s
CROSS JOIN myvar
WHERE s.string_name = 'b'
  AND s.idx = ANY (ARRAY[myvar.prev_idx, myvar.s_idx, myvar.next_idx])  -- 3
- 获取每个字符串的最大值
idx
。我通过使用 window function MAX
得到了这个
- 现在我可以检查前一个
idx
是否应该滚动到最后一个(如果当前idx
是第一个)或者下一个idx
是否应该滚动到第一个(如果当前是最后一个)。
- 我没有使用
BETWEEN
因为在你的情况下它会导致严重的问题。因为4,3,1
会导致BETWEEN 1 AND 4
也会得到2
。所以我用这三个值做了一个数组,但还有很多其他方法(子查询而不是 CTE 例如)
如果你有更大的范围,比如 [-3, +3] 这种方式可能会非常讨厌。在这种情况下,我会尝试使用 modulo:
-- Circular window of +/-3 positions around one item of string 'b'.
-- idx is 1-based, so each offset is wrapped on the 0-based position
-- (s_idx - 1) and then shifted back with + 1; a plain "% (max + 1)" yields 0
-- at the wrap, which matches no row.
-- PostgreSQL's % keeps the sign of the dividend, hence the
-- "(x % n + n) % n" form to bring negative offsets into 0 .. n - 1.
-- Assumes idx runs 1 .. cycle_len without gaps within a string -- TODO confirm.
-- NOTE(review): item 'g7' is not among the sample rows shown in this document.
WITH myvar AS (
    SELECT *
    FROM (
        SELECT
            idx AS s_idx,
            item,
            MAX(idx) OVER (PARTITION BY string_name) AS cycle_len,    -- number of positions in the cycle
            MAX(idx) OVER (PARTITION BY string_name) + 1 AS max_idx   -- retained: the generate_series variant reads it
        FROM strings
        WHERE string_name = 'b'
    ) s
    WHERE item = 'g7'
)
SELECT
    s.*
FROM strings AS s
JOIN myvar
    ON true
WHERE string_name = 'b'
    AND idx = ANY (ARRAY[
        ((s_idx - 1 - 3) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 - 2) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 - 1) % cycle_len + cycle_len) % cycle_len + 1,
        s_idx,
        ((s_idx - 1 + 1) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 + 2) % cycle_len + cycle_len) % cycle_len + 1,
        ((s_idx - 1 + 3) % cycle_len + cycle_len) % cycle_len + 1
    ])
数组部分也可以用generate_series生成。所以对于不同的范围更灵活:
... AND idx IN (
    -- max_idx is MAX(idx) + 1 in myvar, so the cycle length is max_idx - 1.
    -- Wrap on the 0-based position (s_idx - 1) and shift back to 1-based;
    -- the "+ n) % n" step is needed because % keeps the sign of the dividend.
    SELECT ((s_idx - 1 + gs) % (max_idx - 1) + (max_idx - 1)) % (max_idx - 1) + 1
    FROM myvar
    CROSS JOIN generate_series(-3, 3) AS gs
)