如何优化 Postgresql 中的行级安全性
How to optimize Row Level Security in Postgresql
我有一个基于 Postgres (13.2) 的 API,启用了 RLS(我使用 postgraphile),但它非常慢。
用户发送来自 Google OAuth 的 JWT。对表的访问基于角色(共 2 个:person、admin)加 RLS。
我有 2 张用于用户认证的表:person 和 person_groups
-- Group membership: one row per (person, group) pair, also carrying the
-- Google account id that the access-check functions match against.
CREATE TABLE IF NOT EXISTS myschema.person_groups (
    id serial PRIMARY KEY,
    person_id citext NOT NULL REFERENCES myschema.person (id),
    google_id text NOT NULL REFERENCES myschema_private.person_account (google_id),
    -- Plain integer, not serial: this column is a foreign key, so its values
    -- must come from myschema.groups (id). A serial column would attach its
    -- own sequence as a default and hand out unrelated numbers.
    group_id integer NOT NULL REFERENCES myschema.groups (id),
    updated_at timestamp DEFAULT now(),
    CONSTRAINT unq_person_id_group_id UNIQUE (person_id, group_id)
);

-- Index for lookups that filter on google_id and group_id together.
-- IF NOT EXISTS keeps the script re-runnable, matching the CREATE TABLE above.
CREATE INDEX IF NOT EXISTS persongroups_google_group_idx
    ON myschema.person_groups (google_id, group_id);
为了进行 RLS 检查,我定义了如下函数:
-- Access check used by row-level security policies.
--
-- Returns TRUE when the session's current role is 'admin', or when the
-- Google account id stored in the "user.sub" setting has a row in
-- myschema.person_groups for at least one of the given groups; FALSE otherwise.
--
-- Parameters:
--   group_id  int[]  group ids; membership in any one of them is sufficient.
-- Returns:
--   boolean. The function is STRICT, so a NULL argument yields NULL.
CREATE OR REPLACE FUNCTION myschema.is_in_group (group_id int[])
    RETURNS boolean
    AS $$
    SELECT
        CASE WHEN current_setting('role', FALSE) = 'admin' THEN
            TRUE
        WHEN EXISTS (
            SELECT
                1
            FROM
                myschema.person_groups
            WHERE
                -- $1 is the int[] argument. The positional form is used on
                -- purpose: the parameter is named group_id like the column,
                -- and in a SQL function a bare group_id would resolve to the
                -- column, not the argument.
                person_groups.group_id = ANY ($1)
                -- missing_ok = TRUE: an unset "user.sub" gives NULL, so the
                -- comparison matches no row instead of raising an error.
                AND person_groups.google_id = current_setting('user.sub', TRUE)) THEN
            TRUE
        ELSE
            FALSE
        END
$$
LANGUAGE SQL
STABLE
STRICT
SECURITY DEFINER;
我有一张用户想要访问的表:“gate_enterlog”。
此表的 RLS 策略是:
-- Row-level security for reads by role person: a row of gate_enterlog is
-- visible only when myschema.is_in_group(ARRAY[6, 1]) returns TRUE.
CREATE POLICY select_gate_enterlog
    ON myschema.gate_enterlog
    FOR SELECT
    TO person
    USING (myschema.is_in_group (ARRAY[6, 1]));
如果我使用这样的代码:
-- Reproduce the request as an API user would issue it: switch to role person
-- and set the "user.sub" value for this transaction only, then time the count.
BEGIN;
SET LOCAL ROLE person;
SET LOCAL "user.sub" = 'yyy';
EXPLAIN (ANALYZE, VERBOSE)
SELECT
    COUNT(id)
FROM
    myschema.gate_enterlog;
COMMIT;
我最终得到:
Aggregate (cost=23369.00..23369.01 rows=1 width=8) (actual time=2897.487..2897.487 rows=1 loops=1)
Output: count(id)
-> Seq Scan on myschema.gate_enterlog (cost=0.00..23297.08 rows=28769 width=4) (actual time=2897.484..2897.484 rows=0 loops=1)
Output: id, person_id, checkpoint_time, direction, place
Filter: is_in_group('{6,1}'::integer[])
Rows Removed by Filter: 86308
Planning Time: 0.626 ms
Execution Time: 2897.567 ms
如果我把 RLS 策略改为始终放行(相当于禁用检查):
-- Baseline for comparison: a policy that lets role person read every row.
-- CREATE POLICY does not replace an existing policy of the same name, so the
-- earlier select_gate_enterlog policy is dropped first.
DROP POLICY IF EXISTS select_gate_enterlog ON myschema.gate_enterlog;
CREATE POLICY select_gate_enterlog ON myschema.gate_enterlog
    FOR SELECT TO person
    USING (TRUE);
Aggregate (cost=1935.85..1935.86 rows=1 width=8) (actual time=17.671..17.672 rows=1 loops=1)
Output: count(id)
-> Seq Scan on myschema.gate_enterlog (cost=0.00..1720.08 rows=86308 width=4) (actual time=0.008..7.364 rows=86308 loops=1)
Output: id, person_id, checkpoint_time, direction, place
Planning Time: 0.594 ms
Execution Time: 17.737 ms
你们知道我该如何优化 RLS 吗?我希望 postgres 能“记住”该用户有权访问这张表。
我唯一的想法是最终对 SELECT 使用 USING (TRUE),并在执行查询之前一次性授予访问权限;但在这样做之前,我希望有人能提示我哪里做错了。
我想通了。似乎由于某种原因,布尔函数没有被优化(它作为过滤条件对每一行都执行了一次)。我将认证函数改为:
-- Set-returning variant of the group access check.
--
-- Emits a single row (the value 1) when the session's current role is 'admin'
-- or when the Google account id in the "user.sub" setting has a row in
-- myschema.person_groups for at least one of the given groups. Emits no rows
-- otherwise, so a caller can test the result with EXISTS (SELECT ...).
--
-- Parameters:
--   group_id  int[]  group ids; membership in any one of them is sufficient.
-- Returns:
--   SETOF int: one row containing 1 when access is granted, else an empty set.
CREATE OR REPLACE FUNCTION myschema.auth_group (group_id int[])
    RETURNS SETOF int
    AS $$
BEGIN
    IF current_setting('role', FALSE) = 'admin' THEN
        RETURN QUERY SELECT 1;
    ELSIF EXISTS (
        SELECT 1
        FROM myschema.person_groups
        WHERE person_groups.google_id = current_setting('user.sub', TRUE)
            -- $1 is the int[] argument. The positional form avoids the name
            -- clash between the parameter group_id and the column group_id.
            AND person_groups.group_id = ANY ($1)) THEN
        RETURN QUERY SELECT 1;
    END IF;
END;
$$
LANGUAGE plpgsql
STABLE STRICT
SECURITY DEFINER;
-- Policy built on auth_group: the check is written as an EXISTS over a
-- sub-select that does not reference any column of gate_enterlog.
-- CREATE POLICY does not replace an existing policy of the same name, so any
-- earlier select_gate_enterlog policy is dropped first.
DROP POLICY IF EXISTS select_gate_enterlog ON myschema.gate_enterlog;
CREATE POLICY select_gate_enterlog ON myschema.gate_enterlog
    FOR SELECT TO person
    USING (EXISTS (SELECT myschema.auth_group (ARRAY[6, 1])));
使用这样的函数和策略后,规划器会生成高效的计划(EXISTS 子查询作为 InitPlan 只执行一次):
Aggregate (cost=1827.97..1827.98 rows=1 width=8) (actual time=6.005..6.006 rows=1 loops=1)
Output: count(gate_enterlog.id)
InitPlan 1 (returns $0)
-> ProjectSet (cost=0.00..5.27 rows=1000 width=4) (actual time=0.158..0.159 rows=0 loops=1)
Output: auth_group(current_setting('role'::text, false), current_setting('user.sub'::text, true), '{6,1}'::integer[])
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.000..0.001 rows=1 loops=1)
-> Seq Scan on myschema.gate_enterlog (cost=0.00..1720.08 rows=43154 width=4) (actual time=6.002..6.002 rows=0 loops=1)
Output: gate_enterlog.id, gate_enterlog.person_id, gate_enterlog.checkpoint_time, gate_enterlog.direction, gate_enterlog.place
Filter: $0
Rows Removed by Filter: 86308
Planning Time: 0.500 ms
Execution Time: 6.100 ms
成本与 RLS 中使用 USING (TRUE) 时几乎相同。
我有一个基于 Postgres (13.2) 的 API,启用了 RLS(我使用 postgraphile),但它非常慢。 用户发送来自 Google OAuth 的 JWT。对表的访问基于角色(共 2 个:person、admin)加 RLS。 我有 2 张用于用户认证的表:person 和 person_groups
-- Group membership: one row per (person, group) pair, also carrying the
-- Google account id that the access-check functions match against.
CREATE TABLE IF NOT EXISTS myschema.person_groups (
    id serial PRIMARY KEY,
    person_id citext NOT NULL REFERENCES myschema.person (id),
    google_id text NOT NULL REFERENCES myschema_private.person_account (google_id),
    -- Plain integer, not serial: this column is a foreign key, so its values
    -- must come from myschema.groups (id). A serial column would attach its
    -- own sequence as a default and hand out unrelated numbers.
    group_id integer NOT NULL REFERENCES myschema.groups (id),
    updated_at timestamp DEFAULT now(),
    CONSTRAINT unq_person_id_group_id UNIQUE (person_id, group_id)
);

-- Index for lookups that filter on google_id and group_id together.
-- IF NOT EXISTS keeps the script re-runnable, matching the CREATE TABLE above.
CREATE INDEX IF NOT EXISTS persongroups_google_group_idx
    ON myschema.person_groups (google_id, group_id);
为了进行 RLS 检查,我定义了如下函数:
-- Access check used by row-level security policies.
--
-- Returns TRUE when the session's current role is 'admin', or when the
-- Google account id stored in the "user.sub" setting has a row in
-- myschema.person_groups for at least one of the given groups; FALSE otherwise.
--
-- Parameters:
--   group_id  int[]  group ids; membership in any one of them is sufficient.
-- Returns:
--   boolean. The function is STRICT, so a NULL argument yields NULL.
CREATE OR REPLACE FUNCTION myschema.is_in_group (group_id int[])
    RETURNS boolean
    AS $$
    SELECT
        CASE WHEN current_setting('role', FALSE) = 'admin' THEN
            TRUE
        WHEN EXISTS (
            SELECT
                1
            FROM
                myschema.person_groups
            WHERE
                -- $1 is the int[] argument. The positional form is used on
                -- purpose: the parameter is named group_id like the column,
                -- and in a SQL function a bare group_id would resolve to the
                -- column, not the argument.
                person_groups.group_id = ANY ($1)
                -- missing_ok = TRUE: an unset "user.sub" gives NULL, so the
                -- comparison matches no row instead of raising an error.
                AND person_groups.google_id = current_setting('user.sub', TRUE)) THEN
            TRUE
        ELSE
            FALSE
        END
$$
LANGUAGE SQL
STABLE
STRICT
SECURITY DEFINER;
我有一张用户想要访问的表:“gate_enterlog”。 此表的 RLS 策略是:
-- Row-level security for reads by role person: a row of gate_enterlog is
-- visible only when myschema.is_in_group(ARRAY[6, 1]) returns TRUE.
CREATE POLICY select_gate_enterlog
    ON myschema.gate_enterlog
    FOR SELECT
    TO person
    USING (myschema.is_in_group (ARRAY[6, 1]));
如果我使用这样的代码:
-- Reproduce the request as an API user would issue it: switch to role person
-- and set the "user.sub" value for this transaction only, then time the count.
BEGIN;
SET LOCAL ROLE person;
SET LOCAL "user.sub" = 'yyy';
EXPLAIN (ANALYZE, VERBOSE)
SELECT
    COUNT(id)
FROM
    myschema.gate_enterlog;
COMMIT;
我最终得到:
Aggregate (cost=23369.00..23369.01 rows=1 width=8) (actual time=2897.487..2897.487 rows=1 loops=1)
Output: count(id)
-> Seq Scan on myschema.gate_enterlog (cost=0.00..23297.08 rows=28769 width=4) (actual time=2897.484..2897.484 rows=0 loops=1)
Output: id, person_id, checkpoint_time, direction, place
Filter: is_in_group('{6,1}'::integer[])
Rows Removed by Filter: 86308
Planning Time: 0.626 ms
Execution Time: 2897.567 ms
如果我把 RLS 策略改为始终放行(相当于禁用检查):
-- Baseline for comparison: a policy that lets role person read every row.
-- CREATE POLICY does not replace an existing policy of the same name, so the
-- earlier select_gate_enterlog policy is dropped first.
DROP POLICY IF EXISTS select_gate_enterlog ON myschema.gate_enterlog;
CREATE POLICY select_gate_enterlog ON myschema.gate_enterlog
    FOR SELECT TO person
    USING (TRUE);
Aggregate (cost=1935.85..1935.86 rows=1 width=8) (actual time=17.671..17.672 rows=1 loops=1)
Output: count(id)
-> Seq Scan on myschema.gate_enterlog (cost=0.00..1720.08 rows=86308 width=4) (actual time=0.008..7.364 rows=86308 loops=1)
Output: id, person_id, checkpoint_time, direction, place
Planning Time: 0.594 ms
Execution Time: 17.737 ms
你们知道我该如何优化 RLS 吗?我希望 postgres 能“记住”该用户有权访问这张表。 我唯一的想法是最终对 SELECT 使用 USING (TRUE),并在执行查询之前一次性授予访问权限;但在这样做之前,我希望有人能提示我哪里做错了。
我想通了。似乎由于某种原因,布尔函数没有被优化(它作为过滤条件对每一行都执行了一次)。我将认证函数改为:
-- Set-returning variant of the group access check.
--
-- Emits a single row (the value 1) when the session's current role is 'admin'
-- or when the Google account id in the "user.sub" setting has a row in
-- myschema.person_groups for at least one of the given groups. Emits no rows
-- otherwise, so a caller can test the result with EXISTS (SELECT ...).
--
-- Parameters:
--   group_id  int[]  group ids; membership in any one of them is sufficient.
-- Returns:
--   SETOF int: one row containing 1 when access is granted, else an empty set.
CREATE OR REPLACE FUNCTION myschema.auth_group (group_id int[])
    RETURNS SETOF int
    AS $$
BEGIN
    IF current_setting('role', FALSE) = 'admin' THEN
        RETURN QUERY SELECT 1;
    ELSIF EXISTS (
        SELECT 1
        FROM myschema.person_groups
        WHERE person_groups.google_id = current_setting('user.sub', TRUE)
            -- $1 is the int[] argument. The positional form avoids the name
            -- clash between the parameter group_id and the column group_id.
            AND person_groups.group_id = ANY ($1)) THEN
        RETURN QUERY SELECT 1;
    END IF;
END;
$$
LANGUAGE plpgsql
STABLE STRICT
SECURITY DEFINER;
-- Policy built on auth_group: the check is written as an EXISTS over a
-- sub-select that does not reference any column of gate_enterlog.
-- CREATE POLICY does not replace an existing policy of the same name, so any
-- earlier select_gate_enterlog policy is dropped first.
DROP POLICY IF EXISTS select_gate_enterlog ON myschema.gate_enterlog;
CREATE POLICY select_gate_enterlog ON myschema.gate_enterlog
    FOR SELECT TO person
    USING (EXISTS (SELECT myschema.auth_group (ARRAY[6, 1])));
使用这样的函数和策略后,规划器会生成高效的计划(EXISTS 子查询作为 InitPlan 只执行一次):
Aggregate (cost=1827.97..1827.98 rows=1 width=8) (actual time=6.005..6.006 rows=1 loops=1)
Output: count(gate_enterlog.id)
InitPlan 1 (returns $0)
-> ProjectSet (cost=0.00..5.27 rows=1000 width=4) (actual time=0.158..0.159 rows=0 loops=1)
Output: auth_group(current_setting('role'::text, false), current_setting('user.sub'::text, true), '{6,1}'::integer[])
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.000..0.001 rows=1 loops=1)
-> Seq Scan on myschema.gate_enterlog (cost=0.00..1720.08 rows=43154 width=4) (actual time=6.002..6.002 rows=0 loops=1)
Output: gate_enterlog.id, gate_enterlog.person_id, gate_enterlog.checkpoint_time, gate_enterlog.direction, gate_enterlog.place
Filter: $0
Rows Removed by Filter: 86308
Planning Time: 0.500 ms
Execution Time: 6.100 ms
成本与 RLS 中使用 USING (TRUE) 时几乎相同。