如何使用多个条件过滤 jsonb?
How do I filter jsonb with multiple criteria?
我有以下 table 结构:
-- Demo table: each row stores one JSON document in "data"; the sample rows
-- use the keys employee, product, customer, updated, sales and expenses.
CREATE TABLE mytable (
    id   SERIAL,  -- auto-incrementing surrogate key
    data JSONB,   -- the whole report document
    PRIMARY KEY (id)
);
以及以下数据(为简洁起见,仅列出部分数据;请注意年份是随机的,并且 sales 与 expenses 中的年份彼此并不一致):
-- Sample data: six JSON documents for two employees (Jim Romo, Bill Baker).
-- Each document carries "sales" and "expenses" arrays of {"value", "yr"}
-- objects plus "product", "customer" and "updated" stored as JSON strings.
INSERT INTO mytable (data)
VALUES
-- Jim Romo / tv / customer 1: the largest "updated" value among his rows
('{"employee": "Jim Romo",
"sales": [{"value": 10, "yr": "2012"}, {"value": 5, "yr": "2013"}, {"value": 40, "yr": "2014"}],
"expenses": [{"value": 2, "yr": "2007"}, {"value": 1, "yr": "2013"}, {"value": 3, "yr": "2014"}],
"product": "tv", "customer": "1", "updated": "20150501"
}'),
-- Jim Romo / tv / customer 2
('{"employee": "Jim Romo",
"sales": [{"value": 10, "yr": "2012"}, {"value": 5, "yr": "2013"}, {"value": 41, "yr": "2014"}],
"expenses": [{"value": 2, "yr": "2009"}, {"value": 3, "yr": "2013"}, {"value": 3, "yr": "2014"}],
"product": "tv", "customer": "2", "updated": "20150312"
}'),
-- Jim Romo / radio / customer 2 (no 2013 expense entry)
('{"employee": "Jim Romo",
"sales": [{"value": 20, "yr": "2012"}, {"value": 25, "yr": "2013"}, {"value": 33, "yr": "2014"}],
"expenses": [{"value": 8, "yr": "2012"}, {"value": 12, "yr": "2014"}, {"value": 5, "yr": "2009"}],
"product": "radio", "customer": "2", "updated": "20150311"
}'),
-- Bill Baker / tv / customer 1: the largest "updated" value among his rows
('{"employee": "Bill Baker",
"sales": [{"value": 1, "yr": "2010"}, {"value": 2, "yr": "2009"}, {"value": 3, "yr": "2014"}],
"expenses": [{"value": 3, "yr": "2011"}, {"value": 1, "yr": "2012"}, {"value": 7, "yr": "2013"}],
"product": "tv", "customer": "1", "updated": "20150205"
}'),
-- Bill Baker / radio / customer 1
('{"employee": "Bill Baker",
"sales": [{"value": 10, "yr": "2010"}, {"value": 12, "yr": "2011"}, {"value": 3, "yr": "2014"}],
"expenses": [{"value": 4, "yr": "2011"}, {"value": 7, "yr": "2009"}, {"value": 4, "yr": "2013"}],
"product": "radio", "customer": "1", "updated": "20150204"
}'),
-- Jim Romo / tv / customer 3: the smallest "updated" value among his rows
('{"employee": "Jim Romo",
"sales": [{"value": 22, "yr": "2009"}, {"value": 17, "yr": "2013"}, {"value": 35, "yr": "2014"}],
"expenses": [{"value": 14, "yr": "2011"}, {"value": 13, "yr": "2014"}, {"value": 8, "yr": "2013"}],
"product": "tv", "customer": "3", "updated": "20150118"
}')
对于每个员工,我需要取其最近更新的那一行,并找出 2014 年电视(tv)销售额大于 30 的员工。在此基础上,我还需要进一步筛选出电视平均费用低于 5 的员工。计算平均值时,需要使用该员工所有的电视费用记录,而不仅仅是最新的那一行。
我的预期输出是 1 行:
employee | customer | 2014 tv sales | 2013 avg tv expenses
------------+----------+-----------------+----------------------
Jim Romo | 1 | 40 | 4
我可以(勉强)分别做到其中任何一个,但无法同时做到:
a. 获取 2014 年销售额 > 30 的行(但无法只取他们最近的 "tv" 销售额 ;( )
-- Attempt (a): rows whose "sales" array holds a 2014 entry with value above 30.
-- The scalar subquery must yield at most one value per row, so PostgreSQL
-- raises an error if a row carries more than one 2014 entry.
SELECT *
FROM mytable
WHERE 30 < (
    SELECT CAST(sale ->> 'value' AS float)
    FROM jsonb_array_elements(data -> 'sales') AS elem(sale)
    WHERE sale @> CAST(json_object(ARRAY['yr', '2014']) AS jsonb)
)
b. 获取 2013 年的平均费用(实际需要的是电视的平均费用)
-- Attempt (b): average of every 2013 expense value across the whole table
-- (all employees and all products are mixed together).
SELECT avg(CAST(expense ->> 'value' AS numeric))
FROM mytable
CROSS JOIN LATERAL jsonb_array_elements(data -> 'expenses') AS elem(expense)
WHERE expense @> CAST(json_object(ARRAY['yr', '2013']) AS jsonb)
编辑:这可能是一个非常大的table,所以任何关于性能和索引需求的评论都将不胜感激,因为我是 postgresql 和 jsonb 的新手。
编辑 #2:我已经尝试了两个答案,但对于大型 table 似乎都没有效率;(
这是对您的问题的(相当冗长的)回答。查询中的注释应该能解释各个部分。我遵循的基本思路是:1)让每一步操作都保持简单,先得到正确的结果,然后再考虑优化;2)尽可能多地(但不要过度)把 json 结构转换为更接近关系型("relational like")的结构,因为在 postgres 中,关系运算符比 json 数据上的运算符更强大。当然,这个查询还有简化的空间,甚至可以写出更高效的版本,但至少这是一个起点。
-- For each employee: take the most recently updated "tv" row, keep it when its
-- 2014 sales value exceeds 30, and report it next to the employee's average
-- 2013 expenses over ALL of their "tv" rows (kept only when that average < 5).
-- Every step is restricted to product = 'tv', so a newer row for another
-- product can neither hide the latest tv row nor be reported as tv sales.
with tv_rows as (
    -- lift the JSON fields used below into plain columns, "tv" documents only
    select
        data ->> 'employee'            as employee,
        (data ->> 'customer')::integer as customer,
        -- sample values look like yyyymmdd (e.g. "20150501"), so integer
        -- order presumably matches date order; confirm for real data
        (data ->> 'updated')::integer  as updated,
        data -> 'sales'                as sales,
        data -> 'expenses'             as expenses
    from mytable
    where data ->> 'product' = 'tv'
),
avg_exp_for_2013_tv as (
    -- average 2013 tv expense per employee, across all of their tv rows
    select
        t.employee,
        avg(e.value) as avg2013_expenses
    from tv_rows as t
    cross join lateral jsonb_to_recordset(t.expenses) as e(yr text, value float)
    where e.yr = '2013'
    group by t.employee
),
most_recent_updates_employees as (
    -- latest tv update per employee
    select
        employee,
        max(updated) as updated
    from tv_rows
    group by employee
),
most_recent_updated_rows as (
    -- the tv row(s) carrying that latest update; ties on "updated" are all kept
    select
        t.employee,
        t.customer,
        t.sales
    from tv_rows as t
    inner join most_recent_updates_employees as m
        on m.employee = t.employee
       and m.updated = t.updated
),
employees_with_2014_tv_sales_gt_30 as (
    -- 2014 sales entries above 30 taken from those latest tv rows
    select
        r.employee,
        r.customer,
        s.value as sales_value
    from most_recent_updated_rows as r
    cross join lateral jsonb_to_recordset(r.sales) as s(yr text, value float)
    where s.yr = '2014'
      and s.value > 30
)
select
    e1.employee,
    e1.customer,
    e1.sales_value      as "2014 tv sales",
    e2.avg2013_expenses as "2013 avg tv expenses"
from employees_with_2014_tv_sales_gt_30 as e1
-- inner join: employees without any 2013 tv expense entry are not reported
inner join avg_exp_for_2013_tv as e2
    on e2.employee = e1.employee
where e2.avg2013_expenses < 5;
我有以下 table 结构:
-- Demo table: each row stores one JSON document in "data"; the sample rows
-- use the keys employee, product, customer, updated, sales and expenses.
CREATE TABLE mytable (
    id   SERIAL,  -- auto-incrementing surrogate key
    data JSONB,   -- the whole report document
    PRIMARY KEY (id)
);
以及以下数据(为简洁起见,仅列出部分数据;请注意年份是随机的,并且 sales 与 expenses 中的年份彼此并不一致):
-- Sample data: six JSON documents for two employees (Jim Romo, Bill Baker).
-- Each document carries "sales" and "expenses" arrays of {"value", "yr"}
-- objects plus "product", "customer" and "updated" stored as JSON strings.
INSERT INTO mytable (data)
VALUES
-- Jim Romo / tv / customer 1: the largest "updated" value among his rows
('{"employee": "Jim Romo",
"sales": [{"value": 10, "yr": "2012"}, {"value": 5, "yr": "2013"}, {"value": 40, "yr": "2014"}],
"expenses": [{"value": 2, "yr": "2007"}, {"value": 1, "yr": "2013"}, {"value": 3, "yr": "2014"}],
"product": "tv", "customer": "1", "updated": "20150501"
}'),
-- Jim Romo / tv / customer 2
('{"employee": "Jim Romo",
"sales": [{"value": 10, "yr": "2012"}, {"value": 5, "yr": "2013"}, {"value": 41, "yr": "2014"}],
"expenses": [{"value": 2, "yr": "2009"}, {"value": 3, "yr": "2013"}, {"value": 3, "yr": "2014"}],
"product": "tv", "customer": "2", "updated": "20150312"
}'),
-- Jim Romo / radio / customer 2 (no 2013 expense entry)
('{"employee": "Jim Romo",
"sales": [{"value": 20, "yr": "2012"}, {"value": 25, "yr": "2013"}, {"value": 33, "yr": "2014"}],
"expenses": [{"value": 8, "yr": "2012"}, {"value": 12, "yr": "2014"}, {"value": 5, "yr": "2009"}],
"product": "radio", "customer": "2", "updated": "20150311"
}'),
-- Bill Baker / tv / customer 1: the largest "updated" value among his rows
('{"employee": "Bill Baker",
"sales": [{"value": 1, "yr": "2010"}, {"value": 2, "yr": "2009"}, {"value": 3, "yr": "2014"}],
"expenses": [{"value": 3, "yr": "2011"}, {"value": 1, "yr": "2012"}, {"value": 7, "yr": "2013"}],
"product": "tv", "customer": "1", "updated": "20150205"
}'),
-- Bill Baker / radio / customer 1
('{"employee": "Bill Baker",
"sales": [{"value": 10, "yr": "2010"}, {"value": 12, "yr": "2011"}, {"value": 3, "yr": "2014"}],
"expenses": [{"value": 4, "yr": "2011"}, {"value": 7, "yr": "2009"}, {"value": 4, "yr": "2013"}],
"product": "radio", "customer": "1", "updated": "20150204"
}'),
-- Jim Romo / tv / customer 3: the smallest "updated" value among his rows
('{"employee": "Jim Romo",
"sales": [{"value": 22, "yr": "2009"}, {"value": 17, "yr": "2013"}, {"value": 35, "yr": "2014"}],
"expenses": [{"value": 14, "yr": "2011"}, {"value": 13, "yr": "2014"}, {"value": 8, "yr": "2013"}],
"product": "tv", "customer": "3", "updated": "20150118"
}')
对于每个员工,我需要取其最近更新的那一行,并找出 2014 年电视(tv)销售额大于 30 的员工。在此基础上,我还需要进一步筛选出电视平均费用低于 5 的员工。计算平均值时,需要使用该员工所有的电视费用记录,而不仅仅是最新的那一行。
我的预期输出是 1 行:
employee | customer | 2014 tv sales | 2013 avg tv expenses
------------+----------+-----------------+----------------------
Jim Romo | 1 | 40 | 4
我可以(勉强)分别做到其中任何一个,但无法同时做到:
a. 获取 2014 年销售额 > 30 的行(但无法只取他们最近的 "tv" 销售额 ;( )
-- Attempt (a): rows whose "sales" array holds a 2014 entry with value above 30.
-- The scalar subquery must yield at most one value per row, so PostgreSQL
-- raises an error if a row carries more than one 2014 entry.
SELECT *
FROM mytable
WHERE 30 < (
    SELECT CAST(sale ->> 'value' AS float)
    FROM jsonb_array_elements(data -> 'sales') AS elem(sale)
    WHERE sale @> CAST(json_object(ARRAY['yr', '2014']) AS jsonb)
)
b. 获取 2013 年的平均费用(实际需要的是电视的平均费用)
-- Attempt (b): average of every 2013 expense value across the whole table
-- (all employees and all products are mixed together).
SELECT avg(CAST(expense ->> 'value' AS numeric))
FROM mytable
CROSS JOIN LATERAL jsonb_array_elements(data -> 'expenses') AS elem(expense)
WHERE expense @> CAST(json_object(ARRAY['yr', '2013']) AS jsonb)
编辑:这可能是一个非常大的table,所以任何关于性能和索引需求的评论都将不胜感激,因为我是 postgresql 和 jsonb 的新手。
编辑 #2:我已经尝试了两个答案,但对于大型 table 似乎都没有效率;(
这是对您的问题的(相当冗长的)回答。查询中的注释应该能解释各个部分。我遵循的基本思路是:1)让每一步操作都保持简单,先得到正确的结果,然后再考虑优化;2)尽可能多地(但不要过度)把 json 结构转换为更接近关系型("relational like")的结构,因为在 postgres 中,关系运算符比 json 数据上的运算符更强大。当然,这个查询还有简化的空间,甚至可以写出更高效的版本,但至少这是一个起点。
-- For each employee: take the most recently updated "tv" row, keep it when its
-- 2014 sales value exceeds 30, and report it next to the employee's average
-- 2013 expenses over ALL of their "tv" rows (kept only when that average < 5).
-- Every step is restricted to product = 'tv', so a newer row for another
-- product can neither hide the latest tv row nor be reported as tv sales.
with tv_rows as (
    -- lift the JSON fields used below into plain columns, "tv" documents only
    select
        data ->> 'employee'            as employee,
        (data ->> 'customer')::integer as customer,
        -- sample values look like yyyymmdd (e.g. "20150501"), so integer
        -- order presumably matches date order; confirm for real data
        (data ->> 'updated')::integer  as updated,
        data -> 'sales'                as sales,
        data -> 'expenses'             as expenses
    from mytable
    where data ->> 'product' = 'tv'
),
avg_exp_for_2013_tv as (
    -- average 2013 tv expense per employee, across all of their tv rows
    select
        t.employee,
        avg(e.value) as avg2013_expenses
    from tv_rows as t
    cross join lateral jsonb_to_recordset(t.expenses) as e(yr text, value float)
    where e.yr = '2013'
    group by t.employee
),
most_recent_updates_employees as (
    -- latest tv update per employee
    select
        employee,
        max(updated) as updated
    from tv_rows
    group by employee
),
most_recent_updated_rows as (
    -- the tv row(s) carrying that latest update; ties on "updated" are all kept
    select
        t.employee,
        t.customer,
        t.sales
    from tv_rows as t
    inner join most_recent_updates_employees as m
        on m.employee = t.employee
       and m.updated = t.updated
),
employees_with_2014_tv_sales_gt_30 as (
    -- 2014 sales entries above 30 taken from those latest tv rows
    select
        r.employee,
        r.customer,
        s.value as sales_value
    from most_recent_updated_rows as r
    cross join lateral jsonb_to_recordset(r.sales) as s(yr text, value float)
    where s.yr = '2014'
      and s.value > 30
)
select
    e1.employee,
    e1.customer,
    e1.sales_value      as "2014 tv sales",
    e2.avg2013_expenses as "2013 avg tv expenses"
from employees_with_2014_tv_sales_gt_30 as e1
-- inner join: employees without any 2013 tv expense entry are not reported
inner join avg_exp_for_2013_tv as e2
    on e2.employee = e1.employee
where e2.avg2013_expenses < 5;