甲骨文分析查询

Oracle analytics queries

我有这三个表:

-- Customer master data: one row per customer, keyed by id.
CREATE TABLE customer (
    id   INTEGER      NOT NULL PRIMARY KEY,
    name VARCHAR(255) NOT NULL
);

-- Product master data: one row per product, keyed by id.
CREATE TABLE product (
    id   INTEGER      NOT NULL PRIMARY KEY,
    name VARCHAR(255) NOT NULL
);

-- Invoices: each row ties one customer to one product on a given date.
-- quantity and summa are the only nullable columns.
CREATE TABLE invoice (
    invoice_number VARCHAR(20)   NOT NULL PRIMARY KEY,
    invoice_date   DATE          NOT NULL,
    customer_id    INTEGER       NOT NULL,
    product_id     INTEGER       NOT NULL,
    quantity       INTEGER,
    summa          NUMERIC(13,2)  -- presumably the invoice amount; confirm with the schema owner
);

-- Every invoice must reference an existing customer ...
ALTER TABLE invoice
    ADD CONSTRAINT FK_invoice_customer
    FOREIGN KEY (customer_id) REFERENCES customer (id);

-- ... and an existing product.
ALTER TABLE invoice
    ADD CONSTRAINT FK_invoice_product
    FOREIGN KEY (product_id) REFERENCES product (id);

我需要针对这些执行两个查询:

查询一:

在 2015/01、2015/02、2015/03 购买过商品(提到的每个月至少一次)但在 2015/04 没有购买过相同商品的客户姓名。

我试过的是这样的:

-- First attempt: (customer, product) pairs bought between 01/01/2015 and
-- 03/01/2015, minus the pairs that were bought again in April 2015.
--
-- Only customer/product columns are projected. With SELECT * every row also
-- carries its unique invoice_number, so MINUS could never remove a row.
-- Dates are built with TO_DATE and an explicit mm/dd/yyyy mask instead of
-- relying on implicit string conversion through the session NLS_DATE_FORMAT.
--
-- NOTE(review): the first window stops on 03/01/2015, so most of March is not
-- covered; BETWEEN on a DATE also ignores rows later in the day on the upper
-- bound. This form still cannot tell "once per month" from "once in three months".
SELECT c.id   AS customer_id,
       c.name AS customer_name,
       p.id   AS product_id,
       p.name AS product_name
FROM invoice i
JOIN customer c
  ON i.customer_id = c.id
JOIN product p
  ON i.product_id = p.id
WHERE i.invoice_date BETWEEN TO_DATE('01/01/2015', 'MM/DD/YYYY')
                         AND TO_DATE('03/01/2015', 'MM/DD/YYYY')
MINUS
SELECT c.id   AS customer_id,
       c.name AS customer_name,
       p.id   AS product_id,
       p.name AS product_name
FROM invoice i
JOIN customer c
  ON i.customer_id = c.id
JOIN product p
  ON i.product_id = p.id
WHERE i.invoice_date BETWEEN TO_DATE('04/01/2015', 'MM/DD/YYYY')
                         AND TO_DATE('04/30/2015', 'MM/DD/YYYY');

我尝试先找出所有在 01/01/2015 到 03/01/2015 之间购买过商品的客户(使用的日期格式为 mm/dd/yyyy),再减去在 04/01/2015 到 04/30/2015 之间购买过商品的客户。据我所知,这至少在某种程度上方向是对的,但这样写我无法检查客户是每个月都购买了一次,还是三个月内只购买了一次。

查询二:

应该找到行为相似的客户,即在给定时间段内(例如一个月)购买了相同数量的相同商品的客户对,数量允许有 5% 的误差(±5%)。

谢谢大家

您需要知道我们可以在 Oracle 日期上使用 TRUNC();根据格式掩码,我们可以获得一年的第一天或一个月的第一天。此查询使用这两种技巧来生成月份列表并将其加入发票。它还对客户使用交叉连接,以生成客户月份矩阵。

所以现在我们知道顾客是否每个月都买了东西:

-- Customer x month matrix for January-April 2015: one row per (month, customer)
-- with 'Y' when the customer has at least one invoice in that month, else 'N'.
-- The month list is anchored to a fixed 2015 date rather than sysdate, so the
-- query keeps covering 2015 no matter which year it is run in.
select months.mm
       , c.id as customer_id
       , nvl2(max(i.invoice_number), 'Y', 'N')  as bought_something
from ( -- first day of the year + 0..3 months = first day of Jan..Apr
       select add_months(trunc(date '2015-01-01', 'yyyy'), level-1) as mm
                 from dual
                 connect by level <= 4 ) months
     cross join customer c          -- every customer paired with every month
     left outer join invoice i      -- keeps months without any invoice
     on months.mm = trunc(i.invoice_date, 'MM')
        and c.id = i.customer_id
group by months.mm, c.id;

我们可以将此结果提供给另一个查询:

-- Customers with at least one invoice in each of January, February and March
-- 2015 and none in April 2015.
with mtrx as (
    -- Customer x month matrix for Jan-Apr 2015 ('Y' = bought something).
    -- Anchored to a fixed 2015 date: with sysdate the generated months belong
    -- to the current year and the date literals below would match nothing.
    select months.mm
           , c.id as customer_id
           , nvl2(max(i.invoice_number), 'Y', 'N')  as bought_something
    from ( select add_months(trunc(date '2015-01-01', 'yyyy'), level-1) as mm
                     from dual
                     connect by level <= 4 ) months
         cross join customer c
         left outer join invoice i
         on months.mm = trunc(i.invoice_date, 'MM')
            and c.id = i.customer_id
    group by months.mm, c.id 
    ) 
select customer_id from mtrx where mm = date '2015-01-01' and bought_something = 'Y'
intersect
select customer_id from mtrx where mm = date '2015-02-01' and bought_something = 'Y'
intersect
select customer_id from mtrx where mm = date '2015-03-01' and bought_something = 'Y'
intersect
select customer_id from mtrx where mm = date '2015-04-01' and bought_something = 'N'
;

这可能不是您的教授所期望的“分析型”(analytical)解决方案,但它确实能得到正确的结果。照例可以在配套的 SQL Fiddle 中查看示例。

调整最终结果以获得客户姓名,就留给读者作为练习了;把结果集进一步限定为“每个月购买的都是相同商品”,也同样留作练习 :)

下面的查询将返回在前三个月中每个月都购买了相同商品、但在第 4 个月没有购买该商品的客户:

-- Customer/product pairs bought in each of January, February and March 2015
-- but not in April 2015, joined to customer and product for their details.
SELECT *
FROM 
 ( SELECT customer_id, product_id
   FROM invoice
   -- Half-open range over the four months: an Oracle DATE carries a time of
   -- day, so BETWEEN ... AND DATE '2015-04-30' would drop invoices issued
   -- later on April 30 and let such pairs pass the "none in April" test.
   WHERE invoice_date >= DATE '2015-01-01'
     AND invoice_date <  DATE '2015-05-01'
   GROUP BY customer_id, product_id
   HAVING COUNT(DISTINCT EXTRACT(MONTH FROM invoice_date)) = 3        -- exactly three distinct months ...
     AND MAX(invoice_date) < DATE '2015-04-01'                        -- ... none of them April
 ) i 
JOIN customer c 
  ON i.customer_id = c.id
JOIN product p
  ON i.product_id = p.id;

根据您的评论,上面的查询并不是正确答案。客户可能在前三个月购买了任意组合的商品,但在第 4 个月没有购买其中任何一件商品(不过可以购买其他商品)。下面的查询应该能返回正确答案:

-- Customers who bought something in each of January, February and March 2015
-- and bought none of those first-quarter products again in April 2015
-- (other products in April are allowed).
WITH cte AS 
 ( SELECT customer_id, product_id, 
      -- number of months with buys per customer
      COUNT(DISTINCT EXTRACT(MONTH FROM invoice_date))
      OVER (PARTITION BY customer_id) AS cnt
    FROM invoice
    -- half-open range: an Oracle DATE has a time of day, so a closed range
    -- ending at DATE '2015-03-31' would miss invoices later on March 31
    WHERE invoice_date >= DATE '2015-01-01'
      AND invoice_date <  DATE '2015-04-01'
 )
SELECT DISTINCT q.customer_id 
FROM cte q
WHERE q.cnt = 3  -- at least one buy per month
AND NOT EXISTS   -- no April invoice of this customer for ANY product bought in Q1
 ( SELECT *
   FROM invoice i
   JOIN cte r
     ON r.customer_id = i.customer_id
    AND r.product_id = i.product_id
   WHERE i.invoice_date >= DATE '2015-04-01'
     AND i.invoice_date <  DATE '2015-05-01'
     -- correlate on the customer only: matching per product row would return
     -- a customer as soon as a single Q1 product was not re-bought in April
     AND i.customer_id = q.customer_id
 );

您也可以使用 TRUNC(invoice_date, 'mon') 来代替 EXTRACT(MONTH FROM invoice_date),但我更喜欢标准 SQL 语法。

下面的查询将返回您的第二个查询所需的结果:

-- Pairs of different customers who bought the same product in February 2015
-- in quantities within 5% of each other (compared invoice by invoice).
WITH cte AS
 ( -- data from one month; half-open range because an Oracle DATE has a time
   -- of day and BETWEEN ... AND DATE '2015-02-28' would drop invoices issued
   -- later on February 28
   SELECT *
   FROM invoice 
   WHERE invoice_date >= DATE '2015-02-01'
     AND invoice_date <  DATE '2015-03-01'
 )
SELECT DISTINCT t1.customer_id, t2.customer_id, t1.product_id -- need DISTINCT because there might be multiple rows per product/customer
FROM cte t1 JOIN cte t2
  ON t1.product_id = t2.product_id    -- same product
  AND t1.customer_id < t2.customer_id -- different customers, each pair reported once
WHERE t1.quantity BETWEEN t2.quantity / 1.05 AND t2.quantity * 1.05; -- +-5% margin; rows with NULL quantity never match

您需要将此结果再与 customer 表和 product 表连接(join),以获得更多详细信息。