如何创建我可以从 DBeaver 调用的 PostgreSQL 函数？

Question

这是样本数据：

-- Sample data: one row per login (username plus login timestamp).
-- NOTE: '#' is not valid in an unquoted PostgreSQL identifier; the table name
-- is kept exactly as posted in the question.
CREATE TABLE #logins (
    username text not null,
    logged_at timestamp not null);
-- Every login in this sample falls on the first day of a month.
insert into #logins (username, logged_at) values
    ('a','2019-01-01'),('b','2019-01-01'),('c','2019-01-01'),('d','2019-01-01'),('e','2019-01-01'),
    ('a','2019-02-01'),('b','2019-02-01'),('c','2019-02-01'),('f','2019-02-01'),('g','2019-02-01'),
    ('h','2019-02-01'),('i','2019-02-01'),('j','2019-02-01'),('a','2019-03-01'),('b','2019-03-01'),
    ('f','2019-03-01'),('h','2019-03-01'),('g','2019-03-01'),('k','2019-03-01'),('l','2019-03-01'),
    ('m','2019-03-01'),('n','2019-03-01'),('o','2019-03-01'),('a','2019-04-01'),('f','2019-04-01'),
    ('g','2019-04-01'),('k','2019-04-01'),('l','2019-04-01');

我平时做什么

-- Manual cohort workflow, as posted in the question:
--   1. build work table #a with each user's earliest login,
--   2. add one flag column per follow-up month (m_1 .. m_3),
--   3. count accounts and non-NULL flags per first-login date.
drop table if exists #a;
create table #a as 
select username, min(logged_at) as date from #logins -- MIN(): keep only each user's earliest login
group by 1;


-- m_1: the username when a login exists exactly 1 month after the first login, else NULL.
alter table #a 
add m_1 varchar;
update #a
set m_1 = (select username from #logins 
            where add_months(#a.date,1) = #logins.logged_at and #logins.username = #a.username);

-- m_2: same check with an offset of 2 months.
alter table #a 
add m_2 varchar;
update #a
set m_2 = (select username from #logins 
            where add_months(#a.date,2) = #logins.logged_at and #logins.username = #a.username);

-- m_3: NOTE(review): the offset below is 1, the same as m_1; given the column
-- name this looks like a copy/paste slip for 3 -- confirm with the author.
alter table #a 
add m_3 varchar;
update #a
set m_3 = (select username from #logins 
            where add_months(#a.date,1) = #logins.logged_at and #logins.username = #a.username);

-- Summary per first-login date; count(col) ignores NULLs, so each m_N count
-- is the number of users whose flag was set above.
select to_date(date,'yyyy-mm') as date, count(username) as num_acc,
                                    count(m_1) as m_1,
                                    count(m_2) as m_2,
                                    count(m_3) as m_3
from #a
group by 1
order by 1

预期结果：

             num_acc   m_1     m_2     m_3
2019-01-01      5       3       2       3
2019-02-01      5       3       2       3
2019-03-01      5       2       0       2

在此之后，我会下载数据，并将其可视化为同期群（cohort）图表。

重点是我想创建一个函数来简化这个过程。供您参考，我是在 DBeaver 中使用 PostgreSQL。对于这个函数，我们只需要传入一个包含 ID 和日期的表，它就会自动完成整个过程。

这是我目前的尝试：

 -- Asker's draft of a function meant to automate the per-month flag columns.
 -- Kept exactly as posted; it is not expected to run as written.
 -- NOTE(review): DECLARE and LOOP are PL/pgSQL constructs, but the function is
 --   declared LANGUAGE sql -- confirm the intended language.
 -- NOTE(review): the loop starts counter at 1 and exits when it reaches 6;
 --   "add counter varchar" appears to add a column literally named "counter"
 --   on each pass rather than m_1, m_2, ... -- confirm intent.
 -- NOTE(review): the two parameters (timestamp, varchar(255)) are never
 --   referenced in the body.
 CREATE OR REPLACE FUNCTION test(timestamp,varchar(255)) 
    RETURNS int 
declare
    counter integer :=1
stable 
AS $$

   LOOP 
      EXIT WHEN counter = 6 ; 
      counter := counter + 1 ; 
      alter table #a
      add counter varchar;
     update #a
     counter = select user_name from #logins 
                where add_month(#logins.logged_at,counter) = #a.first_login 
                #a.first_login and #logins.username = #a.username
   END LOOP 

$$ LANGUAGE sql;

这段代码写得很糟糕，因为在 SQL 中编写函数对我来说非常困难。这已经是我能做到的最好程度了。

（附注：抱歉，LANGUAGE plpythonu 无法使用，我们只能用 sql。）

Answer 1

修订：纳入附加要求

嗯，有了新的信息，可以做一些小的调整。既然 "No matter how many times you login within a month, we only count 1, based on the username"，我们就不再查找完全相同的日期，而是使用 Postgres 的 date_trunc 函数，把每次登录都归到当月 1 号，而不管实际登录日期是哪一天。同时继续使用 WHERE EXISTS，确保无论用户登录多少次我们都只计 1 次。因此，修订后的函数如下：

-- Cohort-style login summary, month-based version.
-- Rebuilds user_login_wrk from logins and returns one row per first-login
-- month with:
--   "Date"     first-login month formatted as 'yyyy-mm'
--   num_acc    number of users whose first login falls in that month
--   m_1..m_3   how many of those users logged in again 1, 2 and 3 calendar
--              months later (any day of the month, counted once per user)
-- Only logins in or after the month of login_start_in are considered when
-- determining a user's first login.
-- STRICT: the body is not executed when the argument is NULL.
CREATE OR REPLACE FUNCTION collect_user_login_counts(login_start_in date)
    RETURNS TABLE (
        "Date"  text,
        num_acc bigint,
        m_1     bigint,
        m_2     bigint,
        m_3     bigint
    )
    LANGUAGE sql
    STRICT
AS $$
    -- The work table only holds the outcome of the latest run: empty it first.
    TRUNCATE user_login_wrk;

    WITH first_login AS (
        -- Per user: the first day of the earliest login month that is not
        -- before the month of the start-date parameter.
        SELECT
            lg.username,
            min(date_trunc('month', lg.logged_at))::date AS logged_at
        FROM logins AS lg
        WHERE date_trunc('month', lg.logged_at)::date >= date_trunc('month', login_start_in)::date
        GROUP BY lg.username
    ),
    saved AS (
        -- Store one row per user; each m_N holds the username when the user
        -- has at least one login in the N-th following month, NULL otherwise.
        -- The inserted rows are handed on to the summary query below.
        INSERT INTO user_login_wrk (username, logged_at, m_1, m_2, m_3)
        SELECT
            fl.username,
            fl.logged_at,
            CASE
                WHEN EXISTS (
                    SELECT 1
                    FROM logins AS nxt
                    WHERE nxt.username = fl.username
                      AND date_trunc('month', nxt.logged_at)::date = (fl.logged_at + interval '1 month')::date
                ) THEN fl.username
            END,
            CASE
                WHEN EXISTS (
                    SELECT 1
                    FROM logins AS nxt
                    WHERE nxt.username = fl.username
                      AND date_trunc('month', nxt.logged_at)::date = (fl.logged_at + interval '2 month')::date
                ) THEN fl.username
            END,
            CASE
                WHEN EXISTS (
                    SELECT 1
                    FROM logins AS nxt
                    WHERE nxt.username = fl.username
                      AND date_trunc('month', nxt.logged_at)::date = (fl.logged_at + interval '3 month')::date
                ) THEN fl.username
            END
        FROM first_login AS fl
        RETURNING username, logged_at, m_1, m_2, m_3
    )
    -- Summarize per first-login month; count(col) skips NULLs, so each m_N
    -- count is the number of users who came back in that month.
    SELECT
        to_char(sv.logged_at, 'yyyy-mm'),
        count(sv.username),
        count(sv.m_1),
        count(sv.m_2),
        count(sv.m_3)
    FROM saved AS sv
    WHERE sv.logged_at >= date_trunc('month', login_start_in)::date
    GROUP BY to_char(sv.logged_at, 'yyyy-mm')
    ORDER BY to_char(sv.logged_at, 'yyyy-mm');
$$;

测试：为了进行测试，我更改了您的原始日期，使得没有任何一行落在当月 1 日，后续月份的登录也都不在相同的“日”上。此外，传给函数的参数日期也不出现在数据中。

-- Test fixture for the month-based version: reload logins with dates spread
-- across each month (none on the 1st), then call the function with a start
-- date that does not occur in the data.
TRUNCATE logins;

INSERT INTO logins (username, logged_at)
VALUES
    -- January 2019
    ('a', '2019-01-03'), ('b', '2019-01-04'), ('c', '2019-01-11'), ('d', '2019-01-15'),
    ('e', '2019-01-21'),
    -- February 2019
    ('a', '2019-02-06'), ('b', '2019-02-02'), ('c', '2019-02-04'), ('f', '2019-02-08'),
    ('g', '2019-02-09'), ('h', '2019-02-12'), ('i', '2019-02-24'), ('j', '2019-02-26'),
    -- March 2019
    ('a', '2019-03-02'), ('b', '2019-03-03'), ('f', '2019-03-05'), ('h', '2019-03-11'),
    ('g', '2019-03-17'), ('k', '2019-03-31'), ('l', '2019-03-09'), ('m', '2019-03-29'),
    ('n', '2019-03-27'), ('o', '2019-03-24'),
    -- April 2019
    ('a', '2019-04-06'), ('f', '2019-04-03'), ('g', '2019-04-14'), ('k', '2019-04-30'),
    ('l', '2019-04-11');

SELECT collect_user_login_counts(DATE '2019-01-18');        -- each result row as one composite value
SELECT * FROM collect_user_login_counts(DATE '2019-01-18'); -- result expanded into individual columns

结果

Date    | num_acc| m_1| m_2| m_3
________________________________
2019-01 | 5      | 3  | 2  | 1
2019-02 | 5      | 3  | 2  | 0
2019-03 | 5      | 2  | 0  | 0

尽管数据发生了变化，产生的结果仍然相同。顺便提一句，我确实用您的数据测试过原始版本，结果完全符合您的预期，只有 m_3 除外，原因已在原回复中解释过。我只是没有把这些结果贴出来，这是我的疏忽。

**原回复**
好吧，您贴出的代码存在一些问题。正如 @a_horse_with_no_name 指出的，# 字符在 Postgres 对象名称中是无效的，除非该名称用双引号括起来（即 "#logins"），这与所在的模式无关。此外，Postgres 没有 add_months 函数（您可能把它作为用户自定义函数创建了，但我无从得知）。

我注意到一些与您的预期结果不一致的地方。首先，您 "normally do" 部分中的最终查询无法产生这些结果：该查询返回的日期是年-月，而预期结果中是年-月-日。我按年-月处理。其次，我认为预期输出中的 m_3 不正确，这是因为您在设置 m_3 时使用了 add_months(#a.date,1)。从命名结构和前面的设置来看，我相信这是一个复制/粘贴错误，应为 add_months(#a.date,3)。我按后者处理。不过，这确实会改变 m_3 列的结果。

您贴出的函数中有一处我还没有完全理解：我不确定魔法数字 6 的作用。看起来您是想创建 m_1 到 m_6 这几列，但代码实际上会尝试 6 次创建名为 counter 的列，并在第二次时失败。在下面的函数中，我只使用 m_1 到 m_3。如果您的目标是到 m_6，只需复制 m_1 的写法并按需修改即可（还必须同步更新表定义）。

所做的一些更改：

我没有把列命名为 date。它是一个保留字，虽然现在这样用不会出问题，但这种情况随时可能改变。所以我在工作表中使用 logged_at。
我不喜欢用单个字符作为数据库对象的名称，所以 #a 变成了 user_login_wrk。
我避免在函数中使用 DDL（create、alter），所以表是在函数外部创建的。此外，对于 SQL 函数，表必须事先存在，除非整个函数是写成单个字符串的动态 SQL。

考虑到所有这些，我们得到：

-- Work table for the monthly login summary: one row per user with the
-- first-login date (logged_at) and, for each follow-up month, the username
-- when a matching login was found (NULL otherwise).
CREATE TABLE user_login_wrk (
    username  text,
    logged_at date,
    m_1       text,
    m_2       text,
    m_3       text
);

现在是重头戏。

-- Cohort-style login summary, exact-date version.
-- Rebuilds user_login_wrk from logins and returns one row per first-login
-- month with:
--   "Date"     first-login date formatted as 'yyyy-mm'
--   num_acc    number of users whose first login falls in that month
--   m_1..m_3   how many of those users have a login whose timestamp equals
--              the first-login date plus exactly 1, 2 and 3 months
-- Only logins on or after login_start_in are considered when determining a
-- user's first login.
-- STRICT: the body is not executed when the argument is NULL.
CREATE OR REPLACE FUNCTION collect_user_login_counts(login_start_in date)
    RETURNS TABLE (
        "Date"  text,
        num_acc bigint,
        m_1     bigint,
        m_2     bigint,
        m_3     bigint
    )
    LANGUAGE sql
    STRICT
AS $$
    -- The work table only holds the outcome of the latest run: empty it first.
    TRUNCATE user_login_wrk;

    WITH first_login AS (
        -- Per user: the earliest login date that is not before the parameter.
        SELECT
            lg.username,
            min(lg.logged_at)::date AS logged_at
        FROM logins AS lg
        WHERE lg.logged_at::date >= login_start_in
        GROUP BY lg.username
    ),
    saved AS (
        -- Store one row per user; each m_N holds the username when a login
        -- exists at first-login date + N months, NULL otherwise. The inserted
        -- rows are handed on to the summary query below.
        INSERT INTO user_login_wrk (username, logged_at, m_1, m_2, m_3)
        SELECT
            fl.username,
            fl.logged_at,
            CASE
                WHEN EXISTS (
                    SELECT 1
                    FROM logins AS nxt
                    WHERE nxt.username = fl.username
                      AND nxt.logged_at = fl.logged_at + interval '1 month'
                ) THEN fl.username
            END,
            CASE
                WHEN EXISTS (
                    SELECT 1
                    FROM logins AS nxt
                    WHERE nxt.username = fl.username
                      AND nxt.logged_at = fl.logged_at + interval '2 month'
                ) THEN fl.username
            END,
            CASE
                WHEN EXISTS (
                    SELECT 1
                    FROM logins AS nxt
                    WHERE nxt.username = fl.username
                      AND nxt.logged_at = fl.logged_at + interval '3 month'
                ) THEN fl.username
            END
        FROM first_login AS fl
        RETURNING username, logged_at, m_1, m_2, m_3
    )
    -- Summarize per first-login month; count(col) skips NULLs, so each m_N
    -- count is the number of users with a matching follow-up login.
    SELECT
        to_char(sv.logged_at, 'yyyy-mm'),
        count(sv.username),
        count(sv.m_1),
        count(sv.m_2),
        count(sv.m_3)
    FROM saved AS sv
    WHERE sv.logged_at >= login_start_in
    GROUP BY to_char(sv.logged_at, 'yyyy-mm')
    ORDER BY to_char(sv.logged_at, 'yyyy-mm');
$$;
-- Example calls
SELECT collect_user_login_counts(DATE '2019-01-01');        -- each result row as one composite value
SELECT * FROM collect_user_login_counts(DATE '2019-01-01'); -- result expanded into individual columns
SELECT * FROM collect_user_login_counts(DATE '2019-02-01'); -- start one month later

以上函数会完全清空工作表并重新构建它。但是，有时需要（或希望）查看上一次运行的结果，下面的函数提供了这一功能。（请注意，如果需要，也可以把其中的查询提取出来单独运行。）

-- Re-display the summary of the most recent run without rebuilding anything:
-- aggregates whatever rows are currently stored in user_login_wrk into one
-- row per first-login month ('yyyy-mm') with the account count and the
-- m_1..m_3 counts (count(col) skips NULLs).
CREATE OR REPLACE FUNCTION show_user_login_counts()
    RETURNS TABLE (
        "Date"  text,
        num_acc bigint,
        m_1     bigint,
        m_2     bigint,
        m_3     bigint
    )
    LANGUAGE sql
    STRICT
AS $$
    SELECT
        to_char(wrk.logged_at, 'yyyy-mm'),
        count(wrk.username),
        count(wrk.m_1),
        count(wrk.m_2),
        count(wrk.m_3)
    FROM user_login_wrk AS wrk
    GROUP BY to_char(wrk.logged_at, 'yyyy-mm')
    ORDER BY to_char(wrk.logged_at, 'yyyy-mm');
$$;
-- Example calls
SELECT show_user_login_counts();        -- each result row as one composite value
SELECT * FROM show_user_login_counts(); -- result expanded into individual columns

还有几个问题没有解决。目前每个后续列（m_1、m_2、m_3）都要求与起始日期相隔整整若干个月？如果用户不是在那个确切日期、而是在第二天登录，会发生什么情况？另外，这里也没有考虑用户在一个月内多次登录的情况。不过这些问题就留待以后再解决了。

如何创建我可以从 DBeaver 调用的 PostgreSQL 函数？

How to create a PostgreSQL function that I can call from DBeaver?

postgresql

stored-functions