通过 postgresql 循环获取活跃行程的数量
Getting amount of active trips via postgresql loop
我正在寻找一种方法,通过 postgresql 查询获取 GTFS 提要中一天中每一分钟的活跃行程量。
对于每一次行程,我都有一张非规范化的表,其中保存了行程的开始和结束时间(以秒为单位)。它看起来像这样:
下面的查询可以得到在给定时间范围内开始的行程数量(例如,此处为 46800 到 47100 秒,即 13:00 到 13:05):
-- Counts the trips whose start time falls inside one time window on one
-- service day. Times are seconds since midnight: 46800 = 13:00:00 and
-- 47100 = 13:05:00.
-- NOTE(review): the LEFT JOIN on gtfs_calendar_dates (exception_type = 1,
-- "service added" in the GTFS spec) is not referenced by the select list or
-- the WHERE clause, so it does not add any service to the result. Confirm
-- whether added services were meant to be counted.
SELECT
    COUNT(trips.trip_id)
FROM denormalized_trips AS trips
LEFT JOIN gtfs_calendar_dates AS calendar_dates
    ON calendar_dates.service_id = trips.service_id
    AND calendar_dates.agency_key = trips.agency_key
    AND calendar_dates.date = '2017-07-03'
    AND calendar_dates.exception_type = 1
INNER JOIN gtfs_calendar AS calendar
    ON calendar.service_id = trips.service_id
    AND calendar.agency_key = trips.agency_key
    -- 2017-07-03 is a Monday, so the weekday flag has to be "monday";
    -- testing "wednesday" selects services for the wrong day of the week.
    AND calendar.monday = 1
WHERE trip_start_time BETWEEN 46800 AND 47100
    AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
    -- Drop services that are explicitly removed on that date
    -- (exception_type = 2).
    AND NOT EXISTS (
        SELECT 1
        FROM gtfs_calendar_dates AS date_exceptions
        WHERE date_exceptions.date = '2017-07-03'
            AND date_exceptions.agency_key = trips.agency_key
            AND date_exceptions.service_id = calendar.service_id
            AND date_exceptions.exception_type = 2
    );
查询结果是:在 13:00 到 13:05 之间开始的行程共有 12 次。
现在我想对一整天都这样做:我想得到每个时间窗口内(例如每 1 分钟,或者更可能是每 5 分钟)活跃的行程数量。
我用一个循环试了一下,但这似乎只给了我 1 个结果。
这是我现在想到的:
-- get_active_trips(n): walks one-minute buckets from second 43130 up to
-- 50000 (seconds since midnight) and counts the trips starting in each one.
-- The declared return type is a single INTEGER, so every iteration
-- overwrites the previous count and only the last bucket's count is
-- returned to the caller.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS INTEGER AS
$BODY$
DECLARE
    count INTEGER;
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        SELECT
            COUNT(trips.trip_id)
        INTO count
        FROM denormalized_trips AS trips
        LEFT JOIN gtfs_calendar_dates AS calendar_dates
            ON calendar_dates.service_id = trips.service_id
            AND calendar_dates.agency_key = trips.agency_key
            AND calendar_dates.date = '2017-07-03'
            AND calendar_dates.exception_type = 1
        INNER JOIN gtfs_calendar AS calendar
            ON calendar.service_id = trips.service_id
            AND calendar.agency_key = trips.agency_key
            -- 2017-07-03 is a Monday, so the matching weekday flag is "monday".
            AND calendar.monday = 1
        -- Half-open bucket [counter, counter + 60): an inclusive BETWEEN
        -- would count a trip starting exactly on a boundary in two buckets.
        WHERE trip_start_time >= counter
            AND trip_start_time < counter + 60
            AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
            -- Skip services removed on that date (exception_type = 2).
            AND NOT EXISTS (
                SELECT 1
                FROM gtfs_calendar_dates AS date_exceptions
                WHERE date_exceptions.date = '2017-07-03'
                    AND date_exceptions.agency_key = trips.agency_key
                    AND date_exceptions.service_id = calendar.service_id
                    AND date_exceptions.exception_type = 2
            );
    END LOOP;
    -- Only the value written by the final iteration is still in "count".
    RETURN count;
END;
$BODY$ LANGUAGE plpgsql STABLE;
调用结果SELECT get_active_trips(1);
现在我想返回类似 table 的结果或一组结果,而不仅仅是 1 个条目。我该怎么做?
非常感谢任何帮助。
创建如下类型
-- Composite row type with a single integer column named "count"; used as the
-- element type of a set-returning function.
DROP TYPE IF EXISTS get_active_trips_out;
CREATE TYPE get_active_trips_out AS (count integer);
在从如下函数返回数据时使用 table 类型
-- get_active_trips(n): returns one get_active_trips_out row per one-minute
-- bucket between second 43130 and 50000 (seconds since midnight), in bucket
-- order. Each row holds the number of trips that start inside that bucket.
-- Rows are emitted directly with RETURN NEXT, so no temporary table is
-- needed: the function can be called repeatedly inside one transaction and
-- the row order is deterministic.
-- Parameter n is accepted but not used by the body.
-- Requires the composite type get_active_trips_out (single column "count").
-- If a get_active_trips(int) with another return type already exists, it
-- must be dropped first; CREATE OR REPLACE cannot change a return type.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF get_active_trips_out AS
$BODY$
DECLARE
    r get_active_trips_out;
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        SELECT
            COUNT(trips.trip_id)
        INTO r.count
        FROM denormalized_trips AS trips
        LEFT JOIN gtfs_calendar_dates AS calendar_dates
            ON calendar_dates.service_id = trips.service_id
            AND calendar_dates.agency_key = trips.agency_key
            AND calendar_dates.date = '2017-07-03'
            AND calendar_dates.exception_type = 1
        INNER JOIN gtfs_calendar AS calendar
            ON calendar.service_id = trips.service_id
            AND calendar.agency_key = trips.agency_key
            -- 2017-07-03 is a Monday, so the matching weekday flag is "monday".
            AND calendar.monday = 1
        -- Half-open bucket [counter, counter + 60): an inclusive BETWEEN
        -- would count a trip starting exactly on a boundary in two buckets.
        WHERE trip_start_time >= counter
            AND trip_start_time < counter + 60
            AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
            -- Skip services removed on that date (exception_type = 2).
            AND NOT EXISTS (
                SELECT 1
                FROM gtfs_calendar_dates AS date_exceptions
                WHERE date_exceptions.date = '2017-07-03'
                    AND date_exceptions.agency_key = trips.agency_key
                    AND date_exceptions.service_id = calendar.service_id
                    AND date_exceptions.exception_type = 2
            );
        -- Append the current bucket's row to the result set.
        RETURN NEXT r;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
创建返回值集合的函数有两种语法:returns setof <type> 和 returns table(<columns definition>)(参见 PostgreSQL 官方文档)。
在 plpgsql 函数中返回这些值也有两种方式:return next 或 return query(参见 PostgreSQL 官方文档)。
因此,如果您只想获得整数系列,您可以用这种方式重写您的函数:
-- get_active_trips(n): set-returning variant that emits one integer per
-- loop step (43130 to 50000 in steps of 60) using RETURN NEXT.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF INTEGER AS
$BODY$
DECLARE
    trips_in_bucket INTEGER;
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        -- Store this step's count in a local variable ...
        SELECT COUNT(trips.trip_id)
        INTO trips_in_bucket
        FROM denormalized_trips AS trips
        <rest of query here>
        ;
        -- ... and append it to the result set.
        RETURN NEXT trips_in_bucket;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
或者,使用 RETURN QUERY
:
-- get_active_trips(n): set-returning variant that appends each loop step's
-- count (43130 to 50000 in steps of 60) to the result with RETURN QUERY.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF INTEGER AS
$BODY$
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        RETURN QUERY
        SELECT
            -- COUNT() yields bigint, but RETURN QUERY requires the query's
            -- column type to match the declared result type (integer)
            -- exactly, so cast explicitly.
            COUNT(trips.trip_id)::integer
        FROM denormalized_trips AS trips
        <rest of query here>
        ;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
如果你想 return 多一列:
-- get_active_trips(n): two-column variant. For every loop step (43130 to
-- 50000 in steps of 60) it returns the step value together with the count.
-- The RETURNS TABLE columns act as output variables inside the body.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS TABLE (counter_value int, active_trips_count int) AS
$BODY$
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        counter_value := counter;

        SELECT COUNT(trips.trip_id)
        INTO active_trips_count
        FROM denormalized_trips AS trips
        <rest of query here>
        ;

        -- RETURN NEXT takes no argument here: it emits the current values of
        -- counter_value and active_trips_count as one row.
        RETURN NEXT;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
或者,使用 RETURN QUERY
:
-- get_active_trips(n): two-column variant built on RETURN QUERY. For every
-- loop step (43130 to 50000 in steps of 60) it appends one row holding the
-- step value and the count.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS TABLE (counter_value int, active_trips_count int) AS
$BODY$
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        RETURN QUERY
        SELECT
            counter,
            -- COUNT() yields bigint, but RETURN QUERY requires each column
            -- to match the declared result type (int) exactly, so cast.
            COUNT(trips.trip_id)::integer
        FROM denormalized_trips AS trips
        <rest of query here>
        ;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
最后,还有一种可以替代 returns table 的声明方式:
-- Header only (the function body is not shown): OUT parameters combined with
-- RETURNS SETOF RECORD declare the result columns counter_value and
-- active_trips_count, as an alternative to a RETURNS TABLE (...) clause.
CREATE OR REPLACE FUNCTION get_active_trips(
n int,
out counter_value int,
out active_trips_count int)
RETURNS SETOF RECORD AS
更新
但是(!)我觉得可以使用没有循环的单个查询来简化您的任务。
考虑以下查询(我使用了 sqlfiddle 中的简化查询):
-- Loop-free version: generate_series produces one row per one-minute bucket
-- start (43130 to 50000 in steps of 60), and the LEFT JOIN keeps buckets that
-- have no trips, so they are reported with a count of 0.
select
    counter,
    count(trips.trip_id)
from
    generate_series(43130, 50000, 60) as counter
left join denormalized_trips as trips
    -- Half-open bucket [counter, counter + 60): an inclusive BETWEEN would
    -- count a trip starting exactly on a boundary in two adjacent buckets.
    on trips.trip_start_time >= counter
    and trips.trip_start_time < counter + 60
group by counter
order by counter;
我正在寻找一种方法,通过 postgresql 查询获取 GTFS 提要中一天中每一分钟的活跃行程量。
对于每一次行程,我都有一张非规范化的表,其中保存了行程的开始和结束时间(以秒为单位)。它看起来像这样:
下面的查询可以得到在给定时间范围内开始的行程数量(例如,此处为 46800 到 47100 秒,即 13:00 到 13:05):
-- Counts the trips whose start time falls inside one time window on one
-- service day. Times are seconds since midnight: 46800 = 13:00:00 and
-- 47100 = 13:05:00.
-- NOTE(review): the LEFT JOIN on gtfs_calendar_dates (exception_type = 1,
-- "service added" in the GTFS spec) is not referenced by the select list or
-- the WHERE clause, so it does not add any service to the result. Confirm
-- whether added services were meant to be counted.
SELECT
    COUNT(trips.trip_id)
FROM denormalized_trips AS trips
LEFT JOIN gtfs_calendar_dates AS calendar_dates
    ON calendar_dates.service_id = trips.service_id
    AND calendar_dates.agency_key = trips.agency_key
    AND calendar_dates.date = '2017-07-03'
    AND calendar_dates.exception_type = 1
INNER JOIN gtfs_calendar AS calendar
    ON calendar.service_id = trips.service_id
    AND calendar.agency_key = trips.agency_key
    -- 2017-07-03 is a Monday, so the weekday flag has to be "monday";
    -- testing "wednesday" selects services for the wrong day of the week.
    AND calendar.monday = 1
WHERE trip_start_time BETWEEN 46800 AND 47100
    AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
    -- Drop services that are explicitly removed on that date
    -- (exception_type = 2).
    AND NOT EXISTS (
        SELECT 1
        FROM gtfs_calendar_dates AS date_exceptions
        WHERE date_exceptions.date = '2017-07-03'
            AND date_exceptions.agency_key = trips.agency_key
            AND date_exceptions.service_id = calendar.service_id
            AND date_exceptions.exception_type = 2
    );
查询结果是:在 13:00 到 13:05 之间开始的行程共有 12 次。
现在我想对一整天都这样做:我想得到每个时间窗口内(例如每 1 分钟,或者更可能是每 5 分钟)活跃的行程数量。我用一个循环试了一下,但这似乎只给了我 1 个结果。这是我现在想到的:
-- get_active_trips(n): walks one-minute buckets from second 43130 up to
-- 50000 (seconds since midnight) and counts the trips starting in each one.
-- The declared return type is a single INTEGER, so every iteration
-- overwrites the previous count and only the last bucket's count is
-- returned to the caller.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS INTEGER AS
$BODY$
DECLARE
    count INTEGER;
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        SELECT
            COUNT(trips.trip_id)
        INTO count
        FROM denormalized_trips AS trips
        LEFT JOIN gtfs_calendar_dates AS calendar_dates
            ON calendar_dates.service_id = trips.service_id
            AND calendar_dates.agency_key = trips.agency_key
            AND calendar_dates.date = '2017-07-03'
            AND calendar_dates.exception_type = 1
        INNER JOIN gtfs_calendar AS calendar
            ON calendar.service_id = trips.service_id
            AND calendar.agency_key = trips.agency_key
            -- 2017-07-03 is a Monday, so the matching weekday flag is "monday".
            AND calendar.monday = 1
        -- Half-open bucket [counter, counter + 60): an inclusive BETWEEN
        -- would count a trip starting exactly on a boundary in two buckets.
        WHERE trip_start_time >= counter
            AND trip_start_time < counter + 60
            AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
            -- Skip services removed on that date (exception_type = 2).
            AND NOT EXISTS (
                SELECT 1
                FROM gtfs_calendar_dates AS date_exceptions
                WHERE date_exceptions.date = '2017-07-03'
                    AND date_exceptions.agency_key = trips.agency_key
                    AND date_exceptions.service_id = calendar.service_id
                    AND date_exceptions.exception_type = 2
            );
    END LOOP;
    -- Only the value written by the final iteration is still in "count".
    RETURN count;
END;
$BODY$ LANGUAGE plpgsql STABLE;
调用结果SELECT get_active_trips(1);
现在我想返回类似 table 的结果或一组结果,而不仅仅是 1 个条目。我该怎么做?
非常感谢任何帮助。
创建如下类型
-- Composite row type with a single integer column named "count"; used as the
-- element type of a set-returning function.
DROP TYPE IF EXISTS get_active_trips_out;
CREATE TYPE get_active_trips_out AS (count integer);
在从如下函数返回数据时使用 table 类型
-- get_active_trips(n): returns one get_active_trips_out row per one-minute
-- bucket between second 43130 and 50000 (seconds since midnight), in bucket
-- order. Each row holds the number of trips that start inside that bucket.
-- Rows are emitted directly with RETURN NEXT, so no temporary table is
-- needed: the function can be called repeatedly inside one transaction and
-- the row order is deterministic.
-- Parameter n is accepted but not used by the body.
-- Requires the composite type get_active_trips_out (single column "count").
-- If a get_active_trips(int) with another return type already exists, it
-- must be dropped first; CREATE OR REPLACE cannot change a return type.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF get_active_trips_out AS
$BODY$
DECLARE
    r get_active_trips_out;
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        SELECT
            COUNT(trips.trip_id)
        INTO r.count
        FROM denormalized_trips AS trips
        LEFT JOIN gtfs_calendar_dates AS calendar_dates
            ON calendar_dates.service_id = trips.service_id
            AND calendar_dates.agency_key = trips.agency_key
            AND calendar_dates.date = '2017-07-03'
            AND calendar_dates.exception_type = 1
        INNER JOIN gtfs_calendar AS calendar
            ON calendar.service_id = trips.service_id
            AND calendar.agency_key = trips.agency_key
            -- 2017-07-03 is a Monday, so the matching weekday flag is "monday".
            AND calendar.monday = 1
        -- Half-open bucket [counter, counter + 60): an inclusive BETWEEN
        -- would count a trip starting exactly on a boundary in two buckets.
        WHERE trip_start_time >= counter
            AND trip_start_time < counter + 60
            AND '2017-07-03' BETWEEN calendar.start_date AND calendar.end_date
            -- Skip services removed on that date (exception_type = 2).
            AND NOT EXISTS (
                SELECT 1
                FROM gtfs_calendar_dates AS date_exceptions
                WHERE date_exceptions.date = '2017-07-03'
                    AND date_exceptions.agency_key = trips.agency_key
                    AND date_exceptions.service_id = calendar.service_id
                    AND date_exceptions.exception_type = 2
            );
        -- Append the current bucket's row to the result set.
        RETURN NEXT r;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
创建返回值集合的函数有两种语法:returns setof <type> 和 returns table(<columns definition>)(参见 PostgreSQL 官方文档)。
在 plpgsql 函数中返回这些值也有两种方式:return next 或 return query(参见 PostgreSQL 官方文档)。
因此,如果您只想获得整数系列,您可以用这种方式重写您的函数:
-- get_active_trips(n): set-returning variant that emits one integer per
-- loop step (43130 to 50000 in steps of 60) using RETURN NEXT.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF INTEGER AS
$BODY$
DECLARE
    trips_in_bucket INTEGER;
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        -- Store this step's count in a local variable ...
        SELECT COUNT(trips.trip_id)
        INTO trips_in_bucket
        FROM denormalized_trips AS trips
        <rest of query here>
        ;
        -- ... and append it to the result set.
        RETURN NEXT trips_in_bucket;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
或者,使用 RETURN QUERY
:
-- get_active_trips(n): set-returning variant that appends each loop step's
-- count (43130 to 50000 in steps of 60) to the result with RETURN QUERY.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS SETOF INTEGER AS
$BODY$
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        RETURN QUERY
        SELECT
            -- COUNT() yields bigint, but RETURN QUERY requires the query's
            -- column type to match the declared result type (integer)
            -- exactly, so cast explicitly.
            COUNT(trips.trip_id)::integer
        FROM denormalized_trips AS trips
        <rest of query here>
        ;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
如果你想 return 多一列:
-- get_active_trips(n): two-column variant. For every loop step (43130 to
-- 50000 in steps of 60) it returns the step value together with the count.
-- The RETURNS TABLE columns act as output variables inside the body.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS TABLE (counter_value int, active_trips_count int) AS
$BODY$
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        counter_value := counter;

        SELECT COUNT(trips.trip_id)
        INTO active_trips_count
        FROM denormalized_trips AS trips
        <rest of query here>
        ;

        -- RETURN NEXT takes no argument here: it emits the current values of
        -- counter_value and active_trips_count as one row.
        RETURN NEXT;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
或者,使用 RETURN QUERY
:
-- get_active_trips(n): two-column variant built on RETURN QUERY. For every
-- loop step (43130 to 50000 in steps of 60) it appends one row holding the
-- step value and the count.
-- Parameter n is accepted but not used by the body.
CREATE OR REPLACE FUNCTION get_active_trips(n int)
RETURNS TABLE (counter_value int, active_trips_count int) AS
$BODY$
BEGIN
    FOR counter IN 43130..50000 BY 60 LOOP
        RETURN QUERY
        SELECT
            counter,
            -- COUNT() yields bigint, but RETURN QUERY requires each column
            -- to match the declared result type (int) exactly, so cast.
            COUNT(trips.trip_id)::integer
        FROM denormalized_trips AS trips
        <rest of query here>
        ;
    END LOOP;
    RETURN;
END;
$BODY$ LANGUAGE plpgsql STABLE;
最后,还有一种可以替代 returns table 的声明方式:
-- Header only (the function body is not shown): OUT parameters combined with
-- RETURNS SETOF RECORD declare the result columns counter_value and
-- active_trips_count, as an alternative to a RETURNS TABLE (...) clause.
CREATE OR REPLACE FUNCTION get_active_trips(
n int,
out counter_value int,
out active_trips_count int)
RETURNS SETOF RECORD AS
更新
但是(!)我觉得可以使用没有循环的单个查询来简化您的任务。
考虑以下查询(我使用了 sqlfiddle 中的简化查询):
-- Loop-free version: generate_series produces one row per one-minute bucket
-- start (43130 to 50000 in steps of 60), and the LEFT JOIN keeps buckets that
-- have no trips, so they are reported with a count of 0.
select
    counter,
    count(trips.trip_id)
from
    generate_series(43130, 50000, 60) as counter
left join denormalized_trips as trips
    -- Half-open bucket [counter, counter + 60): an inclusive BETWEEN would
    -- count a trip starting exactly on a boundary in two adjacent buckets.
    on trips.trip_start_time >= counter
    and trips.trip_start_time < counter + 60
group by counter
order by counter;