SQL Server:从联接表中获取特定列每天的最大时间戳
SQL Server max timestamp per day for specific column from joined table
我真的是 SQL 的新手,基本上是用谷歌搜索到这一点,我现在很困惑..所以我希望有人能帮忙!
目标:获取 MS SQL 服务器中每个 trip_id 每天具有最新时间戳的行(需要连接多个表才能获取所需数据)。
所以每个 trip_id 每天应该有 1 行,就像这样……[为了便于阅读删除了一些列]
timestamp,trip_id,stop_id,stop_code,arrival_time,departure_delay
4/28/2017 18:29,8888922,2847,52818,11:02:34,0
4/27/2017 18:26,8888922,2847,52818,11:02:34,60
4/25/2017 18:27,8888922,2847,52818,11:02:34,-120
4/28/2017 18:56,8888922,2847,52818,11:32:34,-60
4/25/2017 18:59,8888922,2847,52818,11:32:34,120
4/28/2017 19:34,8888922,2847,52818,12:02:34,360
4/27/2017 19:31,8888922,2847,52818,12:02:34,540
4/25/2017 19:27,8888922,2847,52818,12:02:34,-120
但是目前我能做到的最好结果,是用下面的查询按天以及 departure_delay 获取最大时间戳:
-- Asker's current attempt: newest trip_updates timestamp per group for stop 52818.
-- departure_delay is one of the grouping columns, so every distinct delay value
-- forms its own group and several rows per trip come back.
SELECT
    MAX(trip_updates.timestamp) AS max,
    stop_times.trip_id,
    stops.stop_id,
    stops.stop_code,
    stop_times.arrival_time,
    trips.service_id,
    stops.stop_name,
    stop_times.shape_dist_traveled,
    stop_time_updates.departure_delay
FROM stops
INNER JOIN stop_times
    ON stops.stop_id = stop_times.stop_id
INNER JOIN trips
    ON trips.trip_id = stop_times.trip_id
INNER JOIN routes
    ON trips.route_id = routes.route_id
INNER JOIN trip_updates
    ON stop_times.trip_id = trip_updates.trip_id
INNER JOIN stop_time_updates
    ON trip_updates.oid = stop_time_updates.trip_update_id
WHERE stop_code = '52818'
  AND service_id = '1'
  AND stop_times.arrival_time BETWEEN '11:00%' AND '14:00%'
  AND route_short_name = '134'
GROUP BY
    stop_times.trip_id,
    stops.stop_id,
    stops.stop_code,
    stop_times.arrival_time,
    trips.service_id,
    stops.stop_name,
    stop_times.shape_dist_traveled,
    stop_time_updates.departure_delay
ORDER BY
    stop_times.arrival_time ASC,
    MAX(trip_updates.timestamp) DESC
这给了我这样的结果...
timestamp,trip_id,stop_id,stop_code,arrival_time,departure_delay
4/28/2017 18:29,8888922,2847,52818,11:02:34,0
4/28/2017 18:21,8888922,2847,52818,11:02:34,30
4/28/2017 18:16,8888922,2847,52818,11:02:34,60
4/28/2017 18:11,8888922,2847,52818,11:02:34,120
4/27/2017 18:26,8888922,2847,52818,11:02:34,60
4/27/2017 18:22,8888922,2847,52818,11:02:34,30
4/27/2017 18:20,8888922,2847,52818,11:02:34,0
感谢所有帮助!谢谢!
如我的评论所述,如果您需要 SELECT 当前的所有列,一种实现方法是使用 ROW_NUMBER() 窗口函数并去掉 GROUP BY。例如:
-- Template: keep only the newest row per trip_id and calendar day by ranking rows with ROW_NUMBER() instead of aggregating with GROUP BY.
SELECT [Max] = [timestamp], trip_id, stop_id, stop_code, arrival_time, service_id, stop_name, shape_dist_traveled, departure_delay
FROM
(
SELECT trip_updates.[timestamp], stop_times.trip_id, stops.stop_id, stops.stop_code, stop_times.arrival_time,
trips.service_id, stops.stop_name, stop_times.shape_dist_traveled, stop_time_updates.departure_delay,
RN = ROW_NUMBER() OVER (PARTITION BY stop_times.trip_id, CAST(trip_updates.[timestamp] AS DATE) ORDER BY trip_updates.[timestamp] DESC)
-- Numbers the rows within each (trip_id, calendar day) partition, newest timestamp first, so RN = 1 is that day's latest row for the trip.
FROM ...
<put all your current joins / where clauses here>
AND route_short_name = '134'
-- Note: no GROUP BY
) AS T
WHERE RN = 1 -- Keeps only the first-ranked (latest) row of each trip_id / day partition.
ORDER BY arrival_time, [timestamp];
你可以联接到一个只返回所需行的子查询:
-- Returns one row per trip_id and calendar day for stop 52818 on route 134
-- (service 1): the row belonging to that day's most recent trip_updates entry.
SELECT  last_trip.[timestamp] AS [max],
        stop_times.trip_id,
        stops.stop_id,
        stops.stop_code,
        stop_times.arrival_time,
        trips.service_id,
        stops.stop_name,
        stop_times.shape_dist_traveled,
        stop_time_updates.departure_delay
FROM    stops
        INNER JOIN stop_times ON stops.stop_id = stop_times.stop_id
        INNER JOIN trips ON trips.trip_id = stop_times.trip_id
        INNER JOIN routes ON trips.route_id = routes.route_id
        -- Instead of joining directly to trip_updates, join to a derived table
        -- that ranks each trip's updates within a calendar day, newest first.
        -- Partitioning by the date as well as trip_id is what makes this
        -- "latest per day" rather than "latest ever".
        INNER JOIN ( SELECT oid,
                            trip_id,
                            [timestamp],
                            ROW_NUMBER() OVER ( PARTITION BY trip_id, CAST([timestamp] AS DATE)
                                                ORDER BY [timestamp] DESC ) AS rownum
                     FROM   trip_updates
                   ) AS last_trip
            ON stop_times.trip_id = last_trip.trip_id
           AND last_trip.rownum = 1 -- keep only each day's latest update
        -- trip_updates is only visible through the last_trip alias here, so the
        -- join key must be last_trip.oid.
        INNER JOIN stop_time_updates ON last_trip.oid = stop_time_updates.trip_update_id
WHERE   stop_code = '52818'
        AND service_id = '1'
        -- NOTE(review): '%' is not a wildcard inside BETWEEN; these are plain
        -- string bounds. Left unchanged; confirm the intended time range.
        AND stop_times.arrival_time BETWEEN '11:00%' AND '14:00%'
        AND route_short_name = '134'
-- The timestamp is selected un-aggregated, so it has to be a grouping column.
GROUP BY last_trip.[timestamp],
        stop_times.trip_id,
        stops.stop_id,
        stops.stop_code,
        stop_times.arrival_time,
        trips.service_id,
        stops.stop_name,
        stop_times.shape_dist_traveled,
        stop_time_updates.departure_delay
ORDER BY stop_times.arrival_time ASC,
        last_trip.[timestamp] DESC;
我真的是 SQL 的新手,基本上是用谷歌搜索到这一点,我现在很困惑..所以我希望有人能帮忙!
目标:获取 MS SQL 服务器中每个 trip_id 每天具有最新时间戳的行(需要连接多个表才能获取所需数据)。
所以每个 trip_id 每天应该有 1 行,就像这样……[为了便于阅读删除了一些列]
timestamp,trip_id,stop_id,stop_code,arrival_time,departure_delay
4/28/2017 18:29,8888922,2847,52818,11:02:34,0
4/27/2017 18:26,8888922,2847,52818,11:02:34,60
4/25/2017 18:27,8888922,2847,52818,11:02:34,-120
4/28/2017 18:56,8888922,2847,52818,11:32:34,-60
4/25/2017 18:59,8888922,2847,52818,11:32:34,120
4/28/2017 19:34,8888922,2847,52818,12:02:34,360
4/27/2017 19:31,8888922,2847,52818,12:02:34,540
4/25/2017 19:27,8888922,2847,52818,12:02:34,-120
但是目前我能做到的最好结果,是用下面的查询按天以及 departure_delay 获取最大时间戳:
-- Asker's current attempt: newest trip_updates timestamp per group for stop 52818.
-- departure_delay is one of the grouping columns, so every distinct delay value
-- forms its own group and several rows per trip come back.
SELECT
    MAX(trip_updates.timestamp) AS max,
    stop_times.trip_id,
    stops.stop_id,
    stops.stop_code,
    stop_times.arrival_time,
    trips.service_id,
    stops.stop_name,
    stop_times.shape_dist_traveled,
    stop_time_updates.departure_delay
FROM stops
INNER JOIN stop_times
    ON stops.stop_id = stop_times.stop_id
INNER JOIN trips
    ON trips.trip_id = stop_times.trip_id
INNER JOIN routes
    ON trips.route_id = routes.route_id
INNER JOIN trip_updates
    ON stop_times.trip_id = trip_updates.trip_id
INNER JOIN stop_time_updates
    ON trip_updates.oid = stop_time_updates.trip_update_id
WHERE stop_code = '52818'
  AND service_id = '1'
  AND stop_times.arrival_time BETWEEN '11:00%' AND '14:00%'
  AND route_short_name = '134'
GROUP BY
    stop_times.trip_id,
    stops.stop_id,
    stops.stop_code,
    stop_times.arrival_time,
    trips.service_id,
    stops.stop_name,
    stop_times.shape_dist_traveled,
    stop_time_updates.departure_delay
ORDER BY
    stop_times.arrival_time ASC,
    MAX(trip_updates.timestamp) DESC
这给了我这样的结果...
timestamp,trip_id,stop_id,stop_code,arrival_time,departure_delay
4/28/2017 18:29,8888922,2847,52818,11:02:34,0
4/28/2017 18:21,8888922,2847,52818,11:02:34,30
4/28/2017 18:16,8888922,2847,52818,11:02:34,60
4/28/2017 18:11,8888922,2847,52818,11:02:34,120
4/27/2017 18:26,8888922,2847,52818,11:02:34,60
4/27/2017 18:22,8888922,2847,52818,11:02:34,30
4/27/2017 18:20,8888922,2847,52818,11:02:34,0
感谢所有帮助!谢谢!
如我的评论所述,如果您需要 SELECT 当前的所有列,一种实现方法是使用 ROW_NUMBER() 窗口函数并去掉 GROUP BY。例如:
-- Template: keep only the newest row per trip_id and calendar day by ranking rows with ROW_NUMBER() instead of aggregating with GROUP BY.
SELECT [Max] = [timestamp], trip_id, stop_id, stop_code, arrival_time, service_id, stop_name, shape_dist_traveled, departure_delay
FROM
(
SELECT trip_updates.[timestamp], stop_times.trip_id, stops.stop_id, stops.stop_code, stop_times.arrival_time,
trips.service_id, stops.stop_name, stop_times.shape_dist_traveled, stop_time_updates.departure_delay,
RN = ROW_NUMBER() OVER (PARTITION BY stop_times.trip_id, CAST(trip_updates.[timestamp] AS DATE) ORDER BY trip_updates.[timestamp] DESC)
-- Numbers the rows within each (trip_id, calendar day) partition, newest timestamp first, so RN = 1 is that day's latest row for the trip.
FROM ...
<put all your current joins / where clauses here>
AND route_short_name = '134'
-- Note: no GROUP BY
) AS T
WHERE RN = 1 -- Keeps only the first-ranked (latest) row of each trip_id / day partition.
ORDER BY arrival_time, [timestamp];
你可以联接到一个只返回所需行的子查询:
-- Returns one row per trip_id and calendar day for stop 52818 on route 134
-- (service 1): the row belonging to that day's most recent trip_updates entry.
SELECT  last_trip.[timestamp] AS [max],
        stop_times.trip_id,
        stops.stop_id,
        stops.stop_code,
        stop_times.arrival_time,
        trips.service_id,
        stops.stop_name,
        stop_times.shape_dist_traveled,
        stop_time_updates.departure_delay
FROM    stops
        INNER JOIN stop_times ON stops.stop_id = stop_times.stop_id
        INNER JOIN trips ON trips.trip_id = stop_times.trip_id
        INNER JOIN routes ON trips.route_id = routes.route_id
        -- Instead of joining directly to trip_updates, join to a derived table
        -- that ranks each trip's updates within a calendar day, newest first.
        -- Partitioning by the date as well as trip_id is what makes this
        -- "latest per day" rather than "latest ever".
        INNER JOIN ( SELECT oid,
                            trip_id,
                            [timestamp],
                            ROW_NUMBER() OVER ( PARTITION BY trip_id, CAST([timestamp] AS DATE)
                                                ORDER BY [timestamp] DESC ) AS rownum
                     FROM   trip_updates
                   ) AS last_trip
            ON stop_times.trip_id = last_trip.trip_id
           AND last_trip.rownum = 1 -- keep only each day's latest update
        -- trip_updates is only visible through the last_trip alias here, so the
        -- join key must be last_trip.oid.
        INNER JOIN stop_time_updates ON last_trip.oid = stop_time_updates.trip_update_id
WHERE   stop_code = '52818'
        AND service_id = '1'
        -- NOTE(review): '%' is not a wildcard inside BETWEEN; these are plain
        -- string bounds. Left unchanged; confirm the intended time range.
        AND stop_times.arrival_time BETWEEN '11:00%' AND '14:00%'
        AND route_short_name = '134'
-- The timestamp is selected un-aggregated, so it has to be a grouping column.
GROUP BY last_trip.[timestamp],
        stop_times.trip_id,
        stops.stop_id,
        stops.stop_code,
        stop_times.arrival_time,
        trips.service_id,
        stops.stop_name,
        stop_times.shape_dist_traveled,
        stop_time_updates.departure_delay
ORDER BY stop_times.arrival_time ASC,
        last_trip.[timestamp] DESC;