如何使用 dplyr 将此 SQL 代码转换为 R 脚本?
How can I translate this SQL code to R script using dplyr?
我目前正在做一个项目,想在连接(JOIN)后的结果中两次引用同一张表的同一列。SQL 代码是这样的:
-- Fetch one match and resolve both team ids to their long names.
-- Team is joined twice under separate aliases: T (home side), Te (away side).
SELECT
    M.date,
    T.team_long_name  AS Home_Team,
    M.home_team_goal,
    Te.team_long_name AS Away_Team,
    M.away_team_goal
FROM Match AS M
INNER JOIN Team AS T
    ON T.team_api_id = M.home_team_api_id
INNER JOIN Team AS Te
    ON Te.team_api_id = M.away_team_api_id
WHERE match_api_id = 539848;
...结果是这样的:
数据库table如下所示:
希望我已经提供了所有需要的信息。
问题:如何仅使用 dplyr 库在 R 中得到相同的结果?
Table 前 10 行的名称和结构如下:
Match:
# First 10 rows of the Match table in dput() form. The frame is bound to
# `Match` so it can be referenced by name (e.g. `copy_to(fakedb, Match)`)
# instead of being evaluated and discarded.
Match <- structure(
  list(
    id = 1:10,
    country_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
    league_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
    season = c(
      "2008/2009", "2008/2009", "2008/2009", "2008/2009", "2008/2009",
      "2008/2009", "2008/2009", "2008/2009", "2008/2009", "2008/2009"
    ),
    stage = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L),
    date = c(
      "2008-08-17 00:00:00", "2008-08-16 00:00:00", "2008-08-16 00:00:00",
      "2008-08-17 00:00:00", "2008-08-16 00:00:00", "2008-09-24 00:00:00",
      "2008-08-16 00:00:00", "2008-08-16 00:00:00", "2008-08-16 00:00:00",
      "2008-11-01 00:00:00"
    ),
    match_api_id = c(
      492473L, 492474L, 492475L, 492476L, 492477L,
      492478L, 492479L, 492480L, 492481L, 492564L
    ),
    home_team_api_id = c(
      9987L, 10000L, 9984L, 9991L, 7947L,
      8203L, 9999L, 4049L, 10001L, 8342L
    ),
    away_team_api_id = c(
      9993L, 9994L, 8635L, 9998L, 9985L,
      8342L, 8571L, 9996L, 9986L, 8571L
    ),
    home_team_goal = c(1L, 0L, 0L, 5L, 1L, 1L, 2L, 1L, 1L, 4L),
    away_team_goal = c(1L, 0L, 3L, 0L, 3L, 1L, 2L, 2L, 0L, 1L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
Team:
# Sample rows of the Team table in dput() form. The frame is bound to `Team`
# so it can be referenced by name (e.g. `copy_to(fakedb, Team)`) instead of
# being evaluated and discarded.
Team <- structure(
  list(
    id = c(
      1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
      11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 614L, 1034L
    ),
    team_api_id = c(
      9987L, 9993L, 10000L, 9994L, 9984L, 8635L, 9991L, 9998L, 7947L, 9985L,
      8203L, 8342L, 9999L, 8571L, 4049L, 9996L, 10001L, 9986L, 9997L, 9989L
    ),
    team_long_name = c(
      "KRC Genk", "Beerschot AC", "SV Zulte-Waregem", "Sporting Lokeren",
      "KSV Cercle Brugge", "RSC Anderlecht", "KAA Gent", "RAEC Mons",
      "FCV Dender EH", "Standard de Liège", "KV Mechelen", "Club Brugge KV",
      "KSV Roeselare", "KV Kortrijk", "Tubize", "Royal Excel Mouscron",
      "KVC Westerlo", "Sporting Charleroi", "Sint-Truidense VV", "Lierse SK"
    )
  ),
  row.names = c(NA, 20L),
  class = "data.frame"
)
在期望的结果中,我使用了 match_api_id = 539848,但由于它未包含在此示例数据中,请使用您自己的选择之一。
The main issue is to be able to have team_long_name twice in the result but for different teams, matching by their team_api_id 's.
先给出结论,dbplyr 管道如下:
# dbplyr translation of the SQL query. Team is joined twice, each time as a
# two-column lookup: its key is renamed to the Match column it pairs with and
# its name column is renamed to the desired output label.
tbl_match <- tbl(fakedb, "Match")
tbl_team <- tbl(fakedb, "Team")
tbl_match %>%
  filter(match_api_id == 492477) %>%
  inner_join(
    select(
      tbl_team,
      home_team_api_id = team_api_id,
      Home_Team = team_long_name
    ),
    by = "home_team_api_id"
  ) %>%
  inner_join(
    select(
      tbl_team,
      away_team_api_id = team_api_id,
      Away_Team = team_long_name
    ),
    by = "away_team_api_id"
  ) %>%
  # Keep the same five columns, in the same order, as the SQL SELECT list
  # (the goal columns were previously dropped here).
  select(date, Home_Team, home_team_goal, Away_Team, away_team_goal) %>%
  # Run the lazy query and bring the rows into a local tibble.
  collect()
(已编辑:加入了 collect(),因为没有它,输出就不是一个真正的数据框,并且/或者可能不会包含所有相关数据。)
来自相应的 DBI 调用:
# Send the hand-written SQL text held in `some_long_query` through DBI on the
# `fakedb` connection, for comparison with the dbplyr pipeline.
DBI::dbGetQuery(fakedb, some_long_query)
以下内容是根据您的示例数据回填的。请注意,您的数据不一致且不完整,所以我不得不做一些假设和转换。例如,我推断您的第一个 structure 是 Match,但它与您图片中描述的架构不匹配:它包含额外的列,例如 season 和 *_team_goal。另外,您查询中的 match_api_id 539848 不在示例数据中,所以我改用了一个示例数据中存在的值。(今后遇到类似情况,建议让代码和示例数据保持一致。)
以下是为本答案生成模拟数据库的代码。先把您的两个 structure 分别赋值给 Match 和 Team。
library(dbplyr)
library(dplyr)
# Build a throwaway in-memory SQLite database holding the two sample frames.
# `Match` and `Team` must already exist as data frames in the session.
fakedb <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
copy_to(fakedb, Match)
copy_to(fakedb, Team)

# Reference query: the original SQL, with an id that exists in the sample data.
some_long_query <- '
SELECT
M.date, T.team_long_name AS Home_Team, M.home_team_goal,
Te.team_long_name AS Away_Team, M.away_team_goal
FROM
Match AS M
JOIN Team AS T ON T.team_api_id = M.home_team_api_id
JOIN Team AS Te ON Te.team_api_id = M.away_team_api_id
WHERE
match_api_id = 492477;' # 539848
DBI::dbGetQuery(fakedb, some_long_query)
#                  date     Home_Team home_team_goal         Away_Team away_team_goal
# 1 2008-08-16 00:00:00 FCV Dender EH              1 Standard de Liège              3

# dbplyr equivalent: Team is joined twice, once per side, with its columns
# renamed so each join key matches the corresponding Match column.
tbl_match <- tbl(fakedb, "Match")
tbl_team <- tbl(fakedb, "Team")
tbl_match %>%
  filter(match_api_id == 492477) %>%
  inner_join(
    select(
      tbl_team,
      home_team_api_id = team_api_id,
      Home_Team = team_long_name
    ),
    by = "home_team_api_id"
  ) %>%
  inner_join(
    select(
      tbl_team,
      away_team_api_id = team_api_id,
      Away_Team = team_long_name
    ),
    by = "away_team_api_id"
  ) %>%
  select(date, Home_Team, home_team_goal, Away_Team, away_team_goal) %>%
  collect()
# # A tibble: 1 x 5
#   date                Home_Team     home_team_goal Away_Team         away_team_goal
#   <chr>               <chr>                  <int> <chr>                      <int>
# 1 2008-08-16 00:00:00 FCV Dender EH              1 Standard de Liège              3

# Release the connection once the demo is finished; the in-memory database
# is discarded with it.
DBI::dbDisconnect(fakedb)
我目前正在做一个项目,想在连接(JOIN)后的结果中两次引用同一张表的同一列。SQL 代码是这样的:
-- Fetch one match and resolve both team ids to their long names.
-- Team is joined twice under separate aliases: T (home side), Te (away side).
SELECT
    M.date,
    T.team_long_name  AS Home_Team,
    M.home_team_goal,
    Te.team_long_name AS Away_Team,
    M.away_team_goal
FROM Match AS M
INNER JOIN Team AS T
    ON T.team_api_id = M.home_team_api_id
INNER JOIN Team AS Te
    ON Te.team_api_id = M.away_team_api_id
WHERE match_api_id = 539848;
...结果是这样的:
数据库table如下所示:
希望我已经提供了所有需要的信息。
问题:如何仅使用 dplyr 库在 R 中得到相同的结果?
Table 前 10 行的名称和结构如下:
Match:
# First 10 rows of the Match table in dput() form. The frame is bound to
# `Match` so it can be referenced by name (e.g. `copy_to(fakedb, Match)`)
# instead of being evaluated and discarded.
Match <- structure(
  list(
    id = 1:10,
    country_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
    league_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
    season = c(
      "2008/2009", "2008/2009", "2008/2009", "2008/2009", "2008/2009",
      "2008/2009", "2008/2009", "2008/2009", "2008/2009", "2008/2009"
    ),
    stage = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 10L),
    date = c(
      "2008-08-17 00:00:00", "2008-08-16 00:00:00", "2008-08-16 00:00:00",
      "2008-08-17 00:00:00", "2008-08-16 00:00:00", "2008-09-24 00:00:00",
      "2008-08-16 00:00:00", "2008-08-16 00:00:00", "2008-08-16 00:00:00",
      "2008-11-01 00:00:00"
    ),
    match_api_id = c(
      492473L, 492474L, 492475L, 492476L, 492477L,
      492478L, 492479L, 492480L, 492481L, 492564L
    ),
    home_team_api_id = c(
      9987L, 10000L, 9984L, 9991L, 7947L,
      8203L, 9999L, 4049L, 10001L, 8342L
    ),
    away_team_api_id = c(
      9993L, 9994L, 8635L, 9998L, 9985L,
      8342L, 8571L, 9996L, 9986L, 8571L
    ),
    home_team_goal = c(1L, 0L, 0L, 5L, 1L, 1L, 2L, 1L, 1L, 4L),
    away_team_goal = c(1L, 0L, 3L, 0L, 3L, 1L, 2L, 2L, 0L, 1L)
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
Team:
# Sample rows of the Team table in dput() form. The frame is bound to `Team`
# so it can be referenced by name (e.g. `copy_to(fakedb, Team)`) instead of
# being evaluated and discarded.
Team <- structure(
  list(
    id = c(
      1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
      11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 614L, 1034L
    ),
    team_api_id = c(
      9987L, 9993L, 10000L, 9994L, 9984L, 8635L, 9991L, 9998L, 7947L, 9985L,
      8203L, 8342L, 9999L, 8571L, 4049L, 9996L, 10001L, 9986L, 9997L, 9989L
    ),
    team_long_name = c(
      "KRC Genk", "Beerschot AC", "SV Zulte-Waregem", "Sporting Lokeren",
      "KSV Cercle Brugge", "RSC Anderlecht", "KAA Gent", "RAEC Mons",
      "FCV Dender EH", "Standard de Liège", "KV Mechelen", "Club Brugge KV",
      "KSV Roeselare", "KV Kortrijk", "Tubize", "Royal Excel Mouscron",
      "KVC Westerlo", "Sporting Charleroi", "Sint-Truidense VV", "Lierse SK"
    )
  ),
  row.names = c(NA, 20L),
  class = "data.frame"
)
在期望的结果中,我使用了 match_api_id = 539848,但由于它未包含在此示例数据中,请使用您自己的选择之一。
The main issue is to be able to have team_long_name twice in the result but for different teams, matching by their team_api_id 's.
先给出结论,dbplyr 管道如下:
# dbplyr translation of the SQL query. Team is joined twice, each time as a
# two-column lookup: its key is renamed to the Match column it pairs with and
# its name column is renamed to the desired output label.
tbl_match <- tbl(fakedb, "Match")
tbl_team <- tbl(fakedb, "Team")
tbl_match %>%
  filter(match_api_id == 492477) %>%
  inner_join(
    select(
      tbl_team,
      home_team_api_id = team_api_id,
      Home_Team = team_long_name
    ),
    by = "home_team_api_id"
  ) %>%
  inner_join(
    select(
      tbl_team,
      away_team_api_id = team_api_id,
      Away_Team = team_long_name
    ),
    by = "away_team_api_id"
  ) %>%
  # Keep the same five columns, in the same order, as the SQL SELECT list
  # (the goal columns were previously dropped here).
  select(date, Home_Team, home_team_goal, Away_Team, away_team_goal) %>%
  # Run the lazy query and bring the rows into a local tibble.
  collect()
(已编辑:加入了 collect(),因为没有它,输出就不是一个真正的数据框,并且/或者可能不会包含所有相关数据。)
来自相应的 DBI 调用:
# Send the hand-written SQL text held in `some_long_query` through DBI on the
# `fakedb` connection, for comparison with the dbplyr pipeline.
DBI::dbGetQuery(fakedb, some_long_query)
以下内容是根据您的示例数据回填的。请注意,您的数据不一致且不完整,所以我不得不做一些假设和转换。例如,我推断您的第一个 structure 是 Match,但它与您图片中描述的架构不匹配:它包含额外的列,例如 season 和 *_team_goal。另外,您查询中的 match_api_id 539848 不在示例数据中,所以我改用了一个示例数据中存在的值。(今后遇到类似情况,建议让代码和示例数据保持一致。)
以下是为本答案生成模拟数据库的代码。先把您的两个 structure 分别赋值给 Match 和 Team。
library(dbplyr)
library(dplyr)
# Build a throwaway in-memory SQLite database holding the two sample frames.
# `Match` and `Team` must already exist as data frames in the session.
fakedb <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
copy_to(fakedb, Match)
copy_to(fakedb, Team)

# Reference query: the original SQL, with an id that exists in the sample data.
some_long_query <- '
SELECT
M.date, T.team_long_name AS Home_Team, M.home_team_goal,
Te.team_long_name AS Away_Team, M.away_team_goal
FROM
Match AS M
JOIN Team AS T ON T.team_api_id = M.home_team_api_id
JOIN Team AS Te ON Te.team_api_id = M.away_team_api_id
WHERE
match_api_id = 492477;' # 539848
DBI::dbGetQuery(fakedb, some_long_query)
#                  date     Home_Team home_team_goal         Away_Team away_team_goal
# 1 2008-08-16 00:00:00 FCV Dender EH              1 Standard de Liège              3

# dbplyr equivalent: Team is joined twice, once per side, with its columns
# renamed so each join key matches the corresponding Match column.
tbl_match <- tbl(fakedb, "Match")
tbl_team <- tbl(fakedb, "Team")
tbl_match %>%
  filter(match_api_id == 492477) %>%
  inner_join(
    select(
      tbl_team,
      home_team_api_id = team_api_id,
      Home_Team = team_long_name
    ),
    by = "home_team_api_id"
  ) %>%
  inner_join(
    select(
      tbl_team,
      away_team_api_id = team_api_id,
      Away_Team = team_long_name
    ),
    by = "away_team_api_id"
  ) %>%
  select(date, Home_Team, home_team_goal, Away_Team, away_team_goal) %>%
  collect()
# # A tibble: 1 x 5
#   date                Home_Team     home_team_goal Away_Team         away_team_goal
#   <chr>               <chr>                  <int> <chr>                      <int>
# 1 2008-08-16 00:00:00 FCV Dender EH              1 Standard de Liège              3

# Release the connection once the demo is finished; the in-memory database
# is discarded with it.
DBI::dbDisconnect(fakedb)