使用 R 中的 httr 包通过 POST 发送 SQL 字符串
Send SQL string through POST with httr package in R
我正在尝试从此网站下载文件 https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=289。
该网站上的表单会生成一个 POST 请求并提交给其服务器,服务器随后创建一个临时文件,存放在 https://transtats.bts.gov/ftproot/TranStatsData/ 下。
关于表单数据,我可以看到如下:
UserTableName: DB1BCoupon
DBShortName:
RawDataTable: T_DB1B_COUPON
sqlstr: +SELECT+ORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID+FROM++T_DB1B_COUPON+WHERE+Quarter+%3D1+AND+YEAR%3D2017
varlist: ORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID
基于上述内容,并使用 httr 包,我一直在尝试以下操作:
library(httr)
# URL of the form page; it must be a quoted string literal to parse in R.
web <- "https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=289"
POST(web, body = "+SELECT+ORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID+FROM++T_DB1B_COUPON+WHERE+Quarter+%3D1+AND+YEAR%3D2017", encode = "form")
现在我希望收到包含以下信息的回复 header:
Location: https://transtats.bts.gov/ftproot/TranStatsData/847324776_T_DB1B_COUPON.zip
但是,出于某种原因,我始终得不到这样的响应。我确定 POST 的代码有错误,但我不确定错在哪里,也不知道具体做错了什么。
迟到总比不到好(回答)?
这个 POST 请求相当复杂,网站在处理完成后会重定向到另一个 GET 请求,以获取 ZIP 内容。
在浏览器的开发者工具中,右键单击该 POST 请求所在的行并选择 "Copy as cURL"。之后不要修改剪贴板内容,然后使用 curlconverter 将其转换为 R 函数:
library(curlconverter)
straighten() %>% make_req() -> tmp # it automagically uses the clipboard contents
在你的OS上点击"paste",你会得到一个更长的版本:
# Replay the browser's form submission to DownLoad_Table.asp and keep the
# resulting response object in `res`.
res <- httr::POST(
  url = "https://www.transtats.bts.gov/DownLoad_Table.asp",
  httr::add_headers(
    Referer = "https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=289"
  ),
  # Form fields as captured from the browser request. The names VarDesc,
  # VarType and VarName repeat, so the order is kept unchanged.
  body = list(
    UserTableName = "DB1BCoupon",
    DBShortName = "",
    RawDataTable = "T_DB1B_COUPON",
    sqlstr = " SELECT ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID FROM T_DB1B_COUPON WHERE Quarter=1 AND YEAR=2018",
    varlist = "ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID",
    grouplist = "",
    suml = "",
    sumRegion = "",
    filter1 = "title=",
    filter2 = "title=",
    geo = "All\xa0",
    time = "Q+1",
    timename = "Quarter",
    GEOGRAPHY = "All",
    XYEAR = "2018",
    FREQUENCY = "1",
    VarDesc = "ItinID", VarType = "Num",
    VarDesc = "MktID", VarType = "Num",
    VarDesc = "SeqNum", VarType = "Num",
    VarDesc = "Coupons", VarType = "Num",
    VarDesc = "Year", VarType = "Num", VarName = "ORIGIN_AIRPORT_ID",
    VarDesc = "OriginAirportID", VarType = "Num", VarName = "ORIGIN_AIRPORT_SEQ_ID",
    VarDesc = "OriginAirportSeqID", VarType = "Num", VarName = "ORIGIN_CITY_MARKET_ID",
    VarDesc = "OriginCityMarketID", VarType = "Num",
    VarDesc = "Quarter", VarType = "Num",
    VarDesc = "Origin", VarType = "Char",
    VarDesc = "OriginCountry", VarType = "Char",
    VarDesc = "OriginStateFips", VarType = "Char",
    VarDesc = "OriginState", VarType = "Char",
    VarDesc = "OriginStateName", VarType = "Char",
    VarDesc = "OriginWac", VarType = "Num", VarName = "DEST_AIRPORT_ID",
    VarDesc = "DestAirportID", VarType = "Num", VarName = "DEST_AIRPORT_SEQ_ID",
    VarDesc = "DestAirportSeqID", VarType = "Num", VarName = "DEST_CITY_MARKET_ID",
    VarDesc = "DestCityMarketID", VarType = "Num",
    VarDesc = "Dest", VarType = "Char",
    VarDesc = "DestCountry", VarType = "Char",
    VarDesc = "DestStateFips", VarType = "Char",
    VarDesc = "DestState", VarType = "Char",
    VarDesc = "DestStateName", VarType = "Char",
    VarDesc = "DestWac", VarType = "Num",
    VarDesc = "Break", VarType = "Char",
    VarDesc = "CouponType", VarType = "Char",
    VarDesc = "TkCarrier", VarType = "Char",
    VarDesc = "OpCarrier", VarType = "Char",
    VarDesc = "RPCarrier", VarType = "Char",
    VarDesc = "Passengers", VarType = "Num",
    VarDesc = "FareClass", VarType = "Char",
    VarDesc = "Distance", VarType = "Num",
    VarDesc = "DistanceGroup", VarType = "Num",
    VarDesc = "Gateway", VarType = "Num",
    VarDesc = "ItinGeoType", VarType = "Num",
    VarDesc = "CouponGeoType", VarType = "Num"
  ),
  encode = "form",
  query = list(
    Table_ID = "289",
    Has_Group = "0",
    Is_Zipped = "0"
  )
)
(是的,甚至比那更长)
sqlstr 参数中包含 SQL 查询。我不确定这个 POST 里有多少参数是 "required" 的,但它 "worked for me"。
res 确实很大,并且包含二进制压缩数据:
res
## Response [https://transtats.bts.gov/ftproot/TranStatsData/351117019_T_DB1B_COUPON.zip]
## Date: 2018-10-14 02:18
## Status: 200
## Content-Type: application/x-zip-compressed
## Size: 14.6 MB
## <BINARY BODY>
我们可以将其保存到磁盘并确保其有效:
# Derive the local file name from the ZIP URL found in the response headers.
# The dot is escaped as "\\." because "\." is not a valid escape sequence in
# an R string literal and prevents the code from parsing.
(save_to <- file.path("~/Data", basename(grep("\\.zip", unlist(res$all_headers), value=TRUE))))
## [1] "~/Data/351117019_T_DB1B_COUPON.zip"
# Write the raw response body to disk, then list the archive contents to
# confirm that the download is a readable ZIP file.
writeBin(httr::content(res, as="raw"), save_to)
unzip(save_to, list = TRUE)
## Name Length Date
## 1 351117019_T_DB1B_COUPON.csv 378311108 2018-10-13 22:18:00
我正在尝试从此网站下载文件 https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=289。 该网站上的表单会生成一个 POST 请求并提交给其服务器,服务器随后创建一个临时文件,存放在 https://transtats.bts.gov/ftproot/TranStatsData/ 下。
关于表单数据,我可以看到如下:
UserTableName: DB1BCoupon
DBShortName:
RawDataTable: T_DB1B_COUPON
sqlstr: +SELECT+ORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID+FROM++T_DB1B_COUPON+WHERE+Quarter+%3D1+AND+YEAR%3D2017
varlist: ORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID
基于上述内容,并使用 httr 包,我一直在尝试以下操作:
library(httr)
# URL of the form page; it must be a quoted string literal to parse in R.
web <- "https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=289"
POST(web, body = "+SELECT+ORIGIN_AIRPORT_ID%2CORIGIN_AIRPORT_SEQ_ID%2CORIGIN_CITY_MARKET_ID%2CDEST_AIRPORT_ID%2CDEST_AIRPORT_SEQ_ID%2CDEST_CITY_MARKET_ID+FROM++T_DB1B_COUPON+WHERE+Quarter+%3D1+AND+YEAR%3D2017", encode = "form")
现在我希望收到包含以下信息的回复 header:
Location: https://transtats.bts.gov/ftproot/TranStatsData/847324776_T_DB1B_COUPON.zip
但是,出于某种原因,我始终得不到这样的响应。我确定 POST 的代码有错误,但我不确定错在哪里,也不知道具体做错了什么。
迟到总比不到好(回答)?
这个 POST 请求相当复杂,网站在处理完成后会重定向到另一个 GET 请求,以获取 ZIP 内容。
在浏览器的开发者工具中,右键单击该 POST 请求所在的行并选择 "Copy as cURL"。之后不要修改剪贴板内容,然后使用 curlconverter 将其转换为 R 函数:
library(curlconverter)
straighten() %>% make_req() -> tmp # it automagically uses the clipboard contents
在你的OS上点击"paste",你会得到一个更长的版本:
# Replay the browser's form submission to DownLoad_Table.asp and keep the
# resulting response object in `res`.
res <- httr::POST(
  url = "https://www.transtats.bts.gov/DownLoad_Table.asp",
  httr::add_headers(
    Referer = "https://www.transtats.bts.gov/DL_SelectFields.asp?Table_ID=289"
  ),
  # Form fields as captured from the browser request. The names VarDesc,
  # VarType and VarName repeat, so the order is kept unchanged.
  body = list(
    UserTableName = "DB1BCoupon",
    DBShortName = "",
    RawDataTable = "T_DB1B_COUPON",
    sqlstr = " SELECT ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID FROM T_DB1B_COUPON WHERE Quarter=1 AND YEAR=2018",
    varlist = "ORIGIN_AIRPORT_ID,ORIGIN_AIRPORT_SEQ_ID,ORIGIN_CITY_MARKET_ID,DEST_AIRPORT_ID,DEST_AIRPORT_SEQ_ID,DEST_CITY_MARKET_ID",
    grouplist = "",
    suml = "",
    sumRegion = "",
    filter1 = "title=",
    filter2 = "title=",
    geo = "All\xa0",
    time = "Q+1",
    timename = "Quarter",
    GEOGRAPHY = "All",
    XYEAR = "2018",
    FREQUENCY = "1",
    VarDesc = "ItinID", VarType = "Num",
    VarDesc = "MktID", VarType = "Num",
    VarDesc = "SeqNum", VarType = "Num",
    VarDesc = "Coupons", VarType = "Num",
    VarDesc = "Year", VarType = "Num", VarName = "ORIGIN_AIRPORT_ID",
    VarDesc = "OriginAirportID", VarType = "Num", VarName = "ORIGIN_AIRPORT_SEQ_ID",
    VarDesc = "OriginAirportSeqID", VarType = "Num", VarName = "ORIGIN_CITY_MARKET_ID",
    VarDesc = "OriginCityMarketID", VarType = "Num",
    VarDesc = "Quarter", VarType = "Num",
    VarDesc = "Origin", VarType = "Char",
    VarDesc = "OriginCountry", VarType = "Char",
    VarDesc = "OriginStateFips", VarType = "Char",
    VarDesc = "OriginState", VarType = "Char",
    VarDesc = "OriginStateName", VarType = "Char",
    VarDesc = "OriginWac", VarType = "Num", VarName = "DEST_AIRPORT_ID",
    VarDesc = "DestAirportID", VarType = "Num", VarName = "DEST_AIRPORT_SEQ_ID",
    VarDesc = "DestAirportSeqID", VarType = "Num", VarName = "DEST_CITY_MARKET_ID",
    VarDesc = "DestCityMarketID", VarType = "Num",
    VarDesc = "Dest", VarType = "Char",
    VarDesc = "DestCountry", VarType = "Char",
    VarDesc = "DestStateFips", VarType = "Char",
    VarDesc = "DestState", VarType = "Char",
    VarDesc = "DestStateName", VarType = "Char",
    VarDesc = "DestWac", VarType = "Num",
    VarDesc = "Break", VarType = "Char",
    VarDesc = "CouponType", VarType = "Char",
    VarDesc = "TkCarrier", VarType = "Char",
    VarDesc = "OpCarrier", VarType = "Char",
    VarDesc = "RPCarrier", VarType = "Char",
    VarDesc = "Passengers", VarType = "Num",
    VarDesc = "FareClass", VarType = "Char",
    VarDesc = "Distance", VarType = "Num",
    VarDesc = "DistanceGroup", VarType = "Num",
    VarDesc = "Gateway", VarType = "Num",
    VarDesc = "ItinGeoType", VarType = "Num",
    VarDesc = "CouponGeoType", VarType = "Num"
  ),
  encode = "form",
  query = list(
    Table_ID = "289",
    Has_Group = "0",
    Is_Zipped = "0"
  )
)
(是的,甚至比那更长)
sqlstr 参数中包含 SQL 查询。我不确定这个 POST 里有多少参数是 "required" 的,但它 "worked for me"。
res 确实很大,并且包含二进制压缩数据:
res
## Response [https://transtats.bts.gov/ftproot/TranStatsData/351117019_T_DB1B_COUPON.zip]
## Date: 2018-10-14 02:18
## Status: 200
## Content-Type: application/x-zip-compressed
## Size: 14.6 MB
## <BINARY BODY>
我们可以将其保存到磁盘并确保其有效:
# Derive the local file name from the ZIP URL found in the response headers.
# The dot is escaped as "\\." because "\." is not a valid escape sequence in
# an R string literal and prevents the code from parsing.
(save_to <- file.path("~/Data", basename(grep("\\.zip", unlist(res$all_headers), value=TRUE))))
## [1] "~/Data/351117019_T_DB1B_COUPON.zip"
# Write the raw response body to disk, then list the archive contents to
# confirm that the download is a readable ZIP file.
writeBin(httr::content(res, as="raw"), save_to)
unzip(save_to, list = TRUE)
## Name Length Date
## 1 351117019_T_DB1B_COUPON.csv 378311108 2018-10-13 22:18:00