R httr POST 函数返回“发送失败:连接已重置”
R httr POST function returns "Send failure: Connection was reset"
我正在尝试抓取“市场细分”选项卡下的表格(如下图所示)。我用来处理类似任务的代码逻辑在这里不起作用,并返回错误:
发送失败:连接已重置
# Question script: open the EGX prices page, read its first form, and
# post the form's own field values back to the same URL.
# The surrounding text reports that this fails with
# "Send failure: Connection was reset".
link<-'https://www.egx.com.eg/en/prices.aspx'
# Request headers used when the session is opened.
# NOTE(review): 'Content-Type' is listed twice in this vector.
headers.id <-c('User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
'Referer'= 'https://www.egx.com.eg/en/prices.aspx',
'Content-Type'= 'application/x-www-form-urlencoded',
'Origin'='https://www.egx.com.eg',
'Host'= 'www.egx.com.eg',
'Content-Type'= 'application/x-www-form-urlencoded',
'sec-ch-ua'='" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"'
)
# Open the page with the headers above; verbose() is passed along to the
# request as well.
pgsession<-session(link,httr::add_headers(.headers=headers.id), verbose())
# Keep the first form found on the page.
pgform<-html_form(pgsession)[[1]]
# Send the form fields with a standalone POST (presumably httr::POST) to the
# same URL. headers.id is not passed to this call, and pgsession is not used
# for the submission.
# Every field, including `__EVENTTARGET`, is sent with the value read from
# the form; nothing is overridden.
# NOTE(review): 'ctl00$H$rblSearchType' is listed twice in the body.
page<-POST(link, body=list(
'__EVENTTARGET'= pgform$fields$`__EVENTTARGET`$value,
'__EVENTARGUMENT'=pgform$fields$`__EVENTARGUMENT`$value,
'__VIEWSTATE'=pgform$fields$`__VIEWSTATE`$value,
'ctl00$H$txtSearchAll'=pgform$fields$`ctl00$H$txtSearchAll`$value,
'ctl00$H$rblSearchType'=pgform$fields$`ctl00$H$rblSearchType`$value,
'ctl00$H$rblSearchType'=pgform$fields$`ctl00$H$rblSearchType`$value,
'ctl00$H$imgBtnSearch'=pgform$fields$`ctl00$H$imgBtnSearch`$value,
'ctl00$C$S$TextBox1'=pgform$fields$`ctl00$C$S$TextBox1`$value
),
# Body is form-encoded; verbose() is passed to this request too.
encode="form", verbose()
)
我一直在搜索,直到找到使用 rvest 的解决方案,如下所示:
# Answer script: submit the page's own form through an rvest session with
# `__EVENTTARGET` overridden, then pull one table out of the response.
link <- "https://www.egx.com.eg/en/prices.aspx"

# Request headers attached to the session when it is opened.
headers.id <- c(
  "Accept" = "*/*",
  "Accept-Encoding" = "gzip, deflate, br",
  "Accept-Language" = "en-US,en;q=0.9",
  "Cache-Control" = "no-cache",
  "Connection" = "keep-alive",
  "Content-Type" = "application/x-www-form-urlencoded",
  "Host" = "www.egx.com.eg",
  "Origin" = "https://www.egx.com.eg",
  "Referer" = "https://www.egx.com.eg/en/prices.aspx",
  "sec-ch-ua" = '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
  "sec-ch-ua-mobile" = "?0",
  "Sec-Fetch-Dest" = "empty",
  "Sec-Fetch-Mode" = "cors",
  "Sec-Fetch-Site" = "same-origin",
  "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59",
  "X-MicrosoftAjax" = "Delta=true"
)

# Open the page and keep the first form it contains.
pgsession <- session(link, httr::add_headers(.headers = headers.id))
pgform <- html_form(pgsession)[[1]]

# Fill the form: `__EVENTTARGET` is set explicitly, the remaining fields are
# given the values already present in the form.
# 'ctl00$H$rblSearchType' is passed twice (form value, then "1"), exactly as
# in the original call.
filled_form <- html_form_set(
  pgform,
  "__EVENTTARGET" = "ctl00$C$S$lkMarket",
  "__EVENTARGUMENT" = pgform$fields$`__EVENTARGUMENT`$value,
  "__VIEWSTATE" = pgform$fields$`__VIEWSTATE`$value,
  "ctl00$H$txtSearchAll" = pgform$fields$`ctl00$H$txtSearchAll`$value,
  "ctl00$H$rblSearchType" = pgform$fields$`ctl00$H$rblSearchType`$value,
  "ctl00$H$rblSearchType" = "1",
  "ctl00$H$imgBtnSearch" = pgform$fields$`ctl00$H$imgBtnSearch`$value,
  "ctl00$C$S$TextBox1" = pgform$fields$`ctl00$C$S$TextBox1`$value
)

# Submit the filled form within the same session.
page <- session_submit(pgsession, filled_form)

# To get a different table, change `__EVENTTARGET` above to another control
# id, e.g. "ctl00$ContentPlaceHolder1$DataList2$ctl02$lnk_blok".
# Parse every table in the response and keep the seventh one.
page.html <- read_html(page$response) %>%
  html_table() %>%
  .[[7]]
我正在尝试抓取“市场细分”选项卡下的表格(如下图所示)。我用来处理类似任务的代码逻辑在这里不起作用,并返回错误:
发送失败:连接已重置
# Question script: open the EGX prices page, read its first form, and
# post the form's own field values back to the same URL.
# The surrounding text reports that this fails with
# "Send failure: Connection was reset".
link<-'https://www.egx.com.eg/en/prices.aspx'
# Request headers used when the session is opened.
# NOTE(review): 'Content-Type' is listed twice in this vector.
headers.id <-c('User-Agent'= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
'Referer'= 'https://www.egx.com.eg/en/prices.aspx',
'Content-Type'= 'application/x-www-form-urlencoded',
'Origin'='https://www.egx.com.eg',
'Host'= 'www.egx.com.eg',
'Content-Type'= 'application/x-www-form-urlencoded',
'sec-ch-ua'='" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"'
)
# Open the page with the headers above; verbose() is passed along to the
# request as well.
pgsession<-session(link,httr::add_headers(.headers=headers.id), verbose())
# Keep the first form found on the page.
pgform<-html_form(pgsession)[[1]]
# Send the form fields with a standalone POST (presumably httr::POST) to the
# same URL. headers.id is not passed to this call, and pgsession is not used
# for the submission.
# Every field, including `__EVENTTARGET`, is sent with the value read from
# the form; nothing is overridden.
# NOTE(review): 'ctl00$H$rblSearchType' is listed twice in the body.
page<-POST(link, body=list(
'__EVENTTARGET'= pgform$fields$`__EVENTTARGET`$value,
'__EVENTARGUMENT'=pgform$fields$`__EVENTARGUMENT`$value,
'__VIEWSTATE'=pgform$fields$`__VIEWSTATE`$value,
'ctl00$H$txtSearchAll'=pgform$fields$`ctl00$H$txtSearchAll`$value,
'ctl00$H$rblSearchType'=pgform$fields$`ctl00$H$rblSearchType`$value,
'ctl00$H$rblSearchType'=pgform$fields$`ctl00$H$rblSearchType`$value,
'ctl00$H$imgBtnSearch'=pgform$fields$`ctl00$H$imgBtnSearch`$value,
'ctl00$C$S$TextBox1'=pgform$fields$`ctl00$C$S$TextBox1`$value
),
# Body is form-encoded; verbose() is passed to this request too.
encode="form", verbose()
)
我一直在搜索,直到找到使用 rvest 的解决方案,如下所示:
# Answer script: submit the page's own form through an rvest session with
# `__EVENTTARGET` overridden, then pull one table out of the response.
link <- "https://www.egx.com.eg/en/prices.aspx"

# Request headers attached to the session when it is opened.
headers.id <- c(
  "Accept" = "*/*",
  "Accept-Encoding" = "gzip, deflate, br",
  "Accept-Language" = "en-US,en;q=0.9",
  "Cache-Control" = "no-cache",
  "Connection" = "keep-alive",
  "Content-Type" = "application/x-www-form-urlencoded",
  "Host" = "www.egx.com.eg",
  "Origin" = "https://www.egx.com.eg",
  "Referer" = "https://www.egx.com.eg/en/prices.aspx",
  "sec-ch-ua" = '" Not;A Brand";v="99", "Microsoft Edge";v="91", "Chromium";v="91"',
  "sec-ch-ua-mobile" = "?0",
  "Sec-Fetch-Dest" = "empty",
  "Sec-Fetch-Mode" = "cors",
  "Sec-Fetch-Site" = "same-origin",
  "User-Agent" = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59",
  "X-MicrosoftAjax" = "Delta=true"
)

# Open the page and keep the first form it contains.
pgsession <- session(link, httr::add_headers(.headers = headers.id))
pgform <- html_form(pgsession)[[1]]

# Fill the form: `__EVENTTARGET` is set explicitly, the remaining fields are
# given the values already present in the form.
# 'ctl00$H$rblSearchType' is passed twice (form value, then "1"), exactly as
# in the original call.
filled_form <- html_form_set(
  pgform,
  "__EVENTTARGET" = "ctl00$C$S$lkMarket",
  "__EVENTARGUMENT" = pgform$fields$`__EVENTARGUMENT`$value,
  "__VIEWSTATE" = pgform$fields$`__VIEWSTATE`$value,
  "ctl00$H$txtSearchAll" = pgform$fields$`ctl00$H$txtSearchAll`$value,
  "ctl00$H$rblSearchType" = pgform$fields$`ctl00$H$rblSearchType`$value,
  "ctl00$H$rblSearchType" = "1",
  "ctl00$H$imgBtnSearch" = pgform$fields$`ctl00$H$imgBtnSearch`$value,
  "ctl00$C$S$TextBox1" = pgform$fields$`ctl00$C$S$TextBox1`$value
)

# Submit the filled form within the same session.
page <- session_submit(pgsession, filled_form)

# To get a different table, change `__EVENTTARGET` above to another control
# id, e.g. "ctl00$ContentPlaceHolder1$DataList2$ctl02$lnk_blok".
# Parse every table in the response and keep the seventh one.
page.html <- read_html(page$response) %>%
  html_table() %>%
  .[[7]]