R:在循环中使用 RSelenium 和 PhantomJS
R RSelenium and phantom in a loop
我从之前的 Stack Overflow 讨论 (Extracting data from javascript with R) 中借用了以下代码。
我基本上是在尝试从网页上抓取某些药物的一些数据。当我针对单个药物代码 (2203) 运行这段代码时,它工作得很好!
# Look up a single drug code (2203) on the search form below by driving
# PhantomJS through RSelenium, and keep the resulting page source in `appData`.
appURL <- "http://web.sivicos.gov.co:8080/consultas/consultas/consreg_encabcum.jsp"
library(RSelenium)
# Launch the PhantomJS binary with an extra command-line flag.
pJS <- phantom(extras = c('--ssl-protocol=tlsv1'))
Sys.sleep(5) # give the binary a moment
# NOTE(review): the looped variant later in this file uses
# browserName = "phantomjs" -- confirm which name the driver expects.
remDr <- remoteDriver(browserName = "phantom")
remDr$open()
Sys.sleep(1) # give the binary a moment
remDr$navigate(appURL)
# Get the third list item of the select box (MEDICAMENTOS)
webElem <- remDr$findElement("css", "select[name='grupo'] option:nth-child(3)")
webElem$clickElement() # select this element
# Type the drug code into the input named 'expediente'.
# NOTE(review): the code is passed as a number; the looped variant later in
# this file passes it as a string -- confirm which form the driver expects.
webElem <- remDr$findElement("css", "input[name='expediente']")
webElem$sendKeysToElement(list(2203))
# Click the Buscar button
remDr$findElement("id", "INPUT2")$clickElement()
Sys.sleep(3) # pause after submitting the search (presumably so the results can load)
# Move into the iframe named 'datos' and click the first link found there.
remDr$switchToFrame(remDr$findElement("css", "iframe[name='datos']"))
remDr$findElement("css", "a")$clickElement() # click the link given in the iframe
# Grab the page source as a single string.
# NOTE(review): there is no pause between the click above and this read.
appData <- remDr$getPageSource()[[1]]
# Stop the PhantomJS process.
# NOTE(review): the session opened by remDr$open() is not closed explicitly first.
pJS$stop()
但是当我把它放在一个循环中,以便我可以检索我需要的所有药物的信息时......它崩溃了。下面是循环内的代码。
# Look up every drug code in `cums` on the search form and keep the parsed
# table for each code in `results` (a list named by code).
#
# Changes from the naive loop:
#   * PhantomJS is started once, not once per iteration, and is always
#     stopped afterwards, even if a step fails.
#   * Each parsed table is stored; a bare expression inside `for` is neither
#     printed nor kept, so the result was being thrown away.
#   * Codes are sent as strings because they are typed into a text field.
#   * A desktop browser user agent is presented, because the site appears to
#     check the user agent.
appURL <- "http://web.sivicos.gov.co:8080/consultas/consultas/consreg_encabcum.jsp"
library(RSelenium)

# Drug codes to look up; extend this vector to scrape more codes.
cums <- c("2203")

pJS <- phantom(extras = c('--ssl-protocol=tlsv1'))
Sys.sleep(5) # give the binary a moment

# Capabilities shared by every session (loop-invariant).
eCap <- list(phantomjs.page.settings.userAgent
             = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0")

# Preallocate one slot per code so results are never grown inside the loop.
results <- vector("list", length(cums))
names(results) <- cums

tryCatch({
  for (i in seq_along(cums)) {
    cum <- as.character(cums[[i]])

    # A fresh session per code keeps one lookup from affecting the next.
    remDr <- remoteDriver(browserName = "phantomjs", extraCapabilities = eCap)
    remDr$open()
    Sys.sleep(1) # give the binary a moment
    remDr$navigate(appURL)

    # Get the third list item of the select box (MEDICAMENTOS)
    webElem <- remDr$findElement("css", "select[name='grupo'] option:nth-child(3)")
    webElem$clickElement() # select this element

    # Type the drug code into the input named 'expediente'.
    webElem <- remDr$findElement("css", "input[name='expediente']")
    webElem$sendKeysToElement(list(cum))

    # Click the Buscar button
    remDr$findElement("id", "INPUT2")$clickElement()
    Sys.sleep(3) # let the search results load

    # Move into the iframe named 'datos' and click the first link in it.
    # NOTE(review): if this click has no effect under PhantomJS, try
    # remDr$phantomExecute() with page.switchToFrame('datos') instead.
    remDr$switchToFrame(remDr$findElement("css", "iframe[name='datos']"))
    remDr$findElement("css", "a")$clickElement()
    Sys.sleep(3) # let the detail page load before reading it

    # Get the resulting data and keep the third table on the page.
    appData <- remDr$getPageSource()[[1]]
    results[[i]] <- XML::readHTMLTable(appData, which = 3)

    remDr$close()
  }
}, finally = {
  # Stopping the binary also ends any session left open by a failed iteration.
  pJS$stop()
})
有什么想法吗?我听说 PhantomJS 响应不够快可能会导致问题,所以试着多给它一些等待时间,但没有用。无论是只用一个代码 2203,还是用两个代码 c(2202, 2203),循环都无法正常工作。
该网站似乎会检查用户代理 (User-Agent):
# Look up every drug code in `cums` on the search form, presenting a desktop
# browser user agent, and keep the parsed table for each code in `results`
# (a list named by code).
#
# The parsed table is stored explicitly: a bare expression inside `for` is
# neither printed nor kept. PhantomJS is stopped in `finally`, so the process
# does not outlive a failed lookup.
appURL <- "http://web.sivicos.gov.co:8080/consultas/consultas/consreg_encabcum.jsp"
library(RSelenium)

# Drug codes to look up, as strings; extend this vector to scrape more codes.
cums <- c("2203")

pJS <- phantom(extras = c('--ssl-protocol=tlsv1'))
Sys.sleep(5) # give the binary a moment

# Capabilities shared by every session (loop-invariant).
eCap <- list(phantomjs.page.settings.userAgent
             = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0")

# Script run inside PhantomJS: enter the 'datos' frame and click its first
# link. Built once here; the text is the same on every iteration.
frameClickJS <- paste(
  c(
    "var page = this;",
    "page.switchToFrame('datos');",
    "page.evaluate(function() {",
    "document.querySelector('a').click();",
    "});",
    ""
  ),
  collapse = "\n"
)

# Preallocate one slot per code so results are never grown inside the loop.
results <- vector("list", length(cums))
names(results) <- cums

tryCatch({
  for (i in seq_along(cums)) {
    cum <- as.character(cums[[i]])

    remDr <- remoteDriver(browserName = "phantomjs", extraCapabilities = eCap)
    remDr$open()
    Sys.sleep(1) # give the binary a moment
    remDr$navigate(appURL)

    # Get the third list item of the select box (MEDICAMENTOS)
    webElem <- remDr$findElement("css", "select[name='grupo'] option:nth-child(3)")
    webElem$clickElement() # select this element

    # Type the drug code into the input named 'expediente'.
    webElem <- remDr$findElement("css", "input[name='expediente']")
    webElem$sendKeysToElement(list(cum))

    # Click the Buscar button
    remDr$findElement("id", "INPUT2")$clickElement()
    Sys.sleep(3) # let the search results load

    remDr$phantomExecute(frameClickJS)
    Sys.sleep(3) # let the detail page load before reading it

    # Get the resulting data and keep the third table on the page.
    appData <- remDr$getPageSource()[[1]]
    results[[i]] <- XML::readHTMLTable(appData, which = 3)

    remDr$close()
  }
}, finally = {
  # Stopping the binary also ends any session left open by a failed iteration.
  pJS$stop()
})
我从之前的 Stack Overflow 讨论 (Extracting data from javascript with R) 中借用了以下代码。我基本上是在尝试从网页上抓取某些药物的一些数据。当我针对单个药物代码 (2203) 运行这段代码时,它工作得很好!
# Look up a single drug code (2203) on the search form below by driving
# PhantomJS through RSelenium, and keep the resulting page source in `appData`.
appURL <- "http://web.sivicos.gov.co:8080/consultas/consultas/consreg_encabcum.jsp"
library(RSelenium)
# Launch the PhantomJS binary with an extra command-line flag.
pJS <- phantom(extras = c('--ssl-protocol=tlsv1'))
Sys.sleep(5) # give the binary a moment
# NOTE(review): the looped variant later in this file uses
# browserName = "phantomjs" -- confirm which name the driver expects.
remDr <- remoteDriver(browserName = "phantom")
remDr$open()
Sys.sleep(1) # give the binary a moment
remDr$navigate(appURL)
# Get the third list item of the select box (MEDICAMENTOS)
webElem <- remDr$findElement("css", "select[name='grupo'] option:nth-child(3)")
webElem$clickElement() # select this element
# Type the drug code into the input named 'expediente'.
# NOTE(review): the code is passed as a number; the looped variant later in
# this file passes it as a string -- confirm which form the driver expects.
webElem <- remDr$findElement("css", "input[name='expediente']")
webElem$sendKeysToElement(list(2203))
# Click the Buscar button
remDr$findElement("id", "INPUT2")$clickElement()
Sys.sleep(3) # pause after submitting the search (presumably so the results can load)
# Move into the iframe named 'datos' and click the first link found there.
remDr$switchToFrame(remDr$findElement("css", "iframe[name='datos']"))
remDr$findElement("css", "a")$clickElement() # click the link given in the iframe
# Grab the page source as a single string.
# NOTE(review): there is no pause between the click above and this read.
appData <- remDr$getPageSource()[[1]]
# Stop the PhantomJS process.
# NOTE(review): the session opened by remDr$open() is not closed explicitly first.
pJS$stop()
但是当我把它放在一个循环中,以便我可以检索我需要的所有药物的信息时......它崩溃了。下面是循环内的代码。
# Look up every drug code in `cums` on the search form and keep the parsed
# table for each code in `results` (a list named by code).
#
# Changes from the naive loop:
#   * PhantomJS is started once, not once per iteration, and is always
#     stopped afterwards, even if a step fails.
#   * Each parsed table is stored; a bare expression inside `for` is neither
#     printed nor kept, so the result was being thrown away.
#   * Codes are sent as strings because they are typed into a text field.
#   * A desktop browser user agent is presented, because the site appears to
#     check the user agent.
appURL <- "http://web.sivicos.gov.co:8080/consultas/consultas/consreg_encabcum.jsp"
library(RSelenium)

# Drug codes to look up; extend this vector to scrape more codes.
cums <- c("2203")

pJS <- phantom(extras = c('--ssl-protocol=tlsv1'))
Sys.sleep(5) # give the binary a moment

# Capabilities shared by every session (loop-invariant).
eCap <- list(phantomjs.page.settings.userAgent
             = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0")

# Preallocate one slot per code so results are never grown inside the loop.
results <- vector("list", length(cums))
names(results) <- cums

tryCatch({
  for (i in seq_along(cums)) {
    cum <- as.character(cums[[i]])

    # A fresh session per code keeps one lookup from affecting the next.
    remDr <- remoteDriver(browserName = "phantomjs", extraCapabilities = eCap)
    remDr$open()
    Sys.sleep(1) # give the binary a moment
    remDr$navigate(appURL)

    # Get the third list item of the select box (MEDICAMENTOS)
    webElem <- remDr$findElement("css", "select[name='grupo'] option:nth-child(3)")
    webElem$clickElement() # select this element

    # Type the drug code into the input named 'expediente'.
    webElem <- remDr$findElement("css", "input[name='expediente']")
    webElem$sendKeysToElement(list(cum))

    # Click the Buscar button
    remDr$findElement("id", "INPUT2")$clickElement()
    Sys.sleep(3) # let the search results load

    # Move into the iframe named 'datos' and click the first link in it.
    # NOTE(review): if this click has no effect under PhantomJS, try
    # remDr$phantomExecute() with page.switchToFrame('datos') instead.
    remDr$switchToFrame(remDr$findElement("css", "iframe[name='datos']"))
    remDr$findElement("css", "a")$clickElement()
    Sys.sleep(3) # let the detail page load before reading it

    # Get the resulting data and keep the third table on the page.
    appData <- remDr$getPageSource()[[1]]
    results[[i]] <- XML::readHTMLTable(appData, which = 3)

    remDr$close()
  }
}, finally = {
  # Stopping the binary also ends any session left open by a failed iteration.
  pJS$stop()
})
有什么想法吗?我听说 PhantomJS 响应不够快可能会导致问题,所以试着多给它一些等待时间,但没有用。无论是只用一个代码 2203,还是用两个代码 c(2202, 2203),循环都无法正常工作。
该网站似乎会检查用户代理 (User-Agent):
# Look up every drug code in `cums` on the search form, presenting a desktop
# browser user agent, and keep the parsed table for each code in `results`
# (a list named by code).
#
# The parsed table is stored explicitly: a bare expression inside `for` is
# neither printed nor kept. PhantomJS is stopped in `finally`, so the process
# does not outlive a failed lookup.
appURL <- "http://web.sivicos.gov.co:8080/consultas/consultas/consreg_encabcum.jsp"
library(RSelenium)

# Drug codes to look up, as strings; extend this vector to scrape more codes.
cums <- c("2203")

pJS <- phantom(extras = c('--ssl-protocol=tlsv1'))
Sys.sleep(5) # give the binary a moment

# Capabilities shared by every session (loop-invariant).
eCap <- list(phantomjs.page.settings.userAgent
             = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0")

# Script run inside PhantomJS: enter the 'datos' frame and click its first
# link. Built once here; the text is the same on every iteration.
frameClickJS <- paste(
  c(
    "var page = this;",
    "page.switchToFrame('datos');",
    "page.evaluate(function() {",
    "document.querySelector('a').click();",
    "});",
    ""
  ),
  collapse = "\n"
)

# Preallocate one slot per code so results are never grown inside the loop.
results <- vector("list", length(cums))
names(results) <- cums

tryCatch({
  for (i in seq_along(cums)) {
    cum <- as.character(cums[[i]])

    remDr <- remoteDriver(browserName = "phantomjs", extraCapabilities = eCap)
    remDr$open()
    Sys.sleep(1) # give the binary a moment
    remDr$navigate(appURL)

    # Get the third list item of the select box (MEDICAMENTOS)
    webElem <- remDr$findElement("css", "select[name='grupo'] option:nth-child(3)")
    webElem$clickElement() # select this element

    # Type the drug code into the input named 'expediente'.
    webElem <- remDr$findElement("css", "input[name='expediente']")
    webElem$sendKeysToElement(list(cum))

    # Click the Buscar button
    remDr$findElement("id", "INPUT2")$clickElement()
    Sys.sleep(3) # let the search results load

    remDr$phantomExecute(frameClickJS)
    Sys.sleep(3) # let the detail page load before reading it

    # Get the resulting data and keep the third table on the page.
    appData <- remDr$getPageSource()[[1]]
    results[[i]] <- XML::readHTMLTable(appData, which = 3)

    remDr$close()
  }
}, finally = {
  # Stopping the binary also ends any session left open by a failed iteration.
  pJS$stop()
})