httr GET 函数超时

httr GET function time-out

我在 R 中使用 httr 包的 GET 函数时请求超时,调用方式如下:

# NOTE(review): a single quote opens right after `c(` and closes after the
# last header below, so every "name"="value" pair ends up inside ONE
# multi-line string. c() therefore receives a single unnamed string instead
# of a named character vector with one element per header.
GET("https://isir.justice.cz/isir/common/index.do", add_headers(.headers = c('"authority"="isir.justice.cz",
                                                                         "scheme"="https",
                                                                         "path"="/isir/common/index.do",
                                                                         "cache-control"="max-age=0",
                                                                         "sec-ch-ua-mobile"="?0",
                                                                         "sec-ch-ua-platform"= "Windows",
                                                                         "upgrade-insecure-requests"="1",
                                                                         "accept"="text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                                                                         "sec-fetch-site"="none",
                                                                         "sec-fetch-mode"="navigate",
                                                                         "sec-fetch-user"="?1",
                                                                         "sec-fetch-dest"="document",
                                                                         "accept-encoding"="gzip, deflate, br",
                                                                         "accept-language"="cs-CZ,cs;q=0.9"'
                                                                         )))

但是,通过 PowerShell 执行看似相同的查询却能正常返回网页。

# Browser-like request headers sent with the GET request.
$requestHeaders = @{
    "method"                    = "GET"
    "authority"                 = "isir.justice.cz"
    "scheme"                    = "https"
    "path"                      = "/isir/common/index.do"
    "cache-control"             = "max-age=0"
    "sec-ch-ua-mobile"          = "?0"
    "sec-ch-ua-platform"        = "`"Windows`""
    "upgrade-insecure-requests" = "1"
    "accept"                    = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
    "sec-fetch-site"            = "none"
    "sec-fetch-mode"            = "navigate"
    "sec-fetch-user"            = "?1"
    "sec-fetch-dest"            = "document"
    "accept-encoding"           = "gzip, deflate, br"
    "accept-language"           = "cs-CZ,cs;q=0.9"
}

# NOTE(review): $session is not defined in this snippet; presumably it is a
# web session object created beforehand - confirm before running.
Invoke-WebRequest `
    -Uri "https://isir.justice.cz/isir/common/index.do" `
    -UseBasicParsing `
    -WebSession $session `
    -Headers $requestHeaders

是我的 R 代码有问题,还是这仅仅是使用 R 与使用 PowerShell 之间的差异?

你的代码在我这里无法运行,因为其中某处多了一个 '(`c(` 之后和最后一个请求头之后各有一个单引号,把整个请求头向量包成了一个字符串)。更正之后,运行没有问题。如果你仍然不断收到超时消息,可以使用 timeout() 增加最大请求时间:
library(httr)

# Request headers as a named character vector: each element's name is the
# header name and its value is the header value.
request_headers <- c(
  "authority" = "isir.justice.cz",
  "scheme" = "https",
  "path" = "/isir/common/index.do",
  "cache-control" = "max-age=0",
  "sec-ch-ua-mobile" = "?0",
  "sec-ch-ua-platform" = "Windows",
  "upgrade-insecure-requests" = "1",
  "accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  "sec-fetch-site" = "none",
  "sec-fetch-mode" = "navigate",
  "sec-fetch-user" = "?1",
  "sec-fetch-dest" = "document",
  "accept-encoding" = "gzip, deflate, br",
  "accept-language" = "cs-CZ,cs;q=0.9"
)

# timeout(10) raises the maximum request time to 10 seconds.
x <- GET(
  "https://isir.justice.cz/isir/common/index.do",
  timeout(10),
  add_headers(.headers = request_headers)
)

顺便一提:同一位作者还开发了一个名为 httr2 的后继包。我自己目前仍在使用 httr,但学习这个新包可能是个好主意。用 httr2 的写法如下:

library(httr2)

# Build the request step by step: start from the URL, attach the
# browser-like headers, then set a 10-second timeout.
req <- request("https://isir.justice.cz/isir/common/index.do")
req <- req_headers(
  req,
  "authority" = "isir.justice.cz",
  "scheme" = "https",
  "path" = "/isir/common/index.do",
  "cache-control" = "max-age=0",
  "sec-ch-ua-mobile" = "?0",
  "sec-ch-ua-platform" = "Windows",
  "upgrade-insecure-requests" = "1",
  "accept" = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  "sec-fetch-site" = "none",
  "sec-fetch-mode" = "navigate",
  "sec-fetch-user" = "?1",
  "sec-fetch-dest" = "document",
  "accept-encoding" = "gzip, deflate, br",
  "accept-language" = "cs-CZ,cs;q=0.9"
)
req <- req_timeout(req, seconds = 10)

# Dry run: show the request line and headers built from `req`
# (recorded output below).
req_dry_run(req)
#> GET /isir/common/index.do HTTP/1.1
#> Host: isir.justice.cz
#> User-Agent: httr2/0.1.1 r-curl/4.3.2 libcurl/7.80.0
#> authority: isir.justice.cz
#> scheme: https
#> path: /isir/common/index.do
#> cache-control: max-age=0
#> sec-ch-ua-mobile: ?0
#> sec-ch-ua-platform: Windows
#> upgrade-insecure-requests: 1
#> accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
#> sec-fetch-site: none
#> sec-fetch-mode: navigate
#> sec-fetch-user: ?1
#> sec-fetch-dest: document
#> accept-encoding: gzip, deflate, br
#> accept-language: cs-CZ,cs;q=0.9

# Perform the request and print a summary of the response.
resp <- req_perform(req)
print(resp)
#> <httr2_response>
#> GET https://isir.justice.cz/isir/common/index.do
#> Status: 200 OK
#> Content-Type: text/html
#> Body: In memory (116916 bytes)

由 reprex 包 (v2.0.1) 创建于 2022-01-03