遇到空白值时出现 purrr 函数式编程错误

Question

如果这个问题过于简单，先说声抱歉——它可能只是一个简单的错误处理问题。当其中一个变量遇到空白值时（本例中为 'num_views' 变量），这段代码就会中断。有没有办法让任何空白值都返回 'NA'？如果有任何建议，我将不胜感激。

报错信息为：“错误：tibble 中的所有列都必须是向量。num_views 列是一个函数。”

# Custom functions
# Scrape the listing fields (address, link, property type, price, agent,
# listing date, view count) and return them as columns of a tibble.
#
# NOTE(review): this is the failing version the question is about. Three things
# to be aware of when reading it:
#   * the parameter is `listing`, but every pipeline below reads the global
#     `listings` nodeset instead, so the argument is never used;
#   * each XPath starts with `//`, which xml2 documents as searching the whole
#     document rather than only beneath the current node;
#   * `possibly()` is given the already-evaluated pipeline result, not a
#     function (see the note on `num_views`).
parse_listing <- function(listing){
  
  # Review content
  address <- listings %>% html_nodes(xpath = '//p[@data-testid="listing-description"]') %>% html_text2()
  link <- listings %>% html_nodes(xpath = '//div[@data-testid="search-result"]/div/div/a[2]') %>% html_attr('href') %>% paste("https://www.zoopla.co.uk", ., sep="")
  prop_type <- listings %>% html_nodes(xpath = '//h2[@data-testid="listing-title"]') %>% html_text2()
  price <- listings %>% html_nodes(xpath = '//div[@data-testid="listing-price"]/p[@size="6"]') %>% html_text2() %>% str_remove_all("[£,]")
  est_agent <- listings %>% html_nodes(xpath = '//div[@data-testid="search-result"]//a/img') %>% html_attr('alt') %>% str_remove('Marketed by ')
  date_listed <- listings %>% html_nodes(xpath = '//span[@data-testid="date-published"]') %>% html_text2() %>% str_remove('Listed on ')  %>% dmy()
  # `possibly()` returns a wrapped *function*; it does not run the pipeline
  # safely. `num_views` therefore holds a function, which is what the reported
  # error ("num_views column is a function") complains about in `tibble()`.
  num_views <- possibly(listings %>% html_nodes(xpath = '//span[@data-testid="number-of-views"]') %>% html_text2() %>% str_remove(' views'), otherwise = NULL)
  
  # All columns must be vectors of compatible length for this to succeed.
  tibble(address, link, prop_type, price, est_agent, date_listed, num_views)
}

# Script: download one page of search results and parse every listing on it.
link <- 'https://www.zoopla.co.uk/for-sale/property/wd3/?page_size=25&q=wd3&radius=0&results_sort=most_popular&search_source=refine&pn=7'
page <- read_html(link)
# Select the nodes carrying the "search-result" test id.
listings <- html_nodes(page, xpath = '//div[@data-testid="search-result"]')

# Apply parse_listing() to each node and row-bind the results.
wd3p7 <- map_dfr(listings, parse_listing)

Answer 1

用 tryCatch 或 possibly/safely（来自 purrr）把可能出错的代码包装起来，以便在出错时返回所需的值。

library(purrr)
library(rvest)
library(dplyr)
library(lubridate)
library(stringr)
 # Parse a single search-result node into a one-row tibble.
 #
 # Args:
 #   listing: one node of the "search-result" nodeset (one property card).
 #
 # Returns:
 #   A one-row tibble with columns `address`, `link`, `prop_type`,
 #   `est_agent` (character), `price`, `num_views` (integer) and
 #   `date_listed` (Date). A field that is absent from the listing is NA,
 #   so every listing yields exactly one row and rows always bind cleanly.
 parse_listing <- function(listing) {
   # Text (or the attribute `attr`) of the first node beneath `listing` that
   # matches `xpath`. Every XPath passed in starts with "." so it is resolved
   # relative to this listing; a leading "//" would search the whole document
   # and return every listing's value on each call. `html_element()` gives a
   # missing node when nothing matches, which the extractors turn into NA;
   # `possibly()` additionally maps any error to NA.
   first_match <- possibly(
     function(xpath, attr = NULL) {
       node <- html_element(listing, xpath = xpath)
       if (is.null(attr)) html_text2(node) else html_attr(node, attr)
     },
     otherwise = NA_character_
   )

   # Text that is not a whole number becomes NA instead of changing the column
   # type, so the column is integer for every listing.
   as_whole_number <- function(x) suppressWarnings(as.integer(x))

   href <- first_match("./div/div/a[2]", attr = "href")
   price_text <- first_match('.//div[@data-testid="listing-price"]/p[@size="6"]')
   agent_alt <- first_match(".//a/img", attr = "alt")
   listed_on <- first_match('.//span[@data-testid="date-published"]')
   views_text <- first_match('.//span[@data-testid="number-of-views"]')

   tibble(
     address = first_match('.//p[@data-testid="listing-description"]'),
     # str_c() propagates NA, so a missing href stays NA rather than becoming
     # a URL that ends in "NA".
     link = str_c("https://www.zoopla.co.uk", href),
     prop_type = first_match('.//h2[@data-testid="listing-title"]'),
     price = as_whole_number(str_remove_all(price_text, "[£,]")),
     est_agent = str_remove(agent_alt, "Marketed by "),
     # dmy() returns a Date and passes NA through unchanged.
     date_listed = dmy(str_remove(listed_on, "Listed on ")),
     num_views = as_whole_number(str_remove(views_text, " views"))
   )
 }

-测试

link <- 'https://www.zoopla.co.uk/for-sale/property/wd3/?page_size=25&q=wd3&radius=0&results_sort=most_popular&search_source=refine&pn=7'
page <- read_html(link)
listings <- html_nodes(page, xpath = '//div[@data-testid="search-result"]')
# Name each node by its own HTML so that `.id` records, in a `listing` column,
# which node every output row came from.
wd3p7 <- map_dfr(
  setNames(listings, listings),
  parse_listing,
  .id = "listing"
)

-输出

> wd3p7
# A tibble: 625 × 8
   listing                                         address               link                prop_type     price est_agent        date_listed num_views
   <chr>                                           <chr>                 <chr>               <chr>         <int> <chr>                  <int>     <int>
 1 "<div data-testid=\"search-result\" class=\"ea… High Street, Rickman… https://www.zoopla… 2 bed flat … 400000 Gibbs Gillespie…       19039        40
 2 "<div data-testid=\"search-result\" class=\"ea… High Street, Rickman… https://www.zoopla… 1 bed prope… 315000 Trend & Thomas,…       19033        34
 3 "<div data-testid=\"search-result\" class=\"ea… Springwell Lane, Ric… https://www.zoopla… 2 bed flat … 375000 Purplebricks, H…       19044        32
 4 "<div data-testid=\"search-result\" class=\"ea… Rectory Road, Rickma… https://www.zoopla… 1 bed flat … 315000 Trend & Thomas,…       18897        30
 5 "<div data-testid=\"search-result\" class=\"ea… Penn House, 30 High … https://www.zoopla… Studio for … 270000 Gibbs Gillespie…       18982        25
 6 "<div data-testid=\"search-result\" class=\"ea… The Forge, Bury Lane… https://www.zoopla… 2 bed flat … 425000 Robsons, WD3           19005        25
 7 "<div data-testid=\"search-result\" class=\"ea… High Street, Rickman… https://www.zoopla… 1 bed prope… 299950 Trend & Thomas,…       18792        26
 8 "<div data-testid=\"search-result\" class=\"ea… The Forge, Bury Lane… https://www.zoopla… 2 bed flat … 450000 Savills - Rickm…       18960        19
 9 "<div data-testid=\"search-result\" class=\"ea… Homestead Road, Rick… https://www.zoopla… 1 bed flat … 279000 Trend & Thomas,…       18654        18
10 "<div data-testid=\"search-result\" class=\"ea… High Street, Rickman… https://www.zoopla… 1 bed flat … 270000 Trend & Thomas,…       18463        18
# … with 615 more rows

遇到空白值时出现 purrr 函数式编程错误

Purrr functional programming error when hitting blank values

functional-programming

r

rvest

purrr

tidyverse