使用 pivot_wider 之后进行汇总
Summarise after using pivot_wider
我正在尝试使用 tidyr
中的 pivot_wider
做一个基本的分组汇总。我想得到的是商务旅客与游客之间平均评分的差值。但是,我遇到了以下错误:
Error: Problem with `mutate()` input `for_business`.
x invalid 'labels'; length 2 should be 1 or 1
i Input `for_business` is `factor(for_business, labels = c("business", "tourist"))`.
i The error occurred in group 2: property_id = 1002.
我的代码:
library(tidyverse)
# Load the bookings data from CSV.
bookings <- read_csv("bookings.csv")
# Attempted pipeline: mean review score per property and traveller type,
# then the business-minus-tourist difference averaged over the result.
bookings %>%
group_by(property_id, for_business) %>%
summarize(avg_review_score = mean(review_score, na.rm = TRUE)) %>%
# NOTE(review): by default summarize() drops only the last grouping level, so
# the data is still grouped by property_id here and factor() runs once per
# property. A property with a single for_business value has one level and
# cannot take two labels, which matches the quoted error
# ("group 2: property_id = 1002").
# NOTE(review): labels are assigned in level order (FALSE, then TRUE), so
# FALSE would be labelled "business" -- probably reversed; confirm.
mutate(for_business = factor(for_business, labels = c("business", "tourist"))) %>%
# NOTE(review): `business` and `tourist` only become columns after the
# pivot_wider() call on the next line, so this step runs too early.
mutate(diff = business - tourist) %>%
pivot_wider(names_from = for_business, values_from = avg_review_score) %>%
summarize(avg_diff = mean(diff, na.rm = TRUE)) %>%
ungroup()
dput(head(bookings))
的输出
> dput(head(bookings))
structure(list(booker_id = c("215934017ba98c09f30dedd29237b43dad5c7b5f",
"7f590fd6d318248a48665f7f7db529aca40c84f5", "10f0f138e8bb1015d3928f2b7d828cbb50cd0804",
"7b55021a4160dde65e31963fa55a096535bcad17", "6694a79d158c7818cd63831b71bac91286db5aff",
"d0358740d5f15e85523f94ab8219f25d8c017347"), property_id = c(2668L,
4656L, 4563L, 4088L, 2188L, 4171L), room_nights = c(4L, 5L, 6L,
7L, 4L, 2L), price_per_night = c(91.4669561442773, 106.504997616816,
86.9913739625713, 92.3656155139053, 104.838941902747, 109.981876495045
), checkin_day = c("mon", "tue", "wed", "fri", "tue", "fri"),
for_business = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), status = c("cancelled", "cancelled", "stayed", "stayed",
"stayed", "cancelled"), review_score = c(NA, NA, 6.25812265672399,
5.953597754693, 6.43474489539585, NA)), row.names = c(NA,
6L), class = "data.frame")
您还没有分享足够的数据,但您可以尝试:
library(dplyr)
library(tidyr)
# Average, over properties, of (business mean score - tourist mean score).
# Returns a one-row tibble with a single column `avg_diff`.
bookings %>%
  group_by(property_id, for_business) %>%
  # One mean score per property and traveller type. `.groups = "drop"`
  # returns ungrouped data, so the later steps run on the whole table
  # instead of once per property.
  summarize(
    avg_review_score = mean(review_score, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # FALSE -> "tourist", TRUE -> "business". Declaring both levels up front
  # lets pivot_wider() create both columns even if one traveller type never
  # occurs in the data.
  mutate(
    for_business = factor(
      c("tourist", "business")[for_business + 1],
      levels = c("tourist", "business")
    )
  ) %>%
  # names_expand = TRUE (tidyr >= 1.2.0) adds a column for every factor
  # level, filled with NA where absent, so `business - tourist` below cannot
  # fail with "object not found".
  pivot_wider(
    names_from = for_business,
    values_from = avg_review_score,
    names_expand = TRUE
  ) %>%
  mutate(diff = business - tourist) %>%
  # Properties missing either traveller type have an NA diff and are skipped.
  summarize(avg_diff = mean(diff, na.rm = TRUE))
我正在尝试使用 tidyr
中的 pivot_wider
做一个基本的分组汇总。我想得到的是商务旅客与游客之间平均评分的差值。但是,我遇到了以下错误:
Error: Problem with `mutate()` input `for_business`.
x invalid 'labels'; length 2 should be 1 or 1
i Input `for_business` is `factor(for_business, labels = c("business", "tourist"))`.
i The error occurred in group 2: property_id = 1002.
我的代码:
library(tidyverse)
# Load the bookings data from CSV.
bookings <- read_csv("bookings.csv")
# Attempted pipeline: mean review score per property and traveller type,
# then the business-minus-tourist difference averaged over the result.
bookings %>%
group_by(property_id, for_business) %>%
summarize(avg_review_score = mean(review_score, na.rm = TRUE)) %>%
# NOTE(review): by default summarize() drops only the last grouping level, so
# the data is still grouped by property_id here and factor() runs once per
# property. A property with a single for_business value has one level and
# cannot take two labels, which matches the quoted error
# ("group 2: property_id = 1002").
# NOTE(review): labels are assigned in level order (FALSE, then TRUE), so
# FALSE would be labelled "business" -- probably reversed; confirm.
mutate(for_business = factor(for_business, labels = c("business", "tourist"))) %>%
# NOTE(review): `business` and `tourist` only become columns after the
# pivot_wider() call on the next line, so this step runs too early.
mutate(diff = business - tourist) %>%
pivot_wider(names_from = for_business, values_from = avg_review_score) %>%
summarize(avg_diff = mean(diff, na.rm = TRUE)) %>%
ungroup()
dput(head(bookings))
> dput(head(bookings))
structure(list(booker_id = c("215934017ba98c09f30dedd29237b43dad5c7b5f",
"7f590fd6d318248a48665f7f7db529aca40c84f5", "10f0f138e8bb1015d3928f2b7d828cbb50cd0804",
"7b55021a4160dde65e31963fa55a096535bcad17", "6694a79d158c7818cd63831b71bac91286db5aff",
"d0358740d5f15e85523f94ab8219f25d8c017347"), property_id = c(2668L,
4656L, 4563L, 4088L, 2188L, 4171L), room_nights = c(4L, 5L, 6L,
7L, 4L, 2L), price_per_night = c(91.4669561442773, 106.504997616816,
86.9913739625713, 92.3656155139053, 104.838941902747, 109.981876495045
), checkin_day = c("mon", "tue", "wed", "fri", "tue", "fri"),
for_business = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), status = c("cancelled", "cancelled", "stayed", "stayed",
"stayed", "cancelled"), review_score = c(NA, NA, 6.25812265672399,
5.953597754693, 6.43474489539585, NA)), row.names = c(NA,
6L), class = "data.frame")
您还没有分享足够的数据,但您可以尝试:
library(dplyr)
library(tidyr)
# Average, over properties, of (business mean score - tourist mean score).
# Returns a one-row tibble with a single column `avg_diff`.
bookings %>%
  group_by(property_id, for_business) %>%
  # One mean score per property and traveller type. `.groups = "drop"`
  # returns ungrouped data, so the later steps run on the whole table
  # instead of once per property.
  summarize(
    avg_review_score = mean(review_score, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # FALSE -> "tourist", TRUE -> "business". Declaring both levels up front
  # lets pivot_wider() create both columns even if one traveller type never
  # occurs in the data.
  mutate(
    for_business = factor(
      c("tourist", "business")[for_business + 1],
      levels = c("tourist", "business")
    )
  ) %>%
  # names_expand = TRUE (tidyr >= 1.2.0) adds a column for every factor
  # level, filled with NA where absent, so `business - tourist` below cannot
  # fail with "object not found".
  pivot_wider(
    names_from = for_business,
    values_from = avg_review_score,
    names_expand = TRUE
  ) %>%
  mutate(diff = business - tourist) %>%
  # Properties missing either traveller type have an NA diff and are skipped.
  summarize(avg_diff = mean(diff, na.rm = TRUE))