如何将以英尺和英寸表示的身高字符向量转换为厘米?
Convert character vector of height in inches to cm?
我得到了一个字符向量:
tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
我想把它转换成厘米。
请告知我该怎么做?
> dat <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
> dat$inches <- gsub("[\"]", "",dat$H) %>%
strsplit(., "'") %>%
lapply(., function(x) {
x <- as.numeric(x);
(x[1]*30.48) + (x[2]/12)*30.48
}) %>%
unlist
> dat
# A tibble: 6 x 2
H inches
<chr> <dbl>
1 "6'2\"" 188.
2 "5'10\"" 178.
3 "5'5\"" 165.
4 "5'1\"" 155.
5 "5'5\"" 165.
6 "5'4\"" 163.
您也可以使用 purrr 的 map_dbl
代替 lapply 和 unlist
> gsub("[\"]", "",dat$H) %>%
strsplit(., "'") %>%
map_dbl(function(x){
x <- as.numeric(x)
(x[1]*30.48) + (x[2]/12)*30.48
})
[1] 187.96 177.80 165.10 154.94 165.10 162.56
有几种方法可以使用
1)粘贴成单个字符串后用fread
读取
library(data.table)
# Remove the first double quote from each height, join the strings with
# newlines and let fread parse the text as columns split at the apostrophe.
# The parsed columns are multiplied (matrix product) by c(30.48, 2.54) and
# the first column of that product is taken.
# NOTE(review): df1 is not defined in this snippet; it is assumed to be the
# tibble from the question.
fread(paste(sub('"', "", df1$H), collapse="\n"), sep="'")[,
as.matrix(.SD) %*% c(30.48, 2.54)][,1]
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
2) 使用 gsubfn
library(gsubfn)
# Replace each feet'inches match with its length in centimetres. The two
# capture groups are passed to the formula as x (feet) and y (inches). The
# double quote is stripped first so the whole string is replaced by a number.
# Backslashes are doubled because "\d" is not a valid escape in an R string;
# the feet group uses \\d+ so heights of 10 ft or more are parsed correctly.
as.numeric(gsubfn("(\\d+)'(\\d+)", ~ as.numeric(x) * 30.48 +
  as.numeric(y) * 2.54, sub('"', '', df1$H)))
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
3) 与 separate
library(tidyverse)
# Split H at runs of non-alphanumeric characters into feet (H1) and inches
# (H2), converting both to numbers. The closing double quote leaves an empty
# third piece, so extra = "drop" discards it without emitting a warning.
df1 %>%
  separate(H, into = c("H1", "H2"), extra = "drop", convert = TRUE) %>%
  # 30.48 cm per foot, 2.54 cm per inch
  transmute(H = H1 * 30.48 + H2 * 2.54)
# A tibble: 6 x 1
# H
# <dbl>
#1 188.
#2 178.
#3 165.
#4 155.
#5 165.
#6 163.
4) 与 measurements
library(measurements)
library(tidyverse)
# Split H into feet (H1) and inches (H2) as numbers. The closing double quote
# leaves an empty third piece, so extra = "drop" discards it without a warning.
df1 %>%
  separate(H, into = c("H1", "H2"), extra = "drop", convert = TRUE) %>%
  # Let conv_unit do the unit arithmetic instead of hard-coded factors.
  transmute(H = conv_unit(H1, "ft", "cm") + conv_unit(H2, "inch", "cm"))
一种选择是提取所有数字并将其转换为矩阵,然后执行计算。
# Pull every run of digits out of each height; simplify = TRUE returns a
# character matrix with one row per height. The backslash is doubled because
# "\d" is not a valid escape in an R string literal.
mat <- stringr::str_extract_all(df$H, "\\d+", simplify = TRUE)
# Column 1 holds feet (30.48 cm each), column 2 holds inches (2.54 cm each).
as.numeric(mat[, 1]) * 30.48 + as.numeric(mat[, 2]) * 2.54
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
其中 mat
是
# [,1] [,2]
#[1,] "6" "2"
#[2,] "5" "10"
#[3,] "5" "5"
#[4,] "5" "1"
#[5,] "5" "5"
#[6,] "5" "4"
第一列是英尺,第二列是英寸。
出于好奇,我想在 base R 中解决这个问题
# Rewrite each height as "feet-inches", split on the dash, then convert the
# two parts to centimetres. Backslashes are doubled because "\d" and "\1" are
# not valid regex escapes inside an R string literal. vapply guarantees a
# numeric result even for empty input.
vapply(strsplit(sub("(\\d+)'(\\d+).*", "\\1-\\2", df$H), "-"), function(x)
  as.numeric(x[1]) * 30.48 + as.numeric(x[2]) * 2.54, numeric(1))
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
这遵循类似的逻辑,使用 sub
从字符串中提取 2 个数字,使用 strsplit
拆分它们,然后将每个数字转换为数字并执行计算。
使用stringi
包提取相关单位:
library(stringi)
Raw <- c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\"")
## Extract Feet units by regex searching for 1 or more digits followed by a '
Feet <- stri_extract_first_regex(Raw, "[[:digit:]]+(?=')")
## Extract Inch units by regex searching for 1 or 2 digits followed by a "
Inches <- stri_extract_first_regex(Raw, "[[:digit:]]{1,2}(?=\")")
## Combine Feet and Inches
TotalInches <- 12 * as.numeric(Feet) + as.numeric(Inches)
## Convert to cm
CM <- 2.54 * TotalInches
print(CM)
# [1] 187.96 177.80 165.10 154.94 165.10 162.56
如果您需要对多列执行此操作,可以在脚本顶部把这些步骤定义成一个函数,这样调用起来更简洁,也不需要把中间结果存储在全局环境中。
此函数版本中的另一个考虑是将未匹配到的 NA 替换为 0,这样像 1' 或 11" 这样的有效测量值就能得到有效结果,而不是 NA。
#' Convert heights written as feet'inches" to centimetres.
#'
#' @param x Character vector of heights such as "6'2\"", "1'" or "11\"".
#' @return Numeric vector of heights in centimetres, the same length as `x`.
FtInToCm <- function(x){
  # Use the argument, not the global `Raw`, so the function works on any input.
  # Digits immediately before an apostrophe are the feet part.
  Feet <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]+(?=')"))
  # One or two digits immediately before a double quote are the inches part.
  Inches <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]{1,2}(?=\")"))
  # A missing part (e.g. "1'" or "11\"") counts as 0 instead of producing NA.
  Feet[is.na(Feet)] <- 0
  Inches[is.na(Inches)] <- 0
  2.54 * (12 * Feet + Inches)
}
FtInToCm(Raw)
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
我添加另一个答案,只是为了给你另一个选择,因为我在看到其他答案之前已经写好了。
我先把字符串转成数字再转单位:
library(dplyr)
library(stringr)
df <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
# Backslashes are doubled throughout because "\d" is not a valid escape in an
# R string literal.
df %>%
  mutate(foot = str_extract(H, "^\\d+'"),
         inch = str_extract(H, "\\d+\"$")) %>% # split foot from inch
  mutate(foot = as.numeric(str_remove(foot, "[^\\d]")),
         inch = as.numeric(str_remove(inch, "[^\\d]"))) %>% # convert to numeric
  mutate(H_new = cm(foot * 12) + cm(inch)) # convert units
# A tibble: 6 x 4
H foot inch H_new
<chr> <dbl> <dbl> <dbl>
1 "6'2\"" 6 2 188.
2 "5'10\"" 5 10 178.
3 "5'5\"" 5 5 165.
4 "5'1\"" 5 1 155.
5 "5'5\"" 5 5 165.
6 "5'4\"" 5 4 163.
我得到了一个字符向量:
tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
我想把它转换成厘米。
请告知我该怎么做?
> dat <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
> dat$inches <- gsub("[\"]", "",dat$H) %>%
strsplit(., "'") %>%
lapply(., function(x) {
x <- as.numeric(x);
(x[1]*30.48) + (x[2]/12)*30.48
}) %>%
unlist
> dat
# A tibble: 6 x 2
H inches
<chr> <dbl>
1 "6'2\"" 188.
2 "5'10\"" 178.
3 "5'5\"" 165.
4 "5'1\"" 155.
5 "5'5\"" 165.
6 "5'4\"" 163.
您也可以使用 purrr 的 map_dbl
代替 lapply 和 unlist
> gsub("[\"]", "",dat$H) %>%
strsplit(., "'") %>%
map_dbl(function(x){
x <- as.numeric(x)
(x[1]*30.48) + (x[2]/12)*30.48
})
[1] 187.96 177.80 165.10 154.94 165.10 162.56
有几种方法可以使用
1)粘贴成单个字符串后用fread
读取
library(data.table)
# Remove the first double quote from each height, join the strings with
# newlines and let fread parse the text as columns split at the apostrophe.
# The parsed columns are multiplied (matrix product) by c(30.48, 2.54) and
# the first column of that product is taken.
# NOTE(review): df1 is not defined in this snippet; it is assumed to be the
# tibble from the question.
fread(paste(sub('"', "", df1$H), collapse="\n"), sep="'")[,
as.matrix(.SD) %*% c(30.48, 2.54)][,1]
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
2) 使用 gsubfn
library(gsubfn)
# Replace each feet'inches match with its length in centimetres. The two
# capture groups are passed to the formula as x (feet) and y (inches). The
# double quote is stripped first so the whole string is replaced by a number.
# Backslashes are doubled because "\d" is not a valid escape in an R string;
# the feet group uses \\d+ so heights of 10 ft or more are parsed correctly.
as.numeric(gsubfn("(\\d+)'(\\d+)", ~ as.numeric(x) * 30.48 +
  as.numeric(y) * 2.54, sub('"', '', df1$H)))
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
3) 与 separate
library(tidyverse)
# Split H at runs of non-alphanumeric characters into feet (H1) and inches
# (H2), converting both to numbers. The closing double quote leaves an empty
# third piece, so extra = "drop" discards it without emitting a warning.
df1 %>%
  separate(H, into = c("H1", "H2"), extra = "drop", convert = TRUE) %>%
  # 30.48 cm per foot, 2.54 cm per inch
  transmute(H = H1 * 30.48 + H2 * 2.54)
# A tibble: 6 x 1
# H
# <dbl>
#1 188.
#2 178.
#3 165.
#4 155.
#5 165.
#6 163.
4) 与 measurements
library(measurements)
library(tidyverse)
# Split H into feet (H1) and inches (H2) as numbers. The closing double quote
# leaves an empty third piece, so extra = "drop" discards it without a warning.
df1 %>%
  separate(H, into = c("H1", "H2"), extra = "drop", convert = TRUE) %>%
  # Let conv_unit do the unit arithmetic instead of hard-coded factors.
  transmute(H = conv_unit(H1, "ft", "cm") + conv_unit(H2, "inch", "cm"))
一种选择是提取所有数字并将其转换为矩阵,然后执行计算。
# Pull every run of digits out of each height; simplify = TRUE returns a
# character matrix with one row per height. The backslash is doubled because
# "\d" is not a valid escape in an R string literal.
mat <- stringr::str_extract_all(df$H, "\\d+", simplify = TRUE)
# Column 1 holds feet (30.48 cm each), column 2 holds inches (2.54 cm each).
as.numeric(mat[, 1]) * 30.48 + as.numeric(mat[, 2]) * 2.54
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
其中 mat
是
# [,1] [,2]
#[1,] "6" "2"
#[2,] "5" "10"
#[3,] "5" "5"
#[4,] "5" "1"
#[5,] "5" "5"
#[6,] "5" "4"
第一列是英尺,第二列是英寸。
出于好奇,我想在 base R 中解决这个问题
# Rewrite each height as "feet-inches", split on the dash, then convert the
# two parts to centimetres. Backslashes are doubled because "\d" and "\1" are
# not valid regex escapes inside an R string literal. vapply guarantees a
# numeric result even for empty input.
vapply(strsplit(sub("(\\d+)'(\\d+).*", "\\1-\\2", df$H), "-"), function(x)
  as.numeric(x[1]) * 30.48 + as.numeric(x[2]) * 2.54, numeric(1))
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
这遵循类似的逻辑,使用 sub
从字符串中提取 2 个数字,使用 strsplit
拆分它们,然后将每个数字转换为数字并执行计算。
使用stringi
包提取相关单位:
library(stringi)
Raw <- c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\"")
## Extract Feet units by regex searching for 1 or more digits followed by a '
Feet <- stri_extract_first_regex(Raw, "[[:digit:]]+(?=')")
## Extract Inch units by regex searching for 1 or 2 digits followed by a "
Inches <- stri_extract_first_regex(Raw, "[[:digit:]]{1,2}(?=\")")
## Combine Feet and Inches
TotalInches <- 12 * as.numeric(Feet) + as.numeric(Inches)
## Convert to cm
CM <- 2.54 * TotalInches
print(CM)
# [1] 187.96 177.80 165.10 154.94 165.10 162.56
如果您需要对多列执行此操作,可以在脚本顶部把这些步骤定义成一个函数,这样调用起来更简洁,也不需要把中间结果存储在全局环境中。
此函数版本中的另一个考虑是将未匹配到的 NA 替换为 0,这样像 1' 或 11" 这样的有效测量值就能得到有效结果,而不是 NA。
#' Convert heights written as feet'inches" to centimetres.
#'
#' @param x Character vector of heights such as "6'2\"", "1'" or "11\"".
#' @return Numeric vector of heights in centimetres, the same length as `x`.
FtInToCm <- function(x){
  # Use the argument, not the global `Raw`, so the function works on any input.
  # Digits immediately before an apostrophe are the feet part.
  Feet <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]+(?=')"))
  # One or two digits immediately before a double quote are the inches part.
  Inches <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]{1,2}(?=\")"))
  # A missing part (e.g. "1'" or "11\"") counts as 0 instead of producing NA.
  Feet[is.na(Feet)] <- 0
  Inches[is.na(Inches)] <- 0
  2.54 * (12 * Feet + Inches)
}
FtInToCm(Raw)
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
我添加另一个答案,只是为了给你另一个选择,因为我在看到其他答案之前已经写好了。
我先把字符串转成数字再转单位:
library(dplyr)
library(stringr)
df <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
# Backslashes are doubled throughout because "\d" is not a valid escape in an
# R string literal.
df %>%
  mutate(foot = str_extract(H, "^\\d+'"),
         inch = str_extract(H, "\\d+\"$")) %>% # split foot from inch
  mutate(foot = as.numeric(str_remove(foot, "[^\\d]")),
         inch = as.numeric(str_remove(inch, "[^\\d]"))) %>% # convert to numeric
  mutate(H_new = cm(foot * 12) + cm(inch)) # convert units
# A tibble: 6 x 4
H foot inch H_new
<chr> <dbl> <dbl> <dbl>
1 "6'2\"" 6 2 188.
2 "5'10\"" 5 10 178.
3 "5'5\"" 5 5 165.
4 "5'1\"" 5 1 155.
5 "5'5\"" 5 5 165.
6 "5'4\"" 5 4 163.