将位置列拆分为邮政编码、纬度和经度
Splitting the location column into Zipcode, Latitude and Longitude
我的数据框中有一列包含邮政编码、纬度和经度
Location
"10007 (40.71363051943297, -74.00913138370635)"
"10002 (40.71612146793143, -73.98583147024613)"
"10012 (40.72553802086304, -73.99789641059084)"
"10009 (40.72664935898081, -73.97911148500697)"
我需要将它们分成三个不同的列,例如邮政编码、纬度和经度。
我试过这样做
# Attempted call from the question. Backslashes are doubled so the R string
# literal parses and the regex sees literal parentheses. Note that the pattern
# defines two capture groups while three output column names are supplied.
extract(Location, c("Zip-Code", "Latitude", "Longitude"), "\\(([^,]+), ([^)]+)\\)")
我想利用这些经纬度,通过 ggmap 绘制地图。
谢谢
# Sample value in the "<zip> (<latitude>, <longitude>)" layout.
s.tmp <- "10007 (40.71363051943297, -74.00913138370635)"
对于 ZIP:
# Keep only the leading run of digits (the ZIP code). "\\1" is the
# backreference to the first capture group; a single-backslash '\1' would be
# read by R as the control character U+0001 instead.
gsub("([0-9]+) .*", "\\1", s.tmp)
对于纬度:
# Keep the text between the opening parenthesis and the comma (the latitude).
# The parenthesis is escaped as "\\(" so the regex treats it literally, and
# "\\1" refers back to the captured group.
gsub(".*\\((.*),.*", "\\1", s.tmp)
经度:
# Keep the text between ", " and the closing parenthesis (the longitude).
# The parenthesis is escaped as "\\)" so the regex treats it literally, and
# "\\1" refers back to the captured group.
gsub(".*, (.*)\\).*", "\\1", s.tmp)
基本正则表达式提取:
library(purrr)
# Each input has the layout "<zip> (<latitude>, <longitude>)".
# Backslashes in the pattern are doubled because the R string literal consumes
# one level of escaping before the regex engine sees the pattern.
c(
  "10007 (40.71363051943297, -74.00913138370635)",
  "10002 (40.71612146793143, -73.98583147024613)",
  "10012 (40.72553802086304, -73.99789641059084)",
  "10009 (40.72664935898081, -73.97911148500697)"
) %>%
  stringi::stri_match_all_regex(
    "([[:digit:]]+)[[:space:]]+\\(([[:digit:]\\.\\-]+),[[:space:]]+([[:digit:]\\.\\-]+)\\)"
  ) %>%
  # One match matrix per input string, stacked into a single table.
  map_df(dplyr::as_data_frame) %>%
  # V1 holds the full match; V2..V4 are the three capture groups.
  dplyr::select(zip = V2, latitude = V3, longitude = V4)
## # A tibble: 4 × 3
## zip latitude longitude
## <chr> <chr> <chr>
## 1 10007 40.71363051943297 -74.00913138370635
## 2 10002 40.71612146793143 -73.98583147024613
## 3 10012 40.72553802086304 -73.99789641059084
## 4 10009 40.72664935898081 -73.97911148500697
更具可读性:
library(purrr)
library(stringi)
library(dplyr)
library(purrr)
# Sample inputs in the layout "<zip> (<latitude>, <longitude>)".
dat <- c(
  "10007 (40.71363051943297, -74.00913138370635)",
  "10002 (40.71612146793143, -73.98583147024613)",
  "10012 (40.72553802086304, -73.99789641059084)",
  "10009 (40.72664935898081, -73.97911148500697)"
)

# Named regex fragments. Backslashes are doubled so that the regex engine
# receives a single one after R has parsed the string literal.
zip <- "([[:digit:]]+)"
num <- "([[:digit:]\\.\\-]+)"
space <- "[[:space:]]+"
lp <- "\\("
rp <- "\\)"
comma <- ","

# Full pattern: <zip> <space> ( <num> , <space> <num> )
match_str <- zip %s+% space %s+% lp %s+% num %s+%
  comma %s+% space %s+% num %s+% rp

dat %>%
  stri_match_all_regex(match_str) %>%
  # One match matrix per input string, stacked into a single table.
  map_df(as_data_frame) %>%
  # V1 holds the full match; V2..V4 are the three capture groups.
  select(zip = V2, latitude = V3, longitude = V4)
我的数据框中有一列包含邮政编码、纬度和经度
Location
"10007 (40.71363051943297, -74.00913138370635)"
"10002 (40.71612146793143, -73.98583147024613)"
"10012 (40.72553802086304, -73.99789641059084)"
"10009 (40.72664935898081, -73.97911148500697)"
我需要将它们分成三个不同的列,例如邮政编码、纬度和经度。
我试过这样做
# Attempted call from the question. Backslashes are doubled so the R string
# literal parses and the regex sees literal parentheses. Note that the pattern
# defines two capture groups while three output column names are supplied.
extract(Location, c("Zip-Code", "Latitude", "Longitude"), "\\(([^,]+), ([^)]+)\\)")
我想利用这些经纬度,通过 ggmap 绘制地图。
谢谢
# Sample value in the "<zip> (<latitude>, <longitude>)" layout.
s.tmp <- "10007 (40.71363051943297, -74.00913138370635)"
对于 ZIP:
# Keep only the leading run of digits (the ZIP code). "\\1" is the
# backreference to the first capture group; a single-backslash '\1' would be
# read by R as the control character U+0001 instead.
gsub("([0-9]+) .*", "\\1", s.tmp)
对于纬度:
# Keep the text between the opening parenthesis and the comma (the latitude).
# The parenthesis is escaped as "\\(" so the regex treats it literally, and
# "\\1" refers back to the captured group.
gsub(".*\\((.*),.*", "\\1", s.tmp)
经度:
# Keep the text between ", " and the closing parenthesis (the longitude).
# The parenthesis is escaped as "\\)" so the regex treats it literally, and
# "\\1" refers back to the captured group.
gsub(".*, (.*)\\).*", "\\1", s.tmp)
基本正则表达式提取:
library(purrr)
# Each input has the layout "<zip> (<latitude>, <longitude>)".
# Backslashes in the pattern are doubled because the R string literal consumes
# one level of escaping before the regex engine sees the pattern.
c(
  "10007 (40.71363051943297, -74.00913138370635)",
  "10002 (40.71612146793143, -73.98583147024613)",
  "10012 (40.72553802086304, -73.99789641059084)",
  "10009 (40.72664935898081, -73.97911148500697)"
) %>%
  stringi::stri_match_all_regex(
    "([[:digit:]]+)[[:space:]]+\\(([[:digit:]\\.\\-]+),[[:space:]]+([[:digit:]\\.\\-]+)\\)"
  ) %>%
  # One match matrix per input string, stacked into a single table.
  map_df(dplyr::as_data_frame) %>%
  # V1 holds the full match; V2..V4 are the three capture groups.
  dplyr::select(zip = V2, latitude = V3, longitude = V4)
## # A tibble: 4 × 3
## zip latitude longitude
## <chr> <chr> <chr>
## 1 10007 40.71363051943297 -74.00913138370635
## 2 10002 40.71612146793143 -73.98583147024613
## 3 10012 40.72553802086304 -73.99789641059084
## 4 10009 40.72664935898081 -73.97911148500697
更具可读性:
library(purrr)
library(stringi)
library(dplyr)
library(purrr)
# Sample inputs in the layout "<zip> (<latitude>, <longitude>)".
dat <- c(
  "10007 (40.71363051943297, -74.00913138370635)",
  "10002 (40.71612146793143, -73.98583147024613)",
  "10012 (40.72553802086304, -73.99789641059084)",
  "10009 (40.72664935898081, -73.97911148500697)"
)

# Named regex fragments. Backslashes are doubled so that the regex engine
# receives a single one after R has parsed the string literal.
zip <- "([[:digit:]]+)"
num <- "([[:digit:]\\.\\-]+)"
space <- "[[:space:]]+"
lp <- "\\("
rp <- "\\)"
comma <- ","

# Full pattern: <zip> <space> ( <num> , <space> <num> )
match_str <- zip %s+% space %s+% lp %s+% num %s+%
  comma %s+% space %s+% num %s+% rp

dat %>%
  stri_match_all_regex(match_str) %>%
  # One match matrix per input string, stacked into a single table.
  map_df(as_data_frame) %>%
  # V1 holds the full match; V2..V4 are the three capture groups.
  select(zip = V2, latitude = V3, longitude = V4)