在 R 中将列转换为行
Converting columns into rows in r
我使用下面的代码生成了以下数据:
# Example input: one row per trip, with the destination ("to") and origin
# ("from") coordinates stored side by side in separate columns.
test <- data.frame(
  dis      = c(10, 20, 30, 40),
  dur      = c(30, 40, 60, 90),
  method   = c("car", "car", "Bicycle", "Bicycle"),
  to_lon   = c(-1.980, -1.5678, -1.324, -1.456),
  to_lat   = c(55.3009, 55.3416, 55.1123, 55.2234),
  from_lon = c(-1.4565, -1.3424, -1.4566, -1.1111),
  from_lat = c(76.8888, 65.8999, 76.9088, 25.3344)
)
dis dur method to_lon to_lat from_lon from_lat
1 10 30 car -1.9800 55.3009 -1.4565 76.8888
2 20 40 car -1.5678 55.3416 -1.3424 65.8999
3 30 60 Bicycle -1.3240 55.1123 -1.4566 76.9088
4 40 90 Bicycle -1.4560 55.2234 -1.1111 25.3344
我想转换此数据框,使其在一行中包含 to_lat 和 to_lon,在下一行中包含 from_lat 和 from_lon。其余细节不需要更改,可以复制。期望的结果应该如下
dis dur method longitude latitude
from 10 30 car -1.98 55.3009
to 10 30 car -1.4565 76.8888
from 20 40 car -1.5678 55.3416
to 20 40 car -1.3424 65.8999
from 30 60 Bicycle -1.324 55.1123
to 30 60 Bicycle -1.4566 76.9088
from 40 90 Bicycle -1.456 55.2234
to 40 90 Bicycle -1.1111 25.3344
任何帮助将不胜感激。
谢谢。
我们可以使用 data.table 中的 melt,它可以接受多个 measure 列。
library(data.table)

# Reshape to long form. Each pattern passed to `measure.vars` collects one
# group of columns (all "lon" columns, all "lat" columns) into its own value
# column, named by `value.name`.
# Note: setDT() converts `test` to a data.table by reference.
dM <- melt(
  setDT(test),
  measure.vars = patterns("lon", "lat"),
  value.name = c("longitude", "latitude")
)

# `variable` holds the position of each column within its pattern group;
# map that index onto the row labels used in the desired output.
# NOTE(review): index 1 corresponds to the first matching columns
# (`to_lon`/`to_lat` in `test`), so these labels mirror the expected output in
# the question rather than the source column prefixes -- confirm intended.
dM[, variable := c("from", "to")[variable]]

# Number the rows within each label so the two rows that came from the same
# original record share an index, then sort on it to interleave them.
dM[, i1 := seq_len(.N), by = variable]
res <- dM[order(i1)][, i1 := NULL]
res
# dis dur method variable longitude latitude
#1: 10 30 car from -1.9800 55.3009
#2: 10 30 car to -1.4565 76.8888
#3: 20 40 car from -1.5678 55.3416
#4: 20 40 car to -1.3424 65.8999
#5: 30 60 Bicycle from -1.3240 55.1123
#6: 30 60 Bicycle to -1.4566 76.9088
#7: 40 90 Bicycle from -1.4560 55.2234
#8: 40 90 Bicycle to -1.1111 25.3344
这可能不是最优雅的解决方案,但它应该可以工作并且可以理解:
我们将数据分成两个数据框:一个包含 'from' 经度和纬度数据(称为 testF),另一个包含 'to' 数据(称为 test)。然后我们使用 rbind 将 'testF' 的行插入 'test' 中的适当位置。
# Example input: one row per trip with "to" and "from" coordinates side by side.
test <- data.frame(
  dis      = c(10, 20, 30, 40),
  dur      = c(30, 40, 60, 90),
  method   = c("car", "car", "Bicycle", "Bicycle"),
  to_lon   = c(-1.980, -1.5678, -1.324, -1.456),
  to_lat   = c(55.3009, 55.3416, 55.1123, 55.2234),
  from_lon = c(-1.4565, -1.3424, -1.4566, -1.1111),
  from_lat = c(76.8888, 65.8999, 76.9088, 25.3344)
)

shared_cols <- c("dis", "dur", "method")
coord_names <- c("longitude", "latitude")

# "from" coordinates, with the coordinate columns renamed to the common names.
testF <- test[, c(shared_cols, "from_lon", "from_lat")]
names(testF)[4:5] <- coord_names

# "to" coordinates, renamed the same way so both frames can be stacked.
test <- test[, c(shared_cols, "to_lon", "to_lat")]
names(test)[4:5] <- coord_names

# Stack both frames once, then interleave them: ordering on the repeated trip
# index is stable, so each trip's "to" row is followed by its "from" row.
# This avoids growing the data frame with rbind() inside a loop and also
# works when the input has zero rows.
n_trips <- nrow(test)
test <- rbind(test, testF)[order(rep(seq_len(n_trips), times = 2L)), ]
rownames(test) <- NULL
这是使用 tidyr 包(一种流行的数据处理包)的替代方法,它避免了 for 循环。
library(tidyr)

# Example input: one row per trip with "to" and "from" coordinates side by side.
test <- data.frame(
  dis      = c(10, 20, 30, 40),
  dur      = c(30, 40, 60, 90),
  method   = c("car", "car", "Bicycle", "Bicycle"),
  to_lon   = c(-1.980, -1.5678, -1.324, -1.456),
  to_lat   = c(55.3009, 55.3416, 55.1123, 55.2234),
  from_lon = c(-1.4565, -1.3424, -1.4566, -1.1111),
  from_lat = c(76.8888, 65.8999, 76.9088, 25.3344)
)

# Row identifier so the two output rows of one trip can be kept together.
test$id <- seq_len(nrow(test))

# Split every coordinate column name at "_" into a direction ("to"/"from")
# and a measure ("lon"/"lat"). The special ".value" entry turns the measures
# into output columns, so longitude and latitude are reshaped in one pass
# instead of two separate gathers glued together by row position.
long <- pivot_longer(
  test,
  cols = c(to_lon, to_lat, from_lon, from_lat),
  names_to = c("direction", ".value"),
  names_sep = "_"
)

# Assemble a plain data frame with the descriptive coordinate names.
d3 <- data.frame(
  direction = factor(long$direction),
  dis       = long$dis,
  dur       = long$dur,
  method    = long$method,
  latitude  = long$lat,
  longitude = long$lon,
  id        = long$id
)

# Sort by trip so each trip's "to" and "from" rows are adjacent.
d3[order(d3$id), ]
我使用下面的代码生成了以下数据:
test <- data.frame(dis = c(10,20,30,40),dur=c(30,40,60,90),method=c("car","car","Bicycle","Bicycle"),to_lon=c(-1.980,-1.5678,-1.324,-1.456),to_lat=c(55.3009,55.3416,55.1123,55.2234),from_lon=c(-1.4565,-1.3424,-1.4566,-1.1111),from_lat=c(76.8888,65.8999,76.9088,25.3344))
dis dur method to_lon to_lat from_lon from_lat
1 10 30 car -1.9800 55.3009 -1.4565 76.8888
2 20 40 car -1.5678 55.3416 -1.3424 65.8999
3 30 60 Bicycle -1.3240 55.1123 -1.4566 76.9088
4 40 90 Bicycle -1.4560 55.2234 -1.1111 25.3344
我想转换此数据框,使其在一行中包含 to_lat 和 to_lon,在下一行中包含 from_lat 和 from_lon。其余细节不需要更改,可以复制。期望的结果应该如下
dis dur method longitude latitude
from 10 30 car -1.98 55.3009
to 10 30 car -1.4565 76.8888
from 20 40 car -1.5678 55.3416
to 20 40 car -1.3424 65.8999
from 30 60 Bicycle -1.324 55.1123
to 30 60 Bicycle -1.4566 76.9088
from 40 90 Bicycle -1.456 55.2234
to 40 90 Bicycle -1.1111 25.3344
任何帮助将不胜感激。
谢谢。
我们可以使用 data.table 中的 melt,它可以接受多个 measure 列。
library(data.table)

# Reshape to long form. Each pattern passed to `measure.vars` collects one
# group of columns (all "lon" columns, all "lat" columns) into its own value
# column, named by `value.name`.
# Note: setDT() converts `test` to a data.table by reference.
dM <- melt(
  setDT(test),
  measure.vars = patterns("lon", "lat"),
  value.name = c("longitude", "latitude")
)

# `variable` holds the position of each column within its pattern group;
# map that index onto the row labels used in the desired output.
# NOTE(review): index 1 corresponds to the first matching columns
# (`to_lon`/`to_lat` in `test`), so these labels mirror the expected output in
# the question rather than the source column prefixes -- confirm intended.
dM[, variable := c("from", "to")[variable]]

# Number the rows within each label so the two rows that came from the same
# original record share an index, then sort on it to interleave them.
dM[, i1 := seq_len(.N), by = variable]
res <- dM[order(i1)][, i1 := NULL]
res
# dis dur method variable longitude latitude
#1: 10 30 car from -1.9800 55.3009
#2: 10 30 car to -1.4565 76.8888
#3: 20 40 car from -1.5678 55.3416
#4: 20 40 car to -1.3424 65.8999
#5: 30 60 Bicycle from -1.3240 55.1123
#6: 30 60 Bicycle to -1.4566 76.9088
#7: 40 90 Bicycle from -1.4560 55.2234
#8: 40 90 Bicycle to -1.1111 25.3344
这可能不是最优雅的解决方案,但它应该可以工作并且可以理解:
我们将数据分成两个数据框:一个包含 'from' 经度和纬度数据(称为 testF),另一个包含 'to' 数据(称为 test)。然后我们使用 rbind 将 'testF' 的行插入 'test' 中的适当位置。
test <- data.frame(dis = c(10,20,30,40),dur=c(30,40,60,90),method=c("car","car","Bicycle","Bicycle"),to_lon=c(-1.980,-1.5678,-1.324,-1.456),to_lat=c(55.3009,55.3416,55.1123,55.2234),from_lon=c(-1.4565,-1.3424,-1.4566,-1.1111),from_lat=c(76.8888,65.8999,76.9088,25.3344))
shared_cols <- c("dis", "dur", "method")
coord_names <- c("longitude", "latitude")

# "from" coordinates, with the coordinate columns renamed to the common names.
testF <- test[, c(shared_cols, "from_lon", "from_lat")]
names(testF)[4:5] <- coord_names

# "to" coordinates, renamed the same way so both frames can be stacked.
test <- test[, c(shared_cols, "to_lon", "to_lat")]
names(test)[4:5] <- coord_names

# Stack both frames once, then interleave them: ordering on the repeated trip
# index is stable, so each trip's "to" row is followed by its "from" row.
# This avoids growing the data frame with rbind() inside a loop and also
# works when the input has zero rows.
n_trips <- nrow(test)
test <- rbind(test, testF)[order(rep(seq_len(n_trips), times = 2L)), ]
rownames(test) <- NULL
这是使用 tidyr 包(一种流行的数据处理包)的替代方法,它避免了 for 循环。
library(tidyr)

# Example input: one row per trip with "to" and "from" coordinates side by side.
test <- data.frame(
  dis      = c(10, 20, 30, 40),
  dur      = c(30, 40, 60, 90),
  method   = c("car", "car", "Bicycle", "Bicycle"),
  to_lon   = c(-1.980, -1.5678, -1.324, -1.456),
  to_lat   = c(55.3009, 55.3416, 55.1123, 55.2234),
  from_lon = c(-1.4565, -1.3424, -1.4566, -1.1111),
  from_lat = c(76.8888, 65.8999, 76.9088, 25.3344)
)

# Row identifier so the two output rows of one trip can be kept together.
test$id <- seq_len(nrow(test))

# Split every coordinate column name at "_" into a direction ("to"/"from")
# and a measure ("lon"/"lat"). The special ".value" entry turns the measures
# into output columns, so longitude and latitude are reshaped in one pass
# instead of two separate gathers glued together by row position.
long <- pivot_longer(
  test,
  cols = c(to_lon, to_lat, from_lon, from_lat),
  names_to = c("direction", ".value"),
  names_sep = "_"
)

# Assemble a plain data frame with the descriptive coordinate names.
d3 <- data.frame(
  direction = factor(long$direction),
  dis       = long$dis,
  dur       = long$dur,
  method    = long$method,
  latitude  = long$lat,
  longitude = long$lon,
  id        = long$id
)

# Sort by trip so each trip's "to" and "from" rows are adjacent.
d3[order(d3$id), ]