循环清理 table,其中观察值存储为列
Loop to clean up table where observations are stored as column
我有一个 table,它按以下方式在 x 列中存储观测值,在 y 列中存储变量名称。
我正在尝试编写一个 R 循环来创建一个矩阵,其中每个观测值是一行,每个变量是一列。
问题是并不是所有的观测值都有所有的变量。
原始数据:
x | y |
---|---|
Apple | Fruit |
Austria | Origin |
Summer | Season |
Orange | Fruit |
Spain | Origin |
Pear | Fruit |
Tomato | Fruit |
Italy | Origin |
Summer | Season |
期望输出:
Fruit | Origin | Season |
---|---|---|
Apple | Austria | Summer |
Orange | Spain | |
Pear | | |
Tomato | Italy | Summer |
我目前的思路(R 伪代码):
# Asker's pseudo-code sketch: df_old holds values in x and the variable name of
# each value in y; the loop is meant to fill df_new with one column per variable.
# It is not runnable as written; see the NOTE(review) comments below.
df_old <- data.frame( x = c( "Apple", "Austria", "Summer", "Orange", "Spain", "Pear", "Tomato", "Italy", "Summer" ),
y = c( "Fruit", "Origin", "Season", "Fruit", "Origin", "Fruit", "Fruit", "Origin", "Season" ) )
# Start from an empty frame with zero rows and three columns, then name the columns.
df_new <- data.frame( matrix( ncol = 3, nrow = 0 ) )
colnames( df_new ) <- c( "Fruit", "Origin", "Season")
# NOTE(review): seq_along( df_old ) runs over the columns of df_old, not its
# rows; seq_len( nrow( df_old ) ) is presumably what was intended.
for ( i in seq_along( df_old ) ) {
# NOTE(review): bare `y` is not defined in this snippet; presumably
# df_old$y[i] is meant.
if ( y == "Fruit" ) {
# add new row
# NOTE(review): this assigns the whole x column, not the single value of row i
# (the same applies to the two branches below).
df_new$Fruit <- df_old$x
} else if ( y == "Origin" ) {
df_new$Origin <- df_old$x
# NOTE(review): `else` followed by a condition and a block is not valid R
# syntax; presumably `else if` was intended.
} else ( y == "Season" ) {
df_new$Season <- df_old$x
}
}
感谢您的帮助。
这是一个基于您使用 for-loop 给出的想法的解决方案。
# Example data: `x` holds the values and `y` names the variable each value
# belongs to. A "Fruit" row marks the start of a new observation.
df_old <- data.frame(
  x = c("Apple", "Austria", "Summer", "Orange", "Spain",
        "Pear", "Tomato", "Italy", "Summer"),
  y = c("Fruit", "Origin", "Season", "Fruit", "Origin",
        "Fruit", "Fruit", "Origin", "Season"),
  stringsAsFactors = FALSE
)

# Reshape a long key/value table into one row per observation.
#
# long_df  data frame with columns `x` (values) and `y` (variable names).
# id_var   name in `y` that starts a new observation.
# vars     column names of the result, in output order.
#
# Returns a character data frame with one row per `id_var` entry; variables
# that an observation does not have stay NA.
long_to_wide <- function(long_df, id_var = "Fruit",
                         vars = c("Fruit", "Origin", "Season")) {
  values <- as.character(long_df$x)
  keys <- as.character(long_df$y)

  # Preallocate one row per observation, filled with NA.
  wide <- as.data.frame(
    matrix(NA_character_,
           nrow = sum(keys == id_var, na.rm = TRUE),
           ncol = length(vars)),
    stringsAsFactors = FALSE
  )
  names(wide) <- vars

  # Track the current output row instead of peeking at rows i + 1 and i + 2:
  # peeking reads past the end of the data and can pick up values that belong
  # to the next observation.
  row <- 0L
  for (i in seq_along(keys)) {
    if (isTRUE(keys[i] == id_var)) {
      row <- row + 1L
    }
    # Rows before the first `id_var` and unknown variable names are skipped.
    if (row > 0L && keys[i] %in% vars) {
      wide[row, keys[i]] <- values[i]
    }
  }
  wide
}

df_new <- long_to_wide(df_old)
我有一个 table,它按以下方式在 x 列中存储观测值,在 y 列中存储变量名称。
我正在尝试编写一个 R 循环来创建一个矩阵,其中每个观测值是一行,每个变量是一列。
问题是并不是所有的观测值都有所有的变量。
原始数据:
x | y |
---|---|
Apple | Fruit |
Austria | Origin |
Summer | Season |
Orange | Fruit |
Spain | Origin |
Pear | Fruit |
Tomato | Fruit |
Italy | Origin |
Summer | Season |
期望输出:
Fruit | Origin | Season |
---|---|---|
Apple | Austria | Summer |
Orange | Spain | |
Pear | | |
Tomato | Italy | Summer |
我目前的思路(R 伪代码):
# Asker's pseudo-code sketch: df_old holds values in x and the variable name of
# each value in y; the loop is meant to fill df_new with one column per variable.
# It is not runnable as written; see the NOTE(review) comments below.
df_old <- data.frame( x = c( "Apple", "Austria", "Summer", "Orange", "Spain", "Pear", "Tomato", "Italy", "Summer" ),
y = c( "Fruit", "Origin", "Season", "Fruit", "Origin", "Fruit", "Fruit", "Origin", "Season" ) )
# Start from an empty frame with zero rows and three columns, then name the columns.
df_new <- data.frame( matrix( ncol = 3, nrow = 0 ) )
colnames( df_new ) <- c( "Fruit", "Origin", "Season")
# NOTE(review): seq_along( df_old ) runs over the columns of df_old, not its
# rows; seq_len( nrow( df_old ) ) is presumably what was intended.
for ( i in seq_along( df_old ) ) {
# NOTE(review): bare `y` is not defined in this snippet; presumably
# df_old$y[i] is meant.
if ( y == "Fruit" ) {
# add new row
# NOTE(review): this assigns the whole x column, not the single value of row i
# (the same applies to the two branches below).
df_new$Fruit <- df_old$x
} else if ( y == "Origin" ) {
df_new$Origin <- df_old$x
# NOTE(review): `else` followed by a condition and a block is not valid R
# syntax; presumably `else if` was intended.
} else ( y == "Season" ) {
df_new$Season <- df_old$x
}
}
感谢您的帮助。
这是一个基于您使用 for-loop 给出的想法的解决方案。
# Example data: `x` holds the values and `y` names the variable each value
# belongs to. A "Fruit" row marks the start of a new observation.
df_old <- data.frame(
  x = c("Apple", "Austria", "Summer", "Orange", "Spain",
        "Pear", "Tomato", "Italy", "Summer"),
  y = c("Fruit", "Origin", "Season", "Fruit", "Origin",
        "Fruit", "Fruit", "Origin", "Season"),
  stringsAsFactors = FALSE
)

# Reshape a long key/value table into one row per observation.
#
# long_df  data frame with columns `x` (values) and `y` (variable names).
# id_var   name in `y` that starts a new observation.
# vars     column names of the result, in output order.
#
# Returns a character data frame with one row per `id_var` entry; variables
# that an observation does not have stay NA.
long_to_wide <- function(long_df, id_var = "Fruit",
                         vars = c("Fruit", "Origin", "Season")) {
  values <- as.character(long_df$x)
  keys <- as.character(long_df$y)

  # Preallocate one row per observation, filled with NA.
  wide <- as.data.frame(
    matrix(NA_character_,
           nrow = sum(keys == id_var, na.rm = TRUE),
           ncol = length(vars)),
    stringsAsFactors = FALSE
  )
  names(wide) <- vars

  # Track the current output row instead of peeking at rows i + 1 and i + 2:
  # peeking reads past the end of the data and can pick up values that belong
  # to the next observation.
  row <- 0L
  for (i in seq_along(keys)) {
    if (isTRUE(keys[i] == id_var)) {
      row <- row + 1L
    }
    # Rows before the first `id_var` and unknown variable names are skipped.
    if (row > 0L && keys[i] %in% vars) {
      wide[row, keys[i]] <- values[i]
    }
  }
  wide
}

df_new <- long_to_wide(df_old)