将长格式转换为宽格式 r
Convert long to wide format r
我正在尝试在 R 中将数据集从长格式转换为宽格式。
特别是,我的数据集如下所示:
[1] "progNum" "SbjID" "age"
[4] "gender" "ethnicity" "skinTone"
[7] "sbjName" "HR_powerBased" "HR_coherenceBased"
[10] "maxIdx_filt" "smoothy" "s2nrat"
[13] "nFrames" "vidDuration" "frameRate"
[16] "threshold" "warningMess" "nFacesDetected"
[19] "enoughPointsVisible" "luminance" "flag"
[22] "nPeaksAboveThreshold" "maxFreqDiff" "task"
[25] "trial" "allTaskNames" "allTaskSecs"
[28] "Names" "Beats" "Distance"
[31] "rPPGEstimatedBeats" "estimatedBeatsSmoothy" "HR_ECG"
[34] "Name" "participant"
我想保留所有变量,但需要按 “Distance” 和 “trial” 把 “HR_ECG” 和 “smoothy” 拆分成多列。
Distance 可以是 0 或 1
trial 有 4 个水平:25、35、45、100
也就是说,最后,我需要一个如下所示的数据框:
progNum, sbjID, age [...] HR_ECG_close_25, HR_ECG_close_35, HR_ECG_close_45, HR_ECG_close_100, HR_ECG_far_25, HR_ECG_far_35,HR_ECG_far_45,HR_ECG_far_100
smoothy 也一样:
smoothy_close_25, smoothy_close_35, smoothy_close_45, smoothy_close_100, smoothy_far_25, smoothy_far_35, smoothy_far_45, smoothy_far_100
我希望这是有道理的,非常感谢你
根据评论编辑 - 可重现的数据集:
structure(list(progNum = c(12, 13, 14, 15, 17, 18, 19, 20, 22,
23, 24, 25, 27, 28, 29, 30), SbjID = c(456465465, 456465465,
456465465, 456465465, 456465465, 456465465, 456465465, 456465465,
64846846846, 64846846846, 64846846846, 64846846846, 64846846846,
64846846846, 64846846846, 64846846846), age = c("19", "19", "19",
"19", "19", "19", "19", "19", "19", "19", "19", "19", "19", "19",
"19", "19"), gender = c("Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male"), smoothy = c(77.9221097737332, 78.5599580813492,
75.8424141201793, 78.6216428610833, 81.4167032250805, 76.9509617898643,
83.5251636058245, 76.5866099353627, 86.6511432503543, 86.3901538762173,
84.6411757168127, 87.0600014771307, 85.3731055604431, 81.5935438011446,
83.38581442316, 85.2329422916703), nFrames = c(599, 838, 1078,
2397, 599, 839, 1078, 2397, 599, 838, 1079, 2397, 598, 839, 1079,
2396), vidDuration = c(24.984, 34.952, 44.962, 99.975, 24.984,
34.994, 44.962, 99.975, 24.984, 34.952, 45.004, 99.975, 24.942,
34.994, 45.004, 99.934), frameRate = c(23.9353186039065, 23.947127489128,
23.9535607846626, 23.9659914978745, 23.9353186039065, 23.9469623364005,
23.9535607846626, 23.9659914978745, 23.9353186039065, 23.947127489128,
23.9534263621012, 23.9659914978745, 23.935530430599, 23.9469623364005,
23.9534263621012, 23.9658174395101), trial = c(25, 35, 45, 100,
25, 35, 45, 100, 25, 35, 45, 100, 25, 35, 45, 100), allTaskSecs = c("25",
"35", "45", "100", "25_Far", "35_Far", "45_Far", "100_Far", "25",
"35", "45", "100", "25_Far", "35_Far", "45_Far", "100_Far"),
Names = c("A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1",
"A1", "A2", "A2", "A2", "A2", "A2", "A2", "A2"), Beats = c(33,
46, 62, 130, 31, 47, 58, 132, 36, 48, 63, 144, 37, 52, 65,
146), Distance = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
1, 1, 1), rPPGEstimatedBeats = c(32.4675457390555, 45.8266422141203,
56.8818105901344, 131.036071435139, 33.9236263437836, 44.8880610440875,
62.6438727043684, 127.644349892271, 36.1046430209809, 50.3942564277934,
63.4808817876095, 145.100002461884, 35.5721273168513, 47.596233884001,
62.53936081737, 142.05490381945), estimatedBeatsSmoothy = c(32.4675457390555,
45.8266422141203, 56.8818105901344, 131.036071435139, 33.9236263437836,
44.8880610440875, 62.6438727043684, 127.644349892271, 36.1046430209809,
50.3942564277934, 63.4808817876095, 145.100002461884, 35.5721273168513,
47.596233884001, 62.53936081737, 142.05490381945), HR_ECG = c(79.2,
78.8571428571429, 82.6666666666667, 78, 74.4, 80.5714285714286,
77.3333333333333, 79.2, 86.4, 82.2857142857143, 84, 86.4,
88.8, 89.1428571428571, 86.6666666666667, 87.6), Name = c("A1",
"A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A2", "A2",
"A2", "A2", "A2", "A2", "A2")), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -16L))
您可以使用 tidyr 包中的 pivot_wider:
# mutate() and if_else() are exported by dplyr, not tidyr, so both packages
# have to be attached for this snippet to run in a fresh session.
library(dplyr)
library(tidyr)

# Recode the numeric Distance flag (0 -> "close", anything else -> "far") so
# the labels appear in the new column names, then spread HR_ECG and smoothy
# across every Distance x trial combination (e.g. HR_ECG_close_25).
df %>%
  mutate(Distance = if_else(Distance == 0, "close", "far")) %>%
  pivot_wider(
    names_from = c("Distance", "trial"),
    values_from = c("HR_ECG", "smoothy")
  )
您的数据的问题在于,其他列中的值对于每个 SbjID 并不唯一,因此透视后新建的列中会出现许多 NA。我想您可能需要在透视之前先用 select 选出要保留的列,或者把更多的列也一并透视。
我正在尝试在 R 中将数据集从长格式转换为宽格式。
特别是,我的数据集如下所示:
[1] "progNum" "SbjID" "age"
[4] "gender" "ethnicity" "skinTone"
[7] "sbjName" "HR_powerBased" "HR_coherenceBased"
[10] "maxIdx_filt" "smoothy" "s2nrat"
[13] "nFrames" "vidDuration" "frameRate"
[16] "threshold" "warningMess" "nFacesDetected"
[19] "enoughPointsVisible" "luminance" "flag"
[22] "nPeaksAboveThreshold" "maxFreqDiff" "task"
[25] "trial" "allTaskNames" "allTaskSecs"
[28] "Names" "Beats" "Distance"
[31] "rPPGEstimatedBeats" "estimatedBeatsSmoothy" "HR_ECG"
[34] "Name" "participant"
我想保留所有变量,但需要按 “Distance” 和 “trial” 把 “HR_ECG” 和 “smoothy” 拆分成多列。
Distance 可以是 0 或 1;trial 有 4 个水平:25、35、45、100
也就是说,最后,我需要一个如下所示的数据框:
progNum, sbjID, age [...] HR_ECG_close_25, HR_ECG_close_35, HR_ECG_close_45, HR_ECG_close_100, HR_ECG_far_25, HR_ECG_far_35,HR_ECG_far_45,HR_ECG_far_100 smoothy 也一样: smoothy_close_25, smoothy_close_35, smoothy_close_45, smoothy_close_100, smoothy_far_25, smoothy_far_35, smoothy_far_45, smoothy_far_100
我希望这是有道理的,非常感谢你
根据评论编辑 - 可重现的数据集:
structure(list(progNum = c(12, 13, 14, 15, 17, 18, 19, 20, 22,
23, 24, 25, 27, 28, 29, 30), SbjID = c(456465465, 456465465,
456465465, 456465465, 456465465, 456465465, 456465465, 456465465,
64846846846, 64846846846, 64846846846, 64846846846, 64846846846,
64846846846, 64846846846, 64846846846), age = c("19", "19", "19",
"19", "19", "19", "19", "19", "19", "19", "19", "19", "19", "19",
"19", "19"), gender = c("Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male"), smoothy = c(77.9221097737332, 78.5599580813492,
75.8424141201793, 78.6216428610833, 81.4167032250805, 76.9509617898643,
83.5251636058245, 76.5866099353627, 86.6511432503543, 86.3901538762173,
84.6411757168127, 87.0600014771307, 85.3731055604431, 81.5935438011446,
83.38581442316, 85.2329422916703), nFrames = c(599, 838, 1078,
2397, 599, 839, 1078, 2397, 599, 838, 1079, 2397, 598, 839, 1079,
2396), vidDuration = c(24.984, 34.952, 44.962, 99.975, 24.984,
34.994, 44.962, 99.975, 24.984, 34.952, 45.004, 99.975, 24.942,
34.994, 45.004, 99.934), frameRate = c(23.9353186039065, 23.947127489128,
23.9535607846626, 23.9659914978745, 23.9353186039065, 23.9469623364005,
23.9535607846626, 23.9659914978745, 23.9353186039065, 23.947127489128,
23.9534263621012, 23.9659914978745, 23.935530430599, 23.9469623364005,
23.9534263621012, 23.9658174395101), trial = c(25, 35, 45, 100,
25, 35, 45, 100, 25, 35, 45, 100, 25, 35, 45, 100), allTaskSecs = c("25",
"35", "45", "100", "25_Far", "35_Far", "45_Far", "100_Far", "25",
"35", "45", "100", "25_Far", "35_Far", "45_Far", "100_Far"),
Names = c("A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1",
"A1", "A2", "A2", "A2", "A2", "A2", "A2", "A2"), Beats = c(33,
46, 62, 130, 31, 47, 58, 132, 36, 48, 63, 144, 37, 52, 65,
146), Distance = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
1, 1, 1), rPPGEstimatedBeats = c(32.4675457390555, 45.8266422141203,
56.8818105901344, 131.036071435139, 33.9236263437836, 44.8880610440875,
62.6438727043684, 127.644349892271, 36.1046430209809, 50.3942564277934,
63.4808817876095, 145.100002461884, 35.5721273168513, 47.596233884001,
62.53936081737, 142.05490381945), estimatedBeatsSmoothy = c(32.4675457390555,
45.8266422141203, 56.8818105901344, 131.036071435139, 33.9236263437836,
44.8880610440875, 62.6438727043684, 127.644349892271, 36.1046430209809,
50.3942564277934, 63.4808817876095, 145.100002461884, 35.5721273168513,
47.596233884001, 62.53936081737, 142.05490381945), HR_ECG = c(79.2,
78.8571428571429, 82.6666666666667, 78, 74.4, 80.5714285714286,
77.3333333333333, 79.2, 86.4, 82.2857142857143, 84, 86.4,
88.8, 89.1428571428571, 86.6666666666667, 87.6), Name = c("A1",
"A1", "A1", "A1", "A1", "A1", "A1", "A1", "A1", "A2", "A2",
"A2", "A2", "A2", "A2", "A2")), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -16L))
您可以使用 tidyr 包中的 pivot_wider:
# mutate() and if_else() are exported by dplyr, not tidyr, so both packages
# have to be attached for this snippet to run in a fresh session.
library(dplyr)
library(tidyr)

# Recode the numeric Distance flag (0 -> "close", anything else -> "far") so
# the labels appear in the new column names, then spread HR_ECG and smoothy
# across every Distance x trial combination (e.g. HR_ECG_close_25).
df %>%
  mutate(Distance = if_else(Distance == 0, "close", "far")) %>%
  pivot_wider(
    names_from = c("Distance", "trial"),
    values_from = c("HR_ECG", "smoothy")
  )
您的数据的问题在于,其他列中的值对于每个 SbjID 并不唯一,因此透视后新建的列中会出现许多 NA。我想您可能需要在透视之前先用 select 选出要保留的列,或者把更多的列也一并透视。