根据条件创建新的“ID”列
Creating a new column of 'ID's' from a condition
我是 R 的新手,遇到了一个非常具体的问题,在网上似乎找不到太多帮助。而在 Stack Overflow 上提问似乎也并不那么简单,这让事情更加困难。所以请多多包涵。
为了提供一些背景信息,这是一个非常大的数据集,包含 250,000 条记录和 9 列。
其中一列称为 'Site_ID'。
我现在有一个按 Site_ID 和日期排序的庞大数据集。我遇到的问题是,同一个站点 ID 往往并不对应同一组坐标,我想要的是创建一个新列,在位置发生变化时给出新的站点 ID。由于 GPS 坐标本身存在较大误差,区分它们的唯一办法基本上就是在某个站点 ID 被“部署”(Task_Type 为 "Deploy trap")时做“标记”,并把同一个新站点 ID 分配给其后的所有记录,直到下一次部署出现,然后重新开始这一过程。
在伪代码中它会像....
NewColumn <- ifelse(Task_Type == "Deploy trap", assign int val, assign previous rows int val)
预期的输出是一个新列,其中包含一系列新的站点 ID,取值为 1:n,沿数据行自上而下依次编号(n = 数据集中 Deploy 的总次数)
如果需要我提供更多信息,请告诉我。我手头有数据快照,只是不确定如何有效地使用 Stack Overflow 的提问页面。
TRIALDATA$SITEIDDEPLOY = ifelse(TRIALDATA$Task_Type == 'Deploy trap', paste0(TRIALDATA$Site_ID, "_1"), TRIALDATA$Site_ID)
此代码示例是我之前用来实现附图中数据的代码示例。
structure(list(Longitude = c(1414394, 1414394, 1414398, 1411206,
1411206, 1411206, 1411206, 1412729, 1412729, 1412729, 1412733,
1414625, 1414625, 1414623, 1414456, 1414456, 1414456, 1414456,
1414456, 1414456, 1414455, 1411308, 1411308, 1411308, 1411307,
1411215, 1411215, 1411215, 1411214, 1414286, 1414286, 1414286,
1414286, 1414292, 1409923, 1409923, 1409923, 1409923, 1409923,
1409920), Latitude = c(4925150, 4925150, 4925147, 4921828, 4921828,
4921828, 4921827, 4923623, 4923623, 4923623, 4923620, 4925285,
4925285, 4925288, 4923812, 4923812, 4923812, 4923812, 4923812,
4923812, 4923802, 4922003, 4922003, 4922003, 4922002, 4921976,
4921976, 4921976, 4921974, 4924632, 4924632, 4924632, 4924632,
4924636, 4920300, 4920300, 4920300, 4920300, 4920300, 4920300
), Site_ID = c("100000060049", "100000060049", "100000060049",
"100000060070", "100000060070", "100000060070", "100000060070",
"100000060155", "100000060155", "100000060155", "100000060155",
"100000060155", "100000060155", "100000060155", "100000060155",
"100000060155", "100000060155", "100000060155", "100000060155",
"100000060155", "100000060155", "100000060155", "100000060155",
"100000060155", "100000060155", "100000060179", "100000060179",
"100000060179", "100000060179", "100000060209", "100000060209",
"100000060209", "100000060209", "100000060209", "100000060209",
"100000060209", "100000060209", "100000060209", "100000060209",
"100000060209"), Task_Type = c("Deploy trap", "Check trap", "Remove trap",
"Deploy trap", "Check trap", "Check trap", "Remove trap", "Deploy trap",
"Check trap", "Check trap", "Remove trap", "Deploy trap", "Check trap",
"Remove trap", "Deploy trap", "Check trap", "Check trap", "Check trap",
"Check trap", "Check trap", "Remove trap", "Deploy trap", "Check trap",
"Check trap", "Remove trap", "Deploy trap", "Check trap", "Check trap",
"Remove trap", "Deploy trap", "Check trap", "Check trap", "Check trap",
"Remove trap", "Deploy trap", "Check trap", "Check trap", "Check trap",
"Check trap", "Remove trap"), Task_Option = c("Leg-hold - possum",
"Still set", "Still set", "Leg-hold - possum", "Still set", "Still set",
"Still set", "Leg-hold - possum", "Still set", "Still set", "Still set",
"Leg-hold - possum", "Still set", "Still set", "Leg-hold - possum",
"Still set", "Still set", "Still set", "Still set", "Still set",
"Still set", "Leg-hold - possum", "Still set", "Still set", "Still set",
"Leg-hold - possum", "Still set", "Still set", "Still set", "Leg-hold - possum",
"Still set", "Still set", "Still set", "Still set", "Leg-hold - possum",
"Still set", "Still set", "Still set", "Still set", "Still set"
), Additional_Data = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), Time = c("11:32:41", "13:54:16",
"12:35:40", "09:23:50", "15:10:06", "14:44:31", "08:10:30", "11:33:14",
"14:53:36", "15:05:08", "12:46:37", "09:39:50", "13:54:17", "11:43:09",
"09:07:30", "14:32:55", "15:08:07", "16:05:35", "13:37:02", "14:50:06",
"08:57:05", "08:25:12", "15:10:07", "14:44:31", "08:03:35", "08:30:32",
"15:10:06", "14:44:31", "08:06:16", "12:59:51", "16:05:33", "13:37:05",
"12:36:12", "11:47:09", "14:24:09", "15:05:23", "12:58:22", "09:36:56",
"14:13:32", "08:08:02"), Date = structure(c(1516060800, 1516147200,
1516233600, 1517875200, 1517961600, 1518048000, 1518134400, 1515542400,
1515628800, 1515715200, 1515801600, 1516060800, 1516147200, 1516233600,
1516579200, 1516665600, 1516752000, 1516838400, 1516924800, 1517011200,
1517097600, 1517875200, 1517961600, 1518048000, 1518134400, 1517875200,
1517961600, 1518048000, 1518134400, 1516752000, 1516838400, 1516924800,
1517011200, 1517097600, 1518048000, 1518134400, 1518220800, 1518307200,
1518393600, 1518480000), tzone = "UTC", class = c("POSIXct",
"POSIXt")), SITEIDDEPLOY = c("100000060049_1", "100000060049",
"100000060049", "100000060070_1", "100000060070", "100000060070",
"100000060070", "100000060155_1", "100000060155", "100000060155",
"100000060155", "100000060155_1", "100000060155", "100000060155",
"100000060155_1", "100000060155", "100000060155", "100000060155",
"100000060155", "100000060155", "100000060155", "100000060155_1",
"100000060155", "100000060155", "100000060155", "100000060179_1",
"100000060179", "100000060179", "100000060179", "100000060209_1",
"100000060209", "100000060209", "100000060209", "100000060209",
"100000060209_1", "100000060209", "100000060209", "100000060209",
"100000060209", "100000060209"), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
你可以在这里使用cumsum
-
TRIALDATA$result <- cumsum(TRIALDATA$Task_Type == "Deploy trap")
这会在每次 Task_Type == "Deploy trap" 时将计数加一,计数保存在新列 result 中。
我是 R 的新手,遇到了一个非常具体的问题,在网上似乎找不到太多帮助。而在 Stack Overflow 上提问似乎也并不那么简单,这让事情更加困难。所以请多多包涵。
为了提供一些背景信息,这是一个非常大的数据集,包含 250,000 条记录和 9 列。
其中一列称为 'Site_ID'。
我现在有一个按 Site_ID 和日期排序的庞大数据集。我遇到的问题是,同一个站点 ID 往往并不对应同一组坐标,我想要的是创建一个新列,在位置发生变化时给出新的站点 ID。由于 GPS 坐标本身存在较大误差,区分它们的唯一办法基本上就是在某个站点 ID 被“部署”(Task_Type 为 "Deploy trap")时做“标记”,并把同一个新站点 ID 分配给其后的所有记录,直到下一次部署出现,然后重新开始这一过程。
在伪代码中它会像....
NewColumn <- ifelse(Task_Type == "Deploy trap", assign int val, assign previous rows int val)
预期的输出是一个新列,其中包含一系列新的站点 ID,取值为 1:n,沿数据行自上而下依次编号(n = 数据集中 Deploy 的总次数)
如果需要我提供更多信息,请告诉我。我手头有数据快照,只是不确定如何有效地使用 Stack Overflow 的提问页面。
TRIALDATA$SITEIDDEPLOY = ifelse(TRIALDATA$Task_Type == 'Deploy trap', paste0(TRIALDATA$Site_ID, "_1"), TRIALDATA$Site_ID)
此代码示例是我之前用来实现附图中数据的代码示例。
结构(列表(经度= c(1414394, 1414394, 1414398, 1411206, 1411206, 1411206, 1411206, 1412729, 1412729, 1412729, 1412733, 1414625, 1414625, 1414623, 1414456, 1414456, 1414456, 1414456, 1414456, 1414456, 1414455, 1411308, 1411308, 1411308, 1411307, 1411215, 1411215, 1411215, 1411214, 1414286, 1414286, 1414286, 1414286, 1414292, 1409923, 1409923, 1409923, 1409923, 1409923, 1409920), 纬度 = c(4925150, 4925150, 4925147, 4921828, 4921828, 4921828, 4921827, 4923623, 4923623, 4923623, 4923620, 4925285, 4925285, 4925288, 4923812, 4923812, 4923812, 4923812, 4923812, 4923812, 4923802, 4922003, 4922003, 4922003, 4922002, 4921976, 4921976, 4921976, 4921974, 4924632, 4924632, 4924632, 4924632, 4924636, 4920300, 4920300, 4920300, 4920300, 4920300, 4920300 ), Site_ID = c("100000060049", "100000060049", "100000060049", "100000060070", "100000060070", "100000060070", "100000060070", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060179", "100000060179", "100000060179", "100000060179", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209"), Task_Type = c("部署陷阱", "检查陷阱", "移除陷阱", “部署陷阱”、“检查陷阱”、“检查陷阱”、“删除陷阱”、“部署陷阱”、 “检查陷阱”、“检查陷阱”、“删除陷阱”、“部署陷阱”、“检查陷阱”、 “移除陷阱”、“部署陷阱”、“检查陷阱”、“检查陷阱”、“检查陷阱”、 “检查陷阱”、“检查陷阱”、“删除陷阱”、“部署陷阱”、“检查陷阱”、 “检查陷阱”、“删除陷阱”、“部署陷阱”、“检查陷阱”、“检查陷阱”、 “移除陷阱”、“部署陷阱”、“检查陷阱”、“检查陷阱”、“检查陷阱”、 “移除陷阱”、“部署陷阱”、“检查陷阱”、“检查陷阱”、“检查陷阱”、 "检查陷阱", "移除陷阱"), Task_Option = c("Leg-hold - possum", "静止不动", "静止不动", "抱腿 - 负鼠", "静止不动", "静止不动", “静止不动”,“腿保持 - 负鼠”,“静止不动”,“静止不动”,“静止不动”, "Leg-hold - possum", "Still set", "Still set", "Leg-hold - possum", “仍然设置”,“仍然设置”,“仍然设置”,“仍然设置”,“仍然设置”, “静止不动”,“腿保持 - 负鼠”,“静止不动”,“静止不动”,“静止不动”, "Leg-hold - possum", "Still set", 
"Still set", "Still set", "Leg-hold - possum", “静止不动”,“静止不动”,“静止不动”,“静止不动”,“抱腿 - 负鼠”, “仍然设置”,“仍然设置”,“仍然设置”,“仍然设置”,“仍然设置” ), Additional_Data = c(NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, NA_character_), 时间 = c("11:32:41", "13:54:16", "12:35:40", "09:23:50", "15:10:06", "14:44:31", "08:10:30", "11:33:14", "14:53:36", "15:05:08", "12:46:37", "09:39:50", "13:54:17", "11:43:09", "09:07:30", "14:32:55", "15:08:07", "16:05:35", "13:37:02", "14:50:06", "08:57:05", "08:25:12", "15:10:07", "14:44:31", "08:03:35", "08:30:32", "15:10:06", "14:44:31", "08:06:16", "12:59:51", "16:05:33", "13:37:05", "12:36:12", "11:47:09", "14:24:09", "15:05:23", "12:58:22", "09:36:56", “14:13:32”,“08:08:02”),日期 = 结构(c(1516060800,1516147200, 1516233600, 1517875200, 1517961600, 1518048000, 1518134400, 1515542400, 1515628800, 1515715200, 1515801600, 1516060800, 1516147200, 1516233600, 1516579200, 1516665600, 1516752000, 1516838400, 1516924800, 1517011200, 1517097600, 1517875200, 1517961600, 1518048000, 1518134400, 1517875200, 1517961600, 1518048000, 1518134400, 1516752000, 1516838400, 1516924800, 1517011200, 1517097600, 1518048000, 1518134400, 1518220800, 1518307200, 1518393600, 1518480000), tzone = "UTC", class = c("POSIXct", "POSIXt")), SITEIDDEPLOY = c("100000060049_1", "100000060049", "100000060049", "100000060070_1", "100000060070", "100000060070", "100000060070", "100000060155_1", "100000060155", "100000060155", "100000060155", "100000060155_1", 
"100000060155", "100000060155", "100000060155_1", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155", "100000060155_1", "100000060155", "100000060155", "100000060155", "100000060179_1", "100000060179", "100000060179", "100000060179", "100000060209_1", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209_1", "100000060209", "100000060209", "100000060209", "100000060209", "100000060209"), row.names = c(NA, -40L), class = c("tbl_df", "tbl", "data.frame"))
你可以在这里使用cumsum
-
TRIALDATA$result <- cumsum(TRIALDATA$Task_Type == "Deploy trap")
计数保存在新列 result
中。这将在每次 Task_Type = "Deploy trap"
.